in_mcast.c revision 194951
1124208Sdes/*-
2124208Sdes * Copyright (c) 2007-2009 Bruce Simpson.
3124208Sdes * Copyright (c) 2005 Robert N. M. Watson.
4124208Sdes * All rights reserved.
5124208Sdes *
6124208Sdes * Redistribution and use in source and binary forms, with or without
7124208Sdes * modification, are permitted provided that the following conditions
8124208Sdes * are met:
9124208Sdes * 1. Redistributions of source code must retain the above copyright
10124208Sdes *    notice, this list of conditions and the following disclaimer.
11124208Sdes * 2. Redistributions in binary form must reproduce the above copyright
12124208Sdes *    notice, this list of conditions and the following disclaimer in the
13124208Sdes *    documentation and/or other materials provided with the distribution.
14124208Sdes * 3. The name of the author may not be used to endorse or promote
15124208Sdes *    products derived from this software without specific prior written
16124208Sdes *    permission.
17124208Sdes *
18124208Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19124208Sdes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20124208Sdes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21124208Sdes * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22124208Sdes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23124208Sdes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24124208Sdes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2598937Sdes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2698937Sdes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2798937Sdes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28197679Sdes * SUCH DAMAGE.
2998937Sdes */
3098937Sdes
3198937Sdes/*
3298937Sdes * IPv4 multicast socket, group, and socket option processing module.
33162852Sdes */
3498937Sdes
3598937Sdes#include <sys/cdefs.h>
3698937Sdes__FBSDID("$FreeBSD: head/sys/netinet/in_mcast.c 194951 2009-06-25 11:52:33Z rwatson $");
3798937Sdes
3898937Sdes#include <sys/param.h>
3998937Sdes#include <sys/systm.h>
4098937Sdes#include <sys/kernel.h>
4198937Sdes#include <sys/malloc.h>
4298937Sdes#include <sys/mbuf.h>
4398937Sdes#include <sys/protosw.h>
4498937Sdes#include <sys/socket.h>
4598937Sdes#include <sys/socketvar.h>
4698937Sdes#include <sys/protosw.h>
4798937Sdes#include <sys/sysctl.h>
4898937Sdes#include <sys/vimage.h>
4998937Sdes#include <sys/ktr.h>
5098937Sdes#include <sys/tree.h>
5198937Sdes
5298937Sdes#include <net/if.h>
5398937Sdes#include <net/if_dl.h>
5498937Sdes#include <net/route.h>
5598937Sdes#include <net/vnet.h>
5698937Sdes
57149749Sdes#include <netinet/in.h>
58149749Sdes#include <netinet/in_systm.h>
59149749Sdes#include <netinet/in_pcb.h>
60149749Sdes#include <netinet/in_var.h>
61149749Sdes#include <netinet/ip_var.h>
6298937Sdes#include <netinet/igmp_var.h>
6398937Sdes#include <netinet/vinet.h>
6498937Sdes
65149749Sdes#ifndef KTR_IGMPV3
66149749Sdes#define KTR_IGMPV3 KTR_INET
67149749Sdes#endif
68149749Sdes
69149749Sdes#ifndef __SOCKUNION_DECLARED
70149749Sdesunion sockunion {
71181111Sdes	struct sockaddr_storage	ss;
72149749Sdes	struct sockaddr		sa;
73149749Sdes	struct sockaddr_dl	sdl;
74149749Sdes	struct sockaddr_in	sin;
7598937Sdes};
7698937Sdestypedef union sockunion sockunion_t;
7798937Sdes#define __SOCKUNION_DECLARED
7898937Sdes#endif /* __SOCKUNION_DECLARED */
7998937Sdes
8098937Sdesstatic MALLOC_DEFINE(M_INMFILTER, "in_mfilter",
8198937Sdes    "IPv4 multicast PCB-layer source filter");
8298937Sdesstatic MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
8398937Sdesstatic MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
8498937Sdesstatic MALLOC_DEFINE(M_IPMSOURCE, "ip_msource",
8598937Sdes    "IPv4 multicast IGMP-layer source filter");
8698937Sdes
8798937Sdes#ifdef VIMAGE_GLOBALS
8898937Sdesstruct in_multihead in_multihead;	/* XXX now unused; retain for ABI */
8998937Sdes#endif
9098937Sdes
9198937Sdes/*
9298937Sdes * Locking:
93162852Sdes * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
94162852Sdes * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however
9598937Sdes *   it can be taken by code in net/if.c also.
9698937Sdes * - ip_moptions and in_mfilter are covered by the INP_WLOCK.
9798937Sdes *
9898937Sdes * struct in_multi is covered by IN_MULTI_LOCK. There isn't strictly
9998937Sdes * any need for in_multi itself to be virtualized -- it is bound to an ifp
10098937Sdes * anyway no matter what happens.
101126274Sdes */
10298937Sdesstruct mtx in_multi_mtx;
10398937SdesMTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF);
10498937Sdes
10598937Sdes/*
10698937Sdes * Functions with non-static linkage defined in this file should be
10798937Sdes * declared in in_var.h:
10898937Sdes *  imo_multi_filter()
10998937Sdes *  in_addmulti()
11098937Sdes *  in_delmulti()
11198937Sdes *  in_joingroup()
11298937Sdes *  in_joingroup_locked()
11398937Sdes *  in_leavegroup()
11498937Sdes *  in_leavegroup_locked()
11598937Sdes * and ip_var.h:
11698937Sdes *  inp_freemoptions()
11798937Sdes *  inp_getmoptions()
11898937Sdes *  inp_setmoptions()
11998937Sdes *
12098937Sdes * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti()
12198937Sdes * and in_delmulti().
12298937Sdes */
12398937Sdesstatic void	imf_commit(struct in_mfilter *);
12498937Sdesstatic int	imf_get_source(struct in_mfilter *imf,
12598937Sdes		    const struct sockaddr_in *psin,
12698937Sdes		    struct in_msource **);
12798937Sdesstatic struct in_msource *
12898937Sdes		imf_graft(struct in_mfilter *, const uint8_t,
12998937Sdes		    const struct sockaddr_in *);
13098937Sdesstatic void	imf_leave(struct in_mfilter *);
13198937Sdesstatic int	imf_prune(struct in_mfilter *, const struct sockaddr_in *);
13298937Sdesstatic void	imf_purge(struct in_mfilter *);
13398937Sdesstatic void	imf_rollback(struct in_mfilter *);
13498937Sdesstatic void	imf_reap(struct in_mfilter *);
13598937Sdesstatic int	imo_grow(struct ip_moptions *);
13698937Sdesstatic size_t	imo_match_group(const struct ip_moptions *,
13798937Sdes		    const struct ifnet *, const struct sockaddr *);
13898937Sdesstatic struct in_msource *
13998937Sdes		imo_match_source(const struct ip_moptions *, const size_t,
14098937Sdes		    const struct sockaddr *);
14198937Sdesstatic void	ims_merge(struct ip_msource *ims,
14298937Sdes		    const struct in_msource *lims, const int rollback);
143106121Sdesstatic int	in_getmulti(struct ifnet *, const struct in_addr *,
14498937Sdes		    struct in_multi **);
14598937Sdesstatic int	inm_get_source(struct in_multi *inm, const in_addr_t haddr,
14698937Sdes		    const int noalloc, struct ip_msource **pims);
14798937Sdesstatic int	inm_is_ifp_detached(const struct in_multi *);
14898937Sdesstatic int	inm_merge(struct in_multi *, /*const*/ struct in_mfilter *);
14998937Sdesstatic void	inm_purge(struct in_multi *);
15098937Sdesstatic void	inm_reap(struct in_multi *);
15198937Sdesstatic struct ip_moptions *
15298937Sdes		inp_findmoptions(struct inpcb *);
15398937Sdesstatic int	inp_get_source_filters(struct inpcb *, struct sockopt *);
15498937Sdesstatic int	inp_join_group(struct inpcb *, struct sockopt *);
15598937Sdesstatic int	inp_leave_group(struct inpcb *, struct sockopt *);
15698937Sdesstatic struct ifnet *
15798937Sdes		inp_lookup_mcast_ifp(const struct inpcb *,
15898937Sdes		    const struct sockaddr_in *, const struct in_addr);
15998937Sdesstatic int	inp_block_unblock_source(struct inpcb *, struct sockopt *);
16098937Sdesstatic int	inp_set_multicast_if(struct inpcb *, struct sockopt *);
16198937Sdesstatic int	inp_set_source_filters(struct inpcb *, struct sockopt *);
16298937Sdesstatic int	sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS);
16398937Sdes
164106121SdesSYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0, "IPv4 multicast");
16598937Sdes
16698937Sdesstatic u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER;
16798937SdesSYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc,
16898937Sdes    CTLFLAG_RW | CTLFLAG_TUN, &in_mcast_maxgrpsrc, 0,
16998937Sdes    "Max source filters per group");
17098937SdesTUNABLE_ULONG("net.inet.ip.mcast.maxgrpsrc", &in_mcast_maxgrpsrc);
17198937Sdes
172106121Sdesstatic u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER;
17398937SdesSYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc,
17498937Sdes    CTLFLAG_RW | CTLFLAG_TUN, &in_mcast_maxsocksrc, 0,
17598937Sdes    "Max source filters per socket");
17698937SdesTUNABLE_ULONG("net.inet.ip.mcast.maxsocksrc", &in_mcast_maxsocksrc);
177106121Sdes
17898937Sdesint in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP;
17998937SdesSYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RW | CTLFLAG_TUN,
18098937Sdes    &in_mcast_loop, 0, "Loopback multicast datagrams by default");
181106121SdesTUNABLE_INT("net.inet.ip.mcast.loop", &in_mcast_loop);
18298937Sdes
18398937SdesSYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters,
18498937Sdes    CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
18598937Sdes    "Per-interface stack-wide source filters");
18698937Sdes
18798937Sdes/*
18898937Sdes * Inline function which wraps assertions for a valid ifp.
18998937Sdes * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
19098937Sdes * is detached.
19198937Sdes */
19298937Sdesstatic int __inline
19398937Sdesinm_is_ifp_detached(const struct in_multi *inm)
19498937Sdes{
19598937Sdes	struct ifnet *ifp;
19698937Sdes
19798937Sdes	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
19898937Sdes	ifp = inm->inm_ifma->ifma_ifp;
19998937Sdes	if (ifp != NULL) {
20098937Sdes		/*
201106121Sdes		 * Sanity check that netinet's notion of ifp is the
20298937Sdes		 * same as net's.
20398937Sdes		 */
20498937Sdes		KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
20598937Sdes	}
20698937Sdes
20798937Sdes	return (ifp == NULL);
20898937Sdes}
20998937Sdes
21098937Sdes/*
21198937Sdes * Initialize an in_mfilter structure to a known state at t0, t1
21298937Sdes * with an empty source filter list.
21398937Sdes */
214106121Sdesstatic __inline void
21598937Sdesimf_init(struct in_mfilter *imf, const int st0, const int st1)
21698937Sdes{
21798937Sdes	memset(imf, 0, sizeof(struct in_mfilter));
21898937Sdes	RB_INIT(&imf->imf_sources);
21998937Sdes	imf->imf_st[0] = st0;
22098937Sdes	imf->imf_st[1] = st1;
22198937Sdes}
22298937Sdes
22398937Sdes/*
22498937Sdes * Resize the ip_moptions vector to the next power-of-two minus 1.
22598937Sdes * May be called with locks held; do not sleep.
22698937Sdes */
22798937Sdesstatic int
22898937Sdesimo_grow(struct ip_moptions *imo)
22998937Sdes{
23098937Sdes	struct in_multi		**nmships;
23198937Sdes	struct in_multi		**omships;
23298937Sdes	struct in_mfilter	 *nmfilters;
23398937Sdes	struct in_mfilter	 *omfilters;
23498937Sdes	size_t			  idx;
23598937Sdes	size_t			  newmax;
23698937Sdes	size_t			  oldmax;
23798937Sdes
23898937Sdes	nmships = NULL;
23998937Sdes	nmfilters = NULL;
24098937Sdes	omships = imo->imo_membership;
24198937Sdes	omfilters = imo->imo_mfilters;
24298937Sdes	oldmax = imo->imo_max_memberships;
24398937Sdes	newmax = ((oldmax + 1) * 2) - 1;
24498937Sdes
24598937Sdes	if (newmax <= IP_MAX_MEMBERSHIPS) {
24698937Sdes		nmships = (struct in_multi **)realloc(omships,
24798937Sdes		    sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT);
24898937Sdes		nmfilters = (struct in_mfilter *)realloc(omfilters,
249106121Sdes		    sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT);
250106121Sdes		if (nmships != NULL && nmfilters != NULL) {
251106121Sdes			/* Initialize newly allocated source filter heads. */
252106121Sdes			for (idx = oldmax; idx < newmax; idx++) {
25398937Sdes				imf_init(&nmfilters[idx], MCAST_UNDEFINED,
25498937Sdes				    MCAST_EXCLUDE);
25598937Sdes			}
256126274Sdes			imo->imo_max_memberships = newmax;
25798937Sdes			imo->imo_membership = nmships;
25898937Sdes			imo->imo_mfilters = nmfilters;
25998937Sdes		}
26098937Sdes	}
26198937Sdes
26298937Sdes	if (nmships == NULL || nmfilters == NULL) {
26398937Sdes		if (nmships != NULL)
26498937Sdes			free(nmships, M_IPMOPTS);
26598937Sdes		if (nmfilters != NULL)
26698937Sdes			free(nmfilters, M_INMFILTER);
26798937Sdes		return (ETOOMANYREFS);
26898937Sdes	}
26998937Sdes
27098937Sdes	return (0);
27198937Sdes}
27298937Sdes
27398937Sdes/*
27498937Sdes * Find an IPv4 multicast group entry for this ip_moptions instance
27598937Sdes * which matches the specified group, and optionally an interface.
27698937Sdes * Return its index into the array, or -1 if not found.
27798937Sdes */
27898937Sdesstatic size_t
27998937Sdesimo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp,
28098937Sdes    const struct sockaddr *group)
28198937Sdes{
28298937Sdes	const struct sockaddr_in *gsin;
28398937Sdes	struct in_multi	**pinm;
28498937Sdes	int		  idx;
28598937Sdes	int		  nmships;
28698937Sdes
28798937Sdes	gsin = (const struct sockaddr_in *)group;
28898937Sdes
28998937Sdes	/* The imo_membership array may be lazy allocated. */
29098937Sdes	if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
29198937Sdes		return (-1);
29298937Sdes
29398937Sdes	nmships = imo->imo_num_memberships;
29498937Sdes	pinm = &imo->imo_membership[0];
29598937Sdes	for (idx = 0; idx < nmships; idx++, pinm++) {
29698937Sdes		if (*pinm == NULL)
29798937Sdes			continue;
29898937Sdes		if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) &&
29998937Sdes		    in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) {
300146998Sdes			break;
301146998Sdes		}
302146998Sdes	}
303197679Sdes	if (idx >= nmships)
304197679Sdes		idx = -1;
305197679Sdes
306146998Sdes	return (idx);
30798937Sdes}
30898937Sdes
30998937Sdes/*
31098937Sdes * Find an IPv4 multicast source entry for this imo which matches
31198937Sdes * the given group index for this socket, and source address.
31298937Sdes *
31398937Sdes * NOTE: This does not check if the entry is in-mode, merely if
31498937Sdes * it exists, which may not be the desired behaviour.
31598937Sdes */
31698937Sdesstatic struct in_msource *
31798937Sdesimo_match_source(const struct ip_moptions *imo, const size_t gidx,
31898937Sdes    const struct sockaddr *src)
31998937Sdes{
32098937Sdes	struct ip_msource	 find;
32198937Sdes	struct in_mfilter	*imf;
32298937Sdes	struct ip_msource	*ims;
32398937Sdes	const sockunion_t	*psa;
32498937Sdes
32598937Sdes	KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
32698937Sdes	KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
32798937Sdes	    ("%s: invalid index %d\n", __func__, (int)gidx));
32898937Sdes
32998937Sdes	/* The imo_mfilters array may be lazy allocated. */
33098937Sdes	if (imo->imo_mfilters == NULL)
33198937Sdes		return (NULL);
33298937Sdes	imf = &imo->imo_mfilters[gidx];
33398937Sdes
33498937Sdes	/* Source trees are keyed in host byte order. */
33598937Sdes	psa = (const sockunion_t *)src;
33698937Sdes	find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr);
33798937Sdes	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
33898937Sdes
339124208Sdes	return ((struct in_msource *)ims);
340124208Sdes}
341124208Sdes
342124208Sdes/*
34398937Sdes * Perform filtering for multicast datagrams on a socket by group and source.
34498937Sdes *
34598937Sdes * Returns 0 if a datagram should be allowed through, or various error codes
34698937Sdes * if the socket was not a member of the group, or the source was muted, etc.
34798937Sdes */
34898937Sdesint
34998937Sdesimo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp,
35098937Sdes    const struct sockaddr *group, const struct sockaddr *src)
35198937Sdes{
35298937Sdes	size_t gidx;
35398937Sdes	struct in_msource *ims;
35498937Sdes	int mode;
35598937Sdes
35698937Sdes	KASSERT(ifp != NULL, ("%s: null ifp", __func__));
35798937Sdes
35898937Sdes	gidx = imo_match_group(imo, ifp, group);
35998937Sdes	if (gidx == -1)
36098937Sdes		return (MCAST_NOTGMEMBER);
36198937Sdes
36298937Sdes	/*
36398937Sdes	 * Check if the source was included in an (S,G) join.
36498937Sdes	 * Allow reception on exclusive memberships by default,
36598937Sdes	 * reject reception on inclusive memberships by default.
36698937Sdes	 * Exclude source only if an in-mode exclude filter exists.
36798937Sdes	 * Include source only if an in-mode include filter exists.
36898937Sdes	 * NOTE: We are comparing group state here at IGMP t1 (now)
36998937Sdes	 * with socket-layer t0 (since last downcall).
37098937Sdes	 */
37198937Sdes	mode = imo->imo_mfilters[gidx].imf_st[1];
37298937Sdes	ims = imo_match_source(imo, gidx, src);
37398937Sdes
37498937Sdes	if ((ims == NULL && mode == MCAST_INCLUDE) ||
37598937Sdes	    (ims != NULL && ims->imsl_st[0] != mode))
37698937Sdes		return (MCAST_NOTSMEMBER);
37798937Sdes
37898937Sdes	return (MCAST_PASS);
37998937Sdes}
38098937Sdes
38198937Sdes/*
38298937Sdes * Find and return a reference to an in_multi record for (ifp, group),
38398937Sdes * and bump its reference count.
38498937Sdes * If one does not exist, try to allocate it, and update link-layer multicast
38598937Sdes * filters on ifp to listen for group.
38698937Sdes * Assumes the IN_MULTI lock is held across the call.
38798937Sdes * Return 0 if successful, otherwise return an appropriate error code.
38898937Sdes */
38998937Sdesstatic int
39098937Sdesin_getmulti(struct ifnet *ifp, const struct in_addr *group,
39198937Sdes    struct in_multi **pinm)
39298937Sdes{
39398937Sdes	struct sockaddr_in	 gsin;
39498937Sdes	struct ifmultiaddr	*ifma;
39598937Sdes	struct in_ifinfo	*ii;
39698937Sdes	struct in_multi		*inm;
39798937Sdes	int error;
39898937Sdes
39998937Sdes	IN_MULTI_LOCK_ASSERT();
40098937Sdes
40198937Sdes	ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET];
40298937Sdes
40398937Sdes	inm = inm_lookup(ifp, *group);
40498937Sdes	if (inm != NULL) {
40598937Sdes		/*
40698937Sdes		 * If we already joined this group, just bump the
40798937Sdes		 * refcount and return it.
40898937Sdes		 */
409113908Sdes		KASSERT(inm->inm_refcount >= 1,
410113908Sdes		    ("%s: bad refcount %d", __func__, inm->inm_refcount));
411113908Sdes		++inm->inm_refcount;
412113908Sdes		*pinm = inm;
413113908Sdes		return (0);
414113908Sdes	}
415113908Sdes
416113908Sdes	memset(&gsin, 0, sizeof(gsin));
417113908Sdes	gsin.sin_family = AF_INET;
418113908Sdes	gsin.sin_len = sizeof(struct sockaddr_in);
419113908Sdes	gsin.sin_addr = *group;
420113908Sdes
421113908Sdes	/*
422113908Sdes	 * Check if a link-layer group is already associated
42398937Sdes	 * with this network-layer group on the given ifnet.
42498937Sdes	 */
42598937Sdes	error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma);
42698937Sdes	if (error != 0)
42798937Sdes		return (error);
42898937Sdes
42998937Sdes	/* XXX ifma_protospec must be covered by IF_ADDR_LOCK */
43098937Sdes	IF_ADDR_LOCK(ifp);
43198937Sdes
43298937Sdes	/*
43398937Sdes	 * If something other than netinet is occupying the link-layer
43498937Sdes	 * group, print a meaningful error message and back out of
43598937Sdes	 * the allocation.
43698937Sdes	 * Otherwise, bump the refcount on the existing network-layer
437149749Sdes	 * group association and return it.
438149749Sdes	 */
439149749Sdes	if (ifma->ifma_protospec != NULL) {
440149749Sdes		inm = (struct in_multi *)ifma->ifma_protospec;
441157016Sdes#ifdef INVARIANTS
442157016Sdes		KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr",
443157016Sdes		    __func__));
444157016Sdes		KASSERT(ifma->ifma_addr->sa_family == AF_INET,
445181111Sdes		    ("%s: ifma not AF_INET", __func__));
446181111Sdes		KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__));
447181111Sdes		if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
448181111Sdes		    !in_hosteq(inm->inm_addr, *group))
44998937Sdes			panic("%s: ifma %p is inconsistent with %p (%s)",
45098937Sdes			    __func__, ifma, inm, inet_ntoa(*group));
45198937Sdes#endif
45298937Sdes		++inm->inm_refcount;
45398937Sdes		*pinm = inm;
45498937Sdes		IF_ADDR_UNLOCK(ifp);
45598937Sdes		return (0);
45698937Sdes	}
45798937Sdes
45898937Sdes	IF_ADDR_LOCK_ASSERT(ifp);
45998937Sdes
46098937Sdes	/*
46198937Sdes	 * A new in_multi record is needed; allocate and initialize it.
46298937Sdes	 * We DO NOT perform an IGMP join as the in_ layer may need to
46398937Sdes	 * push an initial source list down to IGMP to support SSM.
46498937Sdes	 *
46598937Sdes	 * The initial source filter state is INCLUDE, {} as per the RFC.
46698937Sdes	 */
46798937Sdes	inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO);
46898937Sdes	if (inm == NULL) {
46998937Sdes		if_delmulti_ifma(ifma);
47098937Sdes		IF_ADDR_UNLOCK(ifp);
471124208Sdes		return (ENOMEM);
472124208Sdes	}
473124208Sdes	inm->inm_addr = *group;
474124208Sdes	inm->inm_ifp = ifp;
475124208Sdes	inm->inm_igi = ii->ii_igmp;
476124208Sdes	inm->inm_ifma = ifma;
477124208Sdes	inm->inm_refcount = 1;
478124208Sdes	inm->inm_state = IGMP_NOT_MEMBER;
479124208Sdes
480124208Sdes	/*
481124208Sdes	 * Pending state-changes per group are subject to a bounds check.
482124208Sdes	 */
483124208Sdes	IFQ_SET_MAXLEN(&inm->inm_scq, IGMP_MAX_STATE_CHANGES);
484124208Sdes
485124208Sdes	inm->inm_st[0].iss_fmode = MCAST_UNDEFINED;
486124208Sdes	inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
487181111Sdes	RB_INIT(&inm->inm_srcs);
488137015Sdes
489137015Sdes	ifma->ifma_protospec = inm;
490124208Sdes
491162852Sdes	*pinm = inm;
492162852Sdes
493162852Sdes	IF_ADDR_UNLOCK(ifp);
494162852Sdes	return (0);
495162852Sdes}
496162852Sdes
497162852Sdes/*
498162852Sdes * Drop a reference to an in_multi record.
499162852Sdes *
500162852Sdes * If the refcount drops to 0, free the in_multi record and
501162852Sdes * delete the underlying link-layer membership.
502162852Sdes */
503162852Sdesvoid
504162852Sdesinm_release_locked(struct in_multi *inm)
505162852Sdes{
506162852Sdes	struct ifmultiaddr *ifma;
50798937Sdes
50898937Sdes	IN_MULTI_LOCK_ASSERT();
50998937Sdes
51098937Sdes	CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount);
51198937Sdes
51298937Sdes	if (--inm->inm_refcount > 0) {
51398937Sdes		CTR2(KTR_IGMPV3, "%s: refcount is now %d", __func__,
51499060Sdes		    inm->inm_refcount);
51599060Sdes		return;
51699060Sdes	}
51799060Sdes
51899060Sdes	CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm);
51999060Sdes
52099060Sdes	ifma = inm->inm_ifma;
52199060Sdes
52299060Sdes	/* XXX this access is not covered by IF_ADDR_LOCK */
52399060Sdes	CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma);
52499060Sdes	KASSERT(ifma->ifma_protospec == inm,
52599060Sdes	    ("%s: ifma_protospec != inm", __func__));
52698937Sdes	ifma->ifma_protospec = NULL;
52798937Sdes
52898937Sdes	inm_purge(inm);
52998937Sdes
53098937Sdes	free(inm, M_IPMADDR);
53198937Sdes
53298937Sdes	if_delmulti_ifma(ifma);
53398937Sdes}
53498937Sdes
53598937Sdes/*
53698937Sdes * Clear recorded source entries for a group.
53798937Sdes * Used by the IGMP code. Caller must hold the IN_MULTI lock.
538128456Sdes * FIXME: Should reap.
539128456Sdes */
540128456Sdesvoid
541128456Sdesinm_clear_recorded(struct in_multi *inm)
542181111Sdes{
543181111Sdes	struct ip_msource	*ims;
544181111Sdes
545181111Sdes	IN_MULTI_LOCK_ASSERT();
546162852Sdes
547162852Sdes	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
548162852Sdes		if (ims->ims_stp) {
549162852Sdes			ims->ims_stp = 0;
550162852Sdes			--inm->inm_st[1].iss_rec;
55198937Sdes		}
55298937Sdes	}
55398937Sdes	KASSERT(inm->inm_st[1].iss_rec == 0,
55498937Sdes	    ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec));
55598937Sdes}
55698937Sdes
55798937Sdes/*
55898937Sdes * Record a source as pending for a Source-Group IGMPv3 query.
55998937Sdes * This lives here as it modifies the shared tree.
560162852Sdes *
56198937Sdes * inm is the group descriptor.
56298937Sdes * naddr is the address of the source to record in network-byte order.
56398937Sdes *
564146998Sdes * If the net.inet.igmp.sgalloc sysctl is non-zero, we will
565146998Sdes * lazy-allocate a source node in response to an SG query.
566146998Sdes * Otherwise, no allocation is performed. This saves some memory
567146998Sdes * with the trade-off that the source will not be reported to the
568146998Sdes * router if joined in the window between the query response and
56998937Sdes * the group actually being joined on the local host.
57098937Sdes *
57198937Sdes * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed.
57298937Sdes * This turns off the allocation of a recorded source entry if
57398937Sdes * the group has not been joined.
57498937Sdes *
575124208Sdes * Return 0 if the source didn't exist or was already marked as recorded.
576124208Sdes * Return 1 if the source was marked as recorded by this function.
577124208Sdes * Return <0 if any error occured (negated errno code).
578124208Sdes */
579128456Sdesint
580128456Sdesinm_record_source(struct in_multi *inm, const in_addr_t naddr)
581128456Sdes{
582128456Sdes	struct ip_msource	 find;
583128456Sdes	struct ip_msource	*ims, *nims;
584128456Sdes
585126274Sdes	IN_MULTI_LOCK_ASSERT();
586126274Sdes
587126274Sdes	find.ims_haddr = ntohl(naddr);
588126274Sdes	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
589126274Sdes	if (ims && ims->ims_stp)
590126274Sdes		return (0);
591126274Sdes	if (ims == NULL) {
592181111Sdes		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
593181111Sdes			return (-ENOSPC);
594181111Sdes		nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
595181111Sdes		    M_NOWAIT | M_ZERO);
596181111Sdes		if (nims == NULL)
597197679Sdes			return (-ENOMEM);
598197679Sdes		nims->ims_haddr = find.ims_haddr;
599197679Sdes		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
600197679Sdes		++inm->inm_nsrc;
601181111Sdes		ims = nims;
602181111Sdes	}
603181111Sdes
604181111Sdes	/*
605149749Sdes	 * Mark the source as recorded and update the recorded
606149749Sdes	 * source count.
607149749Sdes	 */
608149749Sdes	++ims->ims_stp;
609149749Sdes	++inm->inm_st[1].iss_rec;
610149749Sdes
611149749Sdes	return (1);
612149749Sdes}
613149749Sdes
614149749Sdes/*
615149749Sdes * Return a pointer to an in_msource owned by an in_mfilter,
616149749Sdes * given its source address.
617149749Sdes * Lazy-allocate if needed. If this is a new entry its filter state is
618149749Sdes * undefined at t0.
619149749Sdes *
620149749Sdes * imf is the filter set being modified.
621149749Sdes * haddr is the source address in *host* byte-order.
622126274Sdes *
62398937Sdes * SMPng: May be called with locks held; malloc must not block.
62498937Sdes */
62598937Sdesstatic int
62698937Sdesimf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin,
62798937Sdes    struct in_msource **plims)
62898937Sdes{
62998937Sdes	struct ip_msource	 find;
63098937Sdes	struct ip_msource	*ims, *nims;
63198937Sdes	struct in_msource	*lims;
63298937Sdes	int			 error;
63398937Sdes
63498937Sdes	error = 0;
63598937Sdes	ims = NULL;
63698937Sdes	lims = NULL;
63798937Sdes
63898937Sdes	/* key is host byte order */
63998937Sdes	find.ims_haddr = ntohl(psin->sin_addr.s_addr);
64098937Sdes	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
64198937Sdes	lims = (struct in_msource *)ims;
64298937Sdes	if (lims == NULL) {
64398937Sdes		if (imf->imf_nsrc == in_mcast_maxsocksrc)
64498937Sdes			return (ENOSPC);
64598937Sdes		nims = malloc(sizeof(struct in_msource), M_INMFILTER,
64698937Sdes		    M_NOWAIT | M_ZERO);
64798937Sdes		if (nims == NULL)
64898937Sdes			return (ENOMEM);
64998937Sdes		lims = (struct in_msource *)nims;
65098937Sdes		lims->ims_haddr = find.ims_haddr;
65198937Sdes		lims->imsl_st[0] = MCAST_UNDEFINED;
65298937Sdes		RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
65398937Sdes		++imf->imf_nsrc;
65498937Sdes	}
65598937Sdes
65698937Sdes	*plims = lims;
65798937Sdes
65898937Sdes	return (error);
65998937Sdes}
66098937Sdes
66198937Sdes/*
66298937Sdes * Graft a source entry into an existing socket-layer filter set,
66398937Sdes * maintaining any required invariants and checking allocations.
66498937Sdes *
66598937Sdes * The source is marked as being in the new filter mode at t1.
666126274Sdes *
667126274Sdes * Return the pointer to the new node, otherwise return NULL.
668126274Sdes */
66998937Sdesstatic struct in_msource *
67098937Sdesimf_graft(struct in_mfilter *imf, const uint8_t st1,
67198937Sdes    const struct sockaddr_in *psin)
67298937Sdes{
67398937Sdes	struct ip_msource	*nims;
67498937Sdes	struct in_msource	*lims;
67598937Sdes
67698937Sdes	nims = malloc(sizeof(struct in_msource), M_INMFILTER,
677202213Sed	    M_NOWAIT | M_ZERO);
67898937Sdes	if (nims == NULL)
67998937Sdes		return (NULL);
68098937Sdes	lims = (struct in_msource *)nims;
68198937Sdes	lims->ims_haddr = ntohl(psin->sin_addr.s_addr);
68298937Sdes	lims->imsl_st[0] = MCAST_UNDEFINED;
68398937Sdes	lims->imsl_st[1] = st1;
68498937Sdes	RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
68598937Sdes	++imf->imf_nsrc;
68698937Sdes
68798937Sdes	return (lims);
68898937Sdes}
68998937Sdes
69098937Sdes/*
69198937Sdes * Prune a source entry from an existing socket-layer filter set,
692128456Sdes * maintaining any required invariants and checking allocations.
693128456Sdes *
694128456Sdes * The source is marked as being left at t1, it is not freed.
695128456Sdes *
69698937Sdes * Return 0 if no error occurred, otherwise return an errno value.
69798937Sdes */
69898937Sdesstatic int
69998937Sdesimf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin)
70098937Sdes{
701128456Sdes	struct ip_msource	 find;
702128456Sdes	struct ip_msource	*ims;
703128456Sdes	struct in_msource	*lims;
704128456Sdes
705128456Sdes	/* key is host byte order */
706128456Sdes	find.ims_haddr = ntohl(psin->sin_addr.s_addr);
707128456Sdes	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
708192595Sdes	if (ims == NULL)
709149749Sdes		return (ENOENT);
710149749Sdes	lims = (struct in_msource *)ims;
711181111Sdes	lims->imsl_st[1] = MCAST_UNDEFINED;
712181111Sdes	return (0);
713181111Sdes}
714149749Sdes
715146998Sdes/*
716146998Sdes * Revert socket-layer filter set deltas at t1 to t0 state.
717146998Sdes */
718146998Sdesstatic void
719146998Sdesimf_rollback(struct in_mfilter *imf)
720146998Sdes{
721146998Sdes	struct ip_msource	*ims, *tims;
722146998Sdes	struct in_msource	*lims;
723146998Sdes
72498937Sdes	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
72598937Sdes		lims = (struct in_msource *)ims;
726149749Sdes		if (lims->imsl_st[0] == lims->imsl_st[1]) {
727149749Sdes			/* no change at t1 */
728149749Sdes			continue;
729149749Sdes		} else if (lims->imsl_st[0] != MCAST_UNDEFINED) {
730149749Sdes			/* revert change to existing source at t1 */
731149749Sdes			lims->imsl_st[1] = lims->imsl_st[0];
732149749Sdes		} else {
733149749Sdes			/* revert source added t1 */
734162852Sdes			CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
735162852Sdes			RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
736162852Sdes			free(ims, M_INMFILTER);
737162852Sdes			imf->imf_nsrc--;
738162852Sdes		}
739162852Sdes	}
740162852Sdes	imf->imf_st[1] = imf->imf_st[0];
741162852Sdes}
742157016Sdes
743157016Sdes/*
744181111Sdes * Mark socket-layer filter set as INCLUDE {} at t1.
745181111Sdes */
746181111Sdesstatic void
747181111Sdesimf_leave(struct in_mfilter *imf)
748192595Sdes{
749192595Sdes	struct ip_msource	*ims;
750192595Sdes	struct in_msource	*lims;
751192595Sdes
752197679Sdes	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
753197679Sdes		lims = (struct in_msource *)ims;
754197679Sdes		lims->imsl_st[1] = MCAST_UNDEFINED;
755197679Sdes	}
75698937Sdes	imf->imf_st[1] = MCAST_INCLUDE;
757}
758
759/*
760 * Mark socket-layer filter set deltas as committed.
761 */
762static void
763imf_commit(struct in_mfilter *imf)
764{
765	struct ip_msource	*ims;
766	struct in_msource	*lims;
767
768	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
769		lims = (struct in_msource *)ims;
770		lims->imsl_st[0] = lims->imsl_st[1];
771	}
772	imf->imf_st[0] = imf->imf_st[1];
773}
774
775/*
776 * Reap unreferenced sources from socket-layer filter set.
777 */
778static void
779imf_reap(struct in_mfilter *imf)
780{
781	struct ip_msource	*ims, *tims;
782	struct in_msource	*lims;
783
784	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
785		lims = (struct in_msource *)ims;
786		if ((lims->imsl_st[0] == MCAST_UNDEFINED) &&
787		    (lims->imsl_st[1] == MCAST_UNDEFINED)) {
788			CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims);
789			RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
790			free(ims, M_INMFILTER);
791			imf->imf_nsrc--;
792		}
793	}
794}
795
796/*
797 * Purge socket-layer filter set.
798 */
799static void
800imf_purge(struct in_mfilter *imf)
801{
802	struct ip_msource	*ims, *tims;
803
804	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
805		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
806		RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
807		free(ims, M_INMFILTER);
808		imf->imf_nsrc--;
809	}
810	imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED;
811	KASSERT(RB_EMPTY(&imf->imf_sources),
812	    ("%s: imf_sources not empty", __func__));
813}
814
815/*
816 * Look up a source filter entry for a multicast group.
817 *
818 * inm is the group descriptor to work with.
819 * haddr is the host-byte-order IPv4 address to look up.
820 * noalloc may be non-zero to suppress allocation of sources.
821 * *pims will be set to the address of the retrieved or allocated source.
822 *
823 * SMPng: NOTE: may be called with locks held.
824 * Return 0 if successful, otherwise return a non-zero error code.
825 */
826static int
827inm_get_source(struct in_multi *inm, const in_addr_t haddr,
828    const int noalloc, struct ip_msource **pims)
829{
830	struct ip_msource	 find;
831	struct ip_msource	*ims, *nims;
832#ifdef KTR
833	struct in_addr ia;
834#endif
835
836	find.ims_haddr = haddr;
837	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
838	if (ims == NULL && !noalloc) {
839		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
840			return (ENOSPC);
841		nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
842		    M_NOWAIT | M_ZERO);
843		if (nims == NULL)
844			return (ENOMEM);
845		nims->ims_haddr = haddr;
846		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
847		++inm->inm_nsrc;
848		ims = nims;
849#ifdef KTR
850		ia.s_addr = htonl(haddr);
851		CTR3(KTR_IGMPV3, "%s: allocated %s as %p", __func__,
852		    inet_ntoa(ia), ims);
853#endif
854	}
855
856	*pims = ims;
857	return (0);
858}
859
860/*
861 * Merge socket-layer source into IGMP-layer source.
862 * If rollback is non-zero, perform the inverse of the merge.
863 */
864static void
865ims_merge(struct ip_msource *ims, const struct in_msource *lims,
866    const int rollback)
867{
868	int n = rollback ? -1 : 1;
869#ifdef KTR
870	struct in_addr ia;
871
872	ia.s_addr = htonl(ims->ims_haddr);
873#endif
874
875	if (lims->imsl_st[0] == MCAST_EXCLUDE) {
876		CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on %s",
877		    __func__, n, inet_ntoa(ia));
878		ims->ims_st[1].ex -= n;
879	} else if (lims->imsl_st[0] == MCAST_INCLUDE) {
880		CTR3(KTR_IGMPV3, "%s: t1 in -= %d on %s",
881		    __func__, n, inet_ntoa(ia));
882		ims->ims_st[1].in -= n;
883	}
884
885	if (lims->imsl_st[1] == MCAST_EXCLUDE) {
886		CTR3(KTR_IGMPV3, "%s: t1 ex += %d on %s",
887		    __func__, n, inet_ntoa(ia));
888		ims->ims_st[1].ex += n;
889	} else if (lims->imsl_st[1] == MCAST_INCLUDE) {
890		CTR3(KTR_IGMPV3, "%s: t1 in += %d on %s",
891		    __func__, n, inet_ntoa(ia));
892		ims->ims_st[1].in += n;
893	}
894}
895
896/*
897 * Atomically update the global in_multi state, when a membership's
898 * filter list is being updated in any way.
899 *
900 * imf is the per-inpcb-membership group filter pointer.
901 * A fake imf may be passed for in-kernel consumers.
902 *
903 * XXX This is a candidate for a set-symmetric-difference style loop
904 * which would eliminate the repeated lookup from root of ims nodes,
905 * as they share the same key space.
906 *
907 * If any error occurred this function will back out of refcounts
908 * and return a non-zero value.
909 */
910static int
911inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
912{
913	struct ip_msource	*ims, *nims;
914	struct in_msource	*lims;
915	int			 schanged, error;
916	int			 nsrc0, nsrc1;
917
918	schanged = 0;
919	error = 0;
920	nsrc1 = nsrc0 = 0;
921
922	/*
923	 * Update the source filters first, as this may fail.
924	 * Maintain count of in-mode filters at t0, t1. These are
925	 * used to work out if we transition into ASM mode or not.
926	 * Maintain a count of source filters whose state was
927	 * actually modified by this operation.
928	 */
929	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
930		lims = (struct in_msource *)ims;
931		if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++;
932		if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++;
933		if (lims->imsl_st[0] == lims->imsl_st[1]) continue;
934		error = inm_get_source(inm, lims->ims_haddr, 0, &nims);
935		++schanged;
936		if (error)
937			break;
938		ims_merge(nims, lims, 0);
939	}
940	if (error) {
941		struct ip_msource *bims;
942
943		RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) {
944			lims = (struct in_msource *)ims;
945			if (lims->imsl_st[0] == lims->imsl_st[1])
946				continue;
947			(void)inm_get_source(inm, lims->ims_haddr, 1, &bims);
948			if (bims == NULL)
949				continue;
950			ims_merge(bims, lims, 1);
951		}
952		goto out_reap;
953	}
954
955	CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1",
956	    __func__, nsrc0, nsrc1);
957
958	/* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */
959	if (imf->imf_st[0] == imf->imf_st[1] &&
960	    imf->imf_st[1] == MCAST_INCLUDE) {
961		if (nsrc1 == 0) {
962			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
963			--inm->inm_st[1].iss_in;
964		}
965	}
966
967	/* Handle filter mode transition on socket. */
968	if (imf->imf_st[0] != imf->imf_st[1]) {
969		CTR3(KTR_IGMPV3, "%s: imf transition %d to %d",
970		    __func__, imf->imf_st[0], imf->imf_st[1]);
971
972		if (imf->imf_st[0] == MCAST_EXCLUDE) {
973			CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__);
974			--inm->inm_st[1].iss_ex;
975		} else if (imf->imf_st[0] == MCAST_INCLUDE) {
976			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
977			--inm->inm_st[1].iss_in;
978		}
979
980		if (imf->imf_st[1] == MCAST_EXCLUDE) {
981			CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__);
982			inm->inm_st[1].iss_ex++;
983		} else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) {
984			CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__);
985			inm->inm_st[1].iss_in++;
986		}
987	}
988
989	/*
990	 * Track inm filter state in terms of listener counts.
991	 * If there are any exclusive listeners, stack-wide
992	 * membership is exclusive.
993	 * Otherwise, if only inclusive listeners, stack-wide is inclusive.
994	 * If no listeners remain, state is undefined at t1,
995	 * and the IGMP lifecycle for this group should finish.
996	 */
997	if (inm->inm_st[1].iss_ex > 0) {
998		CTR1(KTR_IGMPV3, "%s: transition to EX", __func__);
999		inm->inm_st[1].iss_fmode = MCAST_EXCLUDE;
1000	} else if (inm->inm_st[1].iss_in > 0) {
1001		CTR1(KTR_IGMPV3, "%s: transition to IN", __func__);
1002		inm->inm_st[1].iss_fmode = MCAST_INCLUDE;
1003	} else {
1004		CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__);
1005		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
1006	}
1007
1008	/* Decrement ASM listener count on transition out of ASM mode. */
1009	if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
1010		if ((imf->imf_st[1] != MCAST_EXCLUDE) ||
1011		    (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0))
1012			CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__);
1013			--inm->inm_st[1].iss_asm;
1014	}
1015
1016	/* Increment ASM listener count on transition to ASM mode. */
1017	if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) {
1018		CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__);
1019		inm->inm_st[1].iss_asm++;
1020	}
1021
1022	CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm);
1023	inm_print(inm);
1024
1025out_reap:
1026	if (schanged > 0) {
1027		CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__);
1028		inm_reap(inm);
1029	}
1030	return (error);
1031}
1032
1033/*
1034 * Mark an in_multi's filter set deltas as committed.
1035 * Called by IGMP after a state change has been enqueued.
1036 */
1037void
1038inm_commit(struct in_multi *inm)
1039{
1040	struct ip_msource	*ims;
1041
1042	CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm);
1043	CTR1(KTR_IGMPV3, "%s: pre commit:", __func__);
1044	inm_print(inm);
1045
1046	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
1047		ims->ims_st[0] = ims->ims_st[1];
1048	}
1049	inm->inm_st[0] = inm->inm_st[1];
1050}
1051
1052/*
1053 * Reap unreferenced nodes from an in_multi's filter set.
1054 */
1055static void
1056inm_reap(struct in_multi *inm)
1057{
1058	struct ip_msource	*ims, *tims;
1059
1060	RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
1061		if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 ||
1062		    ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 ||
1063		    ims->ims_stp != 0)
1064			continue;
1065		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
1066		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
1067		free(ims, M_IPMSOURCE);
1068		inm->inm_nsrc--;
1069	}
1070}
1071
1072/*
1073 * Purge all source nodes from an in_multi's filter set.
1074 */
1075static void
1076inm_purge(struct in_multi *inm)
1077{
1078	struct ip_msource	*ims, *tims;
1079
1080	RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
1081		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
1082		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
1083		free(ims, M_IPMSOURCE);
1084		inm->inm_nsrc--;
1085	}
1086}
1087
/*
 * Join a multicast group; unlocked entry point.
 *
 * Takes the IN_MULTI lock around in_joingroup_locked() and returns
 * its result unchanged.
 *
 * SMPng: XXX: in_joingroup() is called from in_control() when Giant
 * is not held. Fortunately, ifp is unlikely to have been detached
 * at this point, so we assume it's OK to recurse.
 */
int
in_joingroup(struct ifnet *ifp, const struct in_addr *gina,
    /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
{
	int rv;

	IN_MULTI_LOCK();
	rv = in_joingroup_locked(ifp, gina, imf, pinm);
	IN_MULTI_UNLOCK();
	return (rv);
}
1107
1108/*
1109 * Join a multicast group; real entry point.
1110 *
1111 * Only preserves atomicity at inm level.
1112 * NOTE: imf argument cannot be const due to sys/tree.h limitations.
1113 *
1114 * If the IGMP downcall fails, the group is not joined, and an error
1115 * code is returned.
1116 */
1117int
1118in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina,
1119    /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
1120{
1121	struct in_mfilter	 timf;
1122	struct in_multi		*inm;
1123	int			 error;
1124
1125	IN_MULTI_LOCK_ASSERT();
1126
1127	CTR4(KTR_IGMPV3, "%s: join %s on %p(%s))", __func__,
1128	    inet_ntoa(*gina), ifp, ifp->if_xname);
1129
1130	error = 0;
1131	inm = NULL;
1132
1133	/*
1134	 * If no imf was specified (i.e. kernel consumer),
1135	 * fake one up and assume it is an ASM join.
1136	 */
1137	if (imf == NULL) {
1138		imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE);
1139		imf = &timf;
1140	}
1141
1142	error = in_getmulti(ifp, gina, &inm);
1143	if (error) {
1144		CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__);
1145		return (error);
1146	}
1147
1148	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
1149	error = inm_merge(inm, imf);
1150	if (error) {
1151		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
1152		goto out_inm_release;
1153	}
1154
1155	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
1156	error = igmp_change_state(inm);
1157	if (error) {
1158		CTR1(KTR_IGMPV3, "%s: failed to update source", __func__);
1159		goto out_inm_release;
1160	}
1161
1162out_inm_release:
1163	if (error) {
1164		CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
1165		inm_release_locked(inm);
1166	} else {
1167		*pinm = inm;
1168	}
1169
1170	return (error);
1171}
1172
/*
 * Leave a multicast group; unlocked entry point.
 *
 * Takes the IN_MULTI lock around in_leavegroup_locked() and returns
 * its result unchanged. inm is not examined before the lock is held.
 */
int
in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
{
	int error;

	IN_MULTI_LOCK();
	error = in_leavegroup_locked(inm, imf);
	IN_MULTI_UNLOCK();

	return (error);
}
1190
1191/*
1192 * Leave a multicast group; real entry point.
1193 * All source filters will be expunged.
1194 *
1195 * Only preserves atomicity at inm level.
1196 *
1197 * Holding the write lock for the INP which contains imf
1198 * is highly advisable. We can't assert for it as imf does not
1199 * contain a back-pointer to the owning inp.
1200 *
1201 * Note: This is not the same as inm_release(*) as this function also
1202 * makes a state change downcall into IGMP.
1203 */
1204int
1205in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
1206{
1207	struct in_mfilter	 timf;
1208	int			 error;
1209
1210	error = 0;
1211
1212	IN_MULTI_LOCK_ASSERT();
1213
1214	CTR5(KTR_IGMPV3, "%s: leave inm %p, %s/%s, imf %p", __func__,
1215	    inm, inet_ntoa(inm->inm_addr),
1216	    (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname),
1217	    imf);
1218
1219	/*
1220	 * If no imf was specified (i.e. kernel consumer),
1221	 * fake one up and assume it is an ASM join.
1222	 */
1223	if (imf == NULL) {
1224		imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED);
1225		imf = &timf;
1226	}
1227
1228	/*
1229	 * Begin state merge transaction at IGMP layer.
1230	 *
1231	 * As this particular invocation should not cause any memory
1232	 * to be allocated, and there is no opportunity to roll back
1233	 * the transaction, it MUST NOT fail.
1234	 */
1235	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
1236	error = inm_merge(inm, imf);
1237	KASSERT(error == 0, ("%s: failed to merge inm state", __func__));
1238
1239	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
1240	error = igmp_change_state(inm);
1241	if (error)
1242		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
1243
1244	CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
1245	inm_release_locked(inm);
1246
1247	return (error);
1248}
1249
1250/*#ifndef BURN_BRIDGES*/
1251/*
1252 * Join an IPv4 multicast group in (*,G) exclusive mode.
1253 * The group must be a 224.0.0.0/24 link-scope group.
1254 * This KPI is for legacy kernel consumers only.
1255 */
1256struct in_multi *
1257in_addmulti(struct in_addr *ap, struct ifnet *ifp)
1258{
1259	struct in_multi *pinm;
1260	int error;
1261
1262	KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)),
1263	    ("%s: %s not in 224.0.0.0/24", __func__, inet_ntoa(*ap)));
1264
1265	error = in_joingroup(ifp, ap, NULL, &pinm);
1266	if (error != 0)
1267		pinm = NULL;
1268
1269	return (pinm);
1270}
1271
1272/*
1273 * Leave an IPv4 multicast group, assumed to be in exclusive (*,G) mode.
1274 * This KPI is for legacy kernel consumers only.
1275 */
1276void
1277in_delmulti(struct in_multi *inm)
1278{
1279
1280	(void)in_leavegroup(inm, NULL);
1281}
1282/*#endif*/
1283
1284/*
1285 * Block or unblock an ASM multicast source on an inpcb.
1286 * This implements the delta-based API described in RFC 3678.
1287 *
1288 * The delta-based API applies only to exclusive-mode memberships.
1289 * An IGMP downcall will be performed.
1290 *
1291 * SMPng: NOTE: Must take Giant as a join may create a new ifma.
1292 *
1293 * Return 0 if successful, otherwise return an appropriate error code.
1294 */
1295static int
1296inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
1297{
1298	INIT_VNET_NET(curvnet);
1299	INIT_VNET_INET(curvnet);
1300	struct group_source_req		 gsr;
1301	sockunion_t			*gsa, *ssa;
1302	struct ifnet			*ifp;
1303	struct in_mfilter		*imf;
1304	struct ip_moptions		*imo;
1305	struct in_msource		*ims;
1306	struct in_multi			*inm;
1307	size_t				 idx;
1308	uint16_t			 fmode;
1309	int				 error, doblock;
1310
1311	ifp = NULL;
1312	error = 0;
1313	doblock = 0;
1314
1315	memset(&gsr, 0, sizeof(struct group_source_req));
1316	gsa = (sockunion_t *)&gsr.gsr_group;
1317	ssa = (sockunion_t *)&gsr.gsr_source;
1318
1319	switch (sopt->sopt_name) {
1320	case IP_BLOCK_SOURCE:
1321	case IP_UNBLOCK_SOURCE: {
1322		struct ip_mreq_source	 mreqs;
1323
1324		error = sooptcopyin(sopt, &mreqs,
1325		    sizeof(struct ip_mreq_source),
1326		    sizeof(struct ip_mreq_source));
1327		if (error)
1328			return (error);
1329
1330		gsa->sin.sin_family = AF_INET;
1331		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1332		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1333
1334		ssa->sin.sin_family = AF_INET;
1335		ssa->sin.sin_len = sizeof(struct sockaddr_in);
1336		ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1337
1338		if (!in_nullhost(mreqs.imr_interface))
1339			INADDR_TO_IFP(mreqs.imr_interface, ifp);
1340
1341		if (sopt->sopt_name == IP_BLOCK_SOURCE)
1342			doblock = 1;
1343
1344		CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
1345		    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1346		break;
1347	    }
1348
1349	case MCAST_BLOCK_SOURCE:
1350	case MCAST_UNBLOCK_SOURCE:
1351		error = sooptcopyin(sopt, &gsr,
1352		    sizeof(struct group_source_req),
1353		    sizeof(struct group_source_req));
1354		if (error)
1355			return (error);
1356
1357		if (gsa->sin.sin_family != AF_INET ||
1358		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1359			return (EINVAL);
1360
1361		if (ssa->sin.sin_family != AF_INET ||
1362		    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1363			return (EINVAL);
1364
1365		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
1366			return (EADDRNOTAVAIL);
1367
1368		ifp = ifnet_byindex(gsr.gsr_interface);
1369
1370		if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
1371			doblock = 1;
1372		break;
1373
1374	default:
1375		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
1376		    __func__, sopt->sopt_name);
1377		return (EOPNOTSUPP);
1378		break;
1379	}
1380
1381	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1382		return (EINVAL);
1383
1384	/*
1385	 * Check if we are actually a member of this group.
1386	 */
1387	imo = inp_findmoptions(inp);
1388	idx = imo_match_group(imo, ifp, &gsa->sa);
1389	if (idx == -1 || imo->imo_mfilters == NULL) {
1390		error = EADDRNOTAVAIL;
1391		goto out_inp_locked;
1392	}
1393
1394	KASSERT(imo->imo_mfilters != NULL,
1395	    ("%s: imo_mfilters not allocated", __func__));
1396	imf = &imo->imo_mfilters[idx];
1397	inm = imo->imo_membership[idx];
1398
1399	/*
1400	 * Attempting to use the delta-based API on an
1401	 * non exclusive-mode membership is an error.
1402	 */
1403	fmode = imf->imf_st[0];
1404	if (fmode != MCAST_EXCLUDE) {
1405		error = EINVAL;
1406		goto out_inp_locked;
1407	}
1408
1409	/*
1410	 * Deal with error cases up-front:
1411	 *  Asked to block, but already blocked; or
1412	 *  Asked to unblock, but nothing to unblock.
1413	 * If adding a new block entry, allocate it.
1414	 */
1415	ims = imo_match_source(imo, idx, &ssa->sa);
1416	if ((ims != NULL && doblock) || (ims == NULL && !doblock)) {
1417		CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__,
1418		    inet_ntoa(ssa->sin.sin_addr), doblock ? "" : "not ");
1419		error = EADDRNOTAVAIL;
1420		goto out_inp_locked;
1421	}
1422
1423	INP_WLOCK_ASSERT(inp);
1424
1425	/*
1426	 * Begin state merge transaction at socket layer.
1427	 */
1428	if (doblock) {
1429		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
1430		ims = imf_graft(imf, fmode, &ssa->sin);
1431		if (ims == NULL)
1432			error = ENOMEM;
1433	} else {
1434		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
1435		error = imf_prune(imf, &ssa->sin);
1436	}
1437
1438	if (error) {
1439		CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__);
1440		goto out_imf_rollback;
1441	}
1442
1443	/*
1444	 * Begin state merge transaction at IGMP layer.
1445	 */
1446	IN_MULTI_LOCK();
1447
1448	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
1449	error = inm_merge(inm, imf);
1450	if (error) {
1451		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
1452		goto out_imf_rollback;
1453	}
1454
1455	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
1456	error = igmp_change_state(inm);
1457	if (error)
1458		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
1459
1460	IN_MULTI_UNLOCK();
1461
1462out_imf_rollback:
1463	if (error)
1464		imf_rollback(imf);
1465	else
1466		imf_commit(imf);
1467
1468	imf_reap(imf);
1469
1470out_inp_locked:
1471	INP_WUNLOCK(inp);
1472	return (error);
1473}
1474
1475/*
1476 * Given an inpcb, return its multicast options structure pointer.  Accepts
1477 * an unlocked inpcb pointer, but will return it locked.  May sleep.
1478 *
1479 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
1480 * SMPng: NOTE: Returns with the INP write lock held.
1481 */
1482static struct ip_moptions *
1483inp_findmoptions(struct inpcb *inp)
1484{
1485	struct ip_moptions	 *imo;
1486	struct in_multi		**immp;
1487	struct in_mfilter	 *imfp;
1488	size_t			  idx;
1489
1490	INP_WLOCK(inp);
1491	if (inp->inp_moptions != NULL)
1492		return (inp->inp_moptions);
1493
1494	INP_WUNLOCK(inp);
1495
1496	imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK);
1497	immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS,
1498	    M_WAITOK | M_ZERO);
1499	imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS,
1500	    M_INMFILTER, M_WAITOK);
1501
1502	imo->imo_multicast_ifp = NULL;
1503	imo->imo_multicast_addr.s_addr = INADDR_ANY;
1504	imo->imo_multicast_vif = -1;
1505	imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1506	imo->imo_multicast_loop = in_mcast_loop;
1507	imo->imo_num_memberships = 0;
1508	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
1509	imo->imo_membership = immp;
1510
1511	/* Initialize per-group source filters. */
1512	for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++)
1513		imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE);
1514	imo->imo_mfilters = imfp;
1515
1516	INP_WLOCK(inp);
1517	if (inp->inp_moptions != NULL) {
1518		free(imfp, M_INMFILTER);
1519		free(immp, M_IPMOPTS);
1520		free(imo, M_IPMOPTS);
1521		return (inp->inp_moptions);
1522	}
1523	inp->inp_moptions = imo;
1524	return (imo);
1525}
1526
1527/*
1528 * Discard the IP multicast options (and source filters).
1529 *
1530 * SMPng: NOTE: assumes INP write lock is held.
1531 */
1532void
1533inp_freemoptions(struct ip_moptions *imo)
1534{
1535	struct in_mfilter	*imf;
1536	size_t			 idx, nmships;
1537
1538	KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
1539
1540	nmships = imo->imo_num_memberships;
1541	for (idx = 0; idx < nmships; ++idx) {
1542		imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL;
1543		if (imf)
1544			imf_leave(imf);
1545		(void)in_leavegroup(imo->imo_membership[idx], imf);
1546		if (imf)
1547			imf_purge(imf);
1548	}
1549
1550	if (imo->imo_mfilters)
1551		free(imo->imo_mfilters, M_INMFILTER);
1552	free(imo->imo_membership, M_IPMOPTS);
1553	free(imo, M_IPMOPTS);
1554}
1555
1556/*
1557 * Atomically get source filters on a socket for an IPv4 multicast group.
1558 * Called with INP lock held; returns with lock released.
1559 */
1560static int
1561inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
1562{
1563	INIT_VNET_NET(curvnet);
1564	struct __msfilterreq	 msfr;
1565	sockunion_t		*gsa;
1566	struct ifnet		*ifp;
1567	struct ip_moptions	*imo;
1568	struct in_mfilter	*imf;
1569	struct ip_msource	*ims;
1570	struct in_msource	*lims;
1571	struct sockaddr_in	*psin;
1572	struct sockaddr_storage	*ptss;
1573	struct sockaddr_storage	*tss;
1574	int			 error;
1575	size_t			 idx, nsrcs, ncsrcs;
1576
1577	INP_WLOCK_ASSERT(inp);
1578
1579	imo = inp->inp_moptions;
1580	KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));
1581
1582	INP_WUNLOCK(inp);
1583
1584	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
1585	    sizeof(struct __msfilterreq));
1586	if (error)
1587		return (error);
1588
1589	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
1590		return (EINVAL);
1591
1592	ifp = ifnet_byindex(msfr.msfr_ifindex);
1593	if (ifp == NULL)
1594		return (EINVAL);
1595
1596	INP_WLOCK(inp);
1597
1598	/*
1599	 * Lookup group on the socket.
1600	 */
1601	gsa = (sockunion_t *)&msfr.msfr_group;
1602	idx = imo_match_group(imo, ifp, &gsa->sa);
1603	if (idx == -1 || imo->imo_mfilters == NULL) {
1604		INP_WUNLOCK(inp);
1605		return (EADDRNOTAVAIL);
1606	}
1607	imf = &imo->imo_mfilters[idx];
1608
1609	/*
1610	 * Ignore memberships which are in limbo.
1611	 */
1612	if (imf->imf_st[1] == MCAST_UNDEFINED) {
1613		INP_WUNLOCK(inp);
1614		return (EAGAIN);
1615	}
1616	msfr.msfr_fmode = imf->imf_st[1];
1617
1618	/*
1619	 * If the user specified a buffer, copy out the source filter
1620	 * entries to userland gracefully.
1621	 * We only copy out the number of entries which userland
1622	 * has asked for, but we always tell userland how big the
1623	 * buffer really needs to be.
1624	 */
1625	tss = NULL;
1626	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
1627		tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
1628		    M_TEMP, M_NOWAIT | M_ZERO);
1629		if (tss == NULL) {
1630			INP_WUNLOCK(inp);
1631			return (ENOBUFS);
1632		}
1633	}
1634
1635	/*
1636	 * Count number of sources in-mode at t0.
1637	 * If buffer space exists and remains, copy out source entries.
1638	 */
1639	nsrcs = msfr.msfr_nsrcs;
1640	ncsrcs = 0;
1641	ptss = tss;
1642	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
1643		lims = (struct in_msource *)ims;
1644		if (lims->imsl_st[0] == MCAST_UNDEFINED ||
1645		    lims->imsl_st[0] != imf->imf_st[0])
1646			continue;
1647		++ncsrcs;
1648		if (tss != NULL && nsrcs > 0) {
1649			psin = (struct sockaddr_in *)ptss;
1650			psin->sin_family = AF_INET;
1651			psin->sin_len = sizeof(struct sockaddr_in);
1652			psin->sin_addr.s_addr = htonl(lims->ims_haddr);
1653			psin->sin_port = 0;
1654			++ptss;
1655			--nsrcs;
1656		}
1657	}
1658
1659	INP_WUNLOCK(inp);
1660
1661	if (tss != NULL) {
1662		error = copyout(tss, msfr.msfr_srcs,
1663		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
1664		free(tss, M_TEMP);
1665		if (error)
1666			return (error);
1667	}
1668
1669	msfr.msfr_nsrcs = ncsrcs;
1670	error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));
1671
1672	return (error);
1673}
1674
1675/*
1676 * Return the IP multicast options in response to user getsockopt().
1677 */
1678int
1679inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
1680{
1681	INIT_VNET_INET(curvnet);
1682	struct ip_mreqn		 mreqn;
1683	struct ip_moptions	*imo;
1684	struct ifnet		*ifp;
1685	struct in_ifaddr	*ia;
1686	int			 error, optval;
1687	u_char			 coptval;
1688
1689	INP_WLOCK(inp);
1690	imo = inp->inp_moptions;
1691	/*
1692	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
1693	 * or is a divert socket, reject it.
1694	 */
1695	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
1696	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
1697	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
1698		INP_WUNLOCK(inp);
1699		return (EOPNOTSUPP);
1700	}
1701
1702	error = 0;
1703	switch (sopt->sopt_name) {
1704	case IP_MULTICAST_VIF:
1705		if (imo != NULL)
1706			optval = imo->imo_multicast_vif;
1707		else
1708			optval = -1;
1709		INP_WUNLOCK(inp);
1710		error = sooptcopyout(sopt, &optval, sizeof(int));
1711		break;
1712
1713	case IP_MULTICAST_IF:
1714		memset(&mreqn, 0, sizeof(struct ip_mreqn));
1715		if (imo != NULL) {
1716			ifp = imo->imo_multicast_ifp;
1717			if (!in_nullhost(imo->imo_multicast_addr)) {
1718				mreqn.imr_address = imo->imo_multicast_addr;
1719			} else if (ifp != NULL) {
1720				mreqn.imr_ifindex = ifp->if_index;
1721				IFP_TO_IA(ifp, ia);
1722				if (ia != NULL) {
1723					mreqn.imr_address =
1724					    IA_SIN(ia)->sin_addr;
1725					ifa_free(&ia->ia_ifa);
1726				}
1727			}
1728		}
1729		INP_WUNLOCK(inp);
1730		if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
1731			error = sooptcopyout(sopt, &mreqn,
1732			    sizeof(struct ip_mreqn));
1733		} else {
1734			error = sooptcopyout(sopt, &mreqn.imr_address,
1735			    sizeof(struct in_addr));
1736		}
1737		break;
1738
1739	case IP_MULTICAST_TTL:
1740		if (imo == 0)
1741			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
1742		else
1743			optval = coptval = imo->imo_multicast_ttl;
1744		INP_WUNLOCK(inp);
1745		if (sopt->sopt_valsize == sizeof(u_char))
1746			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
1747		else
1748			error = sooptcopyout(sopt, &optval, sizeof(int));
1749		break;
1750
1751	case IP_MULTICAST_LOOP:
1752		if (imo == 0)
1753			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
1754		else
1755			optval = coptval = imo->imo_multicast_loop;
1756		INP_WUNLOCK(inp);
1757		if (sopt->sopt_valsize == sizeof(u_char))
1758			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
1759		else
1760			error = sooptcopyout(sopt, &optval, sizeof(int));
1761		break;
1762
1763	case IP_MSFILTER:
1764		if (imo == NULL) {
1765			error = EADDRNOTAVAIL;
1766			INP_WUNLOCK(inp);
1767		} else {
1768			error = inp_get_source_filters(inp, sopt);
1769		}
1770		break;
1771
1772	default:
1773		INP_WUNLOCK(inp);
1774		error = ENOPROTOOPT;
1775		break;
1776	}
1777
1778	INP_UNLOCK_ASSERT(inp);
1779
1780	return (error);
1781}
1782
1783/*
1784 * Look up the ifnet to use for a multicast group membership,
1785 * given the IPv4 address of an interface, and the IPv4 group address.
1786 *
1787 * This routine exists to support legacy multicast applications
1788 * which do not understand that multicast memberships are scoped to
1789 * specific physical links in the networking stack, or which need
1790 * to join link-scope groups before IPv4 addresses are configured.
1791 *
1792 * If inp is non-NULL, use this socket's current FIB number for any
1793 * required FIB lookup.
1794 * If ina is INADDR_ANY, look up the group address in the unicast FIB,
1795 * and use its ifp; usually, this points to the default next-hop.
1796 *
1797 * If the FIB lookup fails, attempt to use the first non-loopback
1798 * interface with multicast capability in the system as a
1799 * last resort. The legacy IPv4 ASM API requires that we do
1800 * this in order to allow groups to be joined when the routing
1801 * table has not yet been populated during boot.
1802 *
1803 * Returns NULL if no ifp could be found.
1804 *
1805 * SMPng: TODO: Acquire the appropriate locks for INADDR_TO_IFP.
1806 * FUTURE: Implement IPv4 source-address selection.
1807 */
1808static struct ifnet *
1809inp_lookup_mcast_ifp(const struct inpcb *inp,
1810    const struct sockaddr_in *gsin, const struct in_addr ina)
1811{
1812	INIT_VNET_INET(curvnet);
1813	struct ifnet *ifp;
1814
1815	KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__));
1816	KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)),
1817	    ("%s: not multicast", __func__));
1818
1819	ifp = NULL;
1820	if (!in_nullhost(ina)) {
1821		INADDR_TO_IFP(ina, ifp);
1822	} else {
1823		struct route ro;
1824
1825		ro.ro_rt = NULL;
1826		memcpy(&ro.ro_dst, gsin, sizeof(struct sockaddr_in));
1827		in_rtalloc_ign(&ro, 0, inp ? inp->inp_inc.inc_fibnum : 0);
1828		if (ro.ro_rt != NULL) {
1829			ifp = ro.ro_rt->rt_ifp;
1830			KASSERT(ifp != NULL, ("%s: null ifp", __func__));
1831			RTFREE(ro.ro_rt);
1832		} else {
1833			struct in_ifaddr *ia;
1834			struct ifnet *mifp;
1835
1836			mifp = NULL;
1837			IN_IFADDR_RLOCK();
1838			TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
1839				mifp = ia->ia_ifp;
1840				if (!(mifp->if_flags & IFF_LOOPBACK) &&
1841				     (mifp->if_flags & IFF_MULTICAST)) {
1842					ifp = mifp;
1843					break;
1844				}
1845			}
1846			IN_IFADDR_RUNLOCK();
1847		}
1848	}
1849
1850	return (ifp);
1851}
1852
1853/*
1854 * Join an IPv4 multicast group, possibly with a source.
1855 */
1856static int
1857inp_join_group(struct inpcb *inp, struct sockopt *sopt)
1858{
1859	INIT_VNET_NET(curvnet);
1860	struct group_source_req		 gsr;
1861	sockunion_t			*gsa, *ssa;
1862	struct ifnet			*ifp;
1863	struct in_mfilter		*imf;
1864	struct ip_moptions		*imo;
1865	struct in_multi			*inm;
1866	struct in_msource		*lims;
1867	size_t				 idx;
1868	int				 error, is_new;
1869
1870	ifp = NULL;
1871	imf = NULL;
1872	error = 0;
1873	is_new = 0;
1874
1875	memset(&gsr, 0, sizeof(struct group_source_req));
1876	gsa = (sockunion_t *)&gsr.gsr_group;
1877	gsa->ss.ss_family = AF_UNSPEC;
1878	ssa = (sockunion_t *)&gsr.gsr_source;
1879	ssa->ss.ss_family = AF_UNSPEC;
1880
1881	switch (sopt->sopt_name) {
1882	case IP_ADD_MEMBERSHIP:
1883	case IP_ADD_SOURCE_MEMBERSHIP: {
1884		struct ip_mreq_source	 mreqs;
1885
1886		if (sopt->sopt_name == IP_ADD_MEMBERSHIP) {
1887			error = sooptcopyin(sopt, &mreqs,
1888			    sizeof(struct ip_mreq),
1889			    sizeof(struct ip_mreq));
1890			/*
1891			 * Do argument switcharoo from ip_mreq into
1892			 * ip_mreq_source to avoid using two instances.
1893			 */
1894			mreqs.imr_interface = mreqs.imr_sourceaddr;
1895			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
1896		} else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
1897			error = sooptcopyin(sopt, &mreqs,
1898			    sizeof(struct ip_mreq_source),
1899			    sizeof(struct ip_mreq_source));
1900		}
1901		if (error)
1902			return (error);
1903
1904		gsa->sin.sin_family = AF_INET;
1905		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1906		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1907
1908		if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
1909			ssa->sin.sin_family = AF_INET;
1910			ssa->sin.sin_len = sizeof(struct sockaddr_in);
1911			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1912		}
1913
1914		ifp = inp_lookup_mcast_ifp(inp, &gsa->sin,
1915		    mreqs.imr_interface);
1916		CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
1917		    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1918		break;
1919	}
1920
1921	case MCAST_JOIN_GROUP:
1922	case MCAST_JOIN_SOURCE_GROUP:
1923		if (sopt->sopt_name == MCAST_JOIN_GROUP) {
1924			error = sooptcopyin(sopt, &gsr,
1925			    sizeof(struct group_req),
1926			    sizeof(struct group_req));
1927		} else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1928			error = sooptcopyin(sopt, &gsr,
1929			    sizeof(struct group_source_req),
1930			    sizeof(struct group_source_req));
1931		}
1932		if (error)
1933			return (error);
1934
1935		if (gsa->sin.sin_family != AF_INET ||
1936		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1937			return (EINVAL);
1938
1939		/*
1940		 * Overwrite the port field if present, as the sockaddr
1941		 * being copied in may be matched with a binary comparison.
1942		 */
1943		gsa->sin.sin_port = 0;
1944		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1945			if (ssa->sin.sin_family != AF_INET ||
1946			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1947				return (EINVAL);
1948			ssa->sin.sin_port = 0;
1949		}
1950
1951		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
1952			return (EADDRNOTAVAIL);
1953		ifp = ifnet_byindex(gsr.gsr_interface);
1954		break;
1955
1956	default:
1957		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
1958		    __func__, sopt->sopt_name);
1959		return (EOPNOTSUPP);
1960		break;
1961	}
1962
1963	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1964		return (EINVAL);
1965
1966	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
1967		return (EADDRNOTAVAIL);
1968
1969	/*
1970	 * MCAST_JOIN_SOURCE on an exclusive membership is an error.
1971	 * On an existing inclusive membership, it just adds the
1972	 * source to the filter list.
1973	 */
1974	imo = inp_findmoptions(inp);
1975	idx = imo_match_group(imo, ifp, &gsa->sa);
1976	if (idx == -1) {
1977		is_new = 1;
1978	} else {
1979		inm = imo->imo_membership[idx];
1980		imf = &imo->imo_mfilters[idx];
1981		if (ssa->ss.ss_family != AF_UNSPEC &&
1982		    imf->imf_st[1] != MCAST_INCLUDE) {
1983			error = EINVAL;
1984			goto out_inp_locked;
1985		}
1986		lims = imo_match_source(imo, idx, &ssa->sa);
1987		if (lims != NULL) {
1988			error = EADDRNOTAVAIL;
1989			goto out_inp_locked;
1990		}
1991	}
1992
1993	/*
1994	 * Begin state merge transaction at socket layer.
1995	 */
1996	INP_WLOCK_ASSERT(inp);
1997
1998	if (is_new) {
1999		if (imo->imo_num_memberships == imo->imo_max_memberships) {
2000			error = imo_grow(imo);
2001			if (error)
2002				goto out_inp_locked;
2003		}
2004		/*
2005		 * Allocate the new slot upfront so we can deal with
2006		 * grafting the new source filter in same code path
2007		 * as for join-source on existing membership.
2008		 */
2009		idx = imo->imo_num_memberships;
2010		imo->imo_membership[idx] = NULL;
2011		imo->imo_num_memberships++;
2012		KASSERT(imo->imo_mfilters != NULL,
2013		    ("%s: imf_mfilters vector was not allocated", __func__));
2014		imf = &imo->imo_mfilters[idx];
2015		KASSERT(RB_EMPTY(&imf->imf_sources),
2016		    ("%s: imf_sources not empty", __func__));
2017	}
2018
2019	/*
2020	 * Graft new source into filter list for this inpcb's
2021	 * membership of the group. The in_multi may not have
2022	 * been allocated yet if this is a new membership.
2023	 */
2024	if (ssa->ss.ss_family != AF_UNSPEC) {
2025		/* Membership starts in IN mode */
2026		if (is_new) {
2027			CTR1(KTR_IGMPV3, "%s: new join w/source", __func__);
2028			imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE);
2029		} else {
2030			CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
2031		}
2032		lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin);
2033		if (lims == NULL) {
2034			CTR1(KTR_IGMPV3, "%s: merge imf state failed",
2035			    __func__);
2036			error = ENOMEM;
2037			goto out_imo_free;
2038		}
2039	}
2040
2041	/*
2042	 * Begin state merge transaction at IGMP layer.
2043	 */
2044	IN_MULTI_LOCK();
2045
2046	if (is_new) {
2047		error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf,
2048		    &inm);
2049		if (error)
2050			goto out_imo_free;
2051		imo->imo_membership[idx] = inm;
2052	} else {
2053		CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2054		error = inm_merge(inm, imf);
2055		if (error) {
2056			CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
2057			    __func__);
2058			goto out_imf_rollback;
2059		}
2060		CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2061		error = igmp_change_state(inm);
2062		if (error) {
2063			CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
2064			    __func__);
2065			goto out_imf_rollback;
2066		}
2067	}
2068
2069	IN_MULTI_UNLOCK();
2070
2071out_imf_rollback:
2072	INP_WLOCK_ASSERT(inp);
2073	if (error) {
2074		imf_rollback(imf);
2075		if (is_new)
2076			imf_purge(imf);
2077		else
2078			imf_reap(imf);
2079	} else {
2080		imf_commit(imf);
2081	}
2082
2083out_imo_free:
2084	if (error && is_new) {
2085		imo->imo_membership[idx] = NULL;
2086		--imo->imo_num_memberships;
2087	}
2088
2089out_inp_locked:
2090	INP_WUNLOCK(inp);
2091	return (error);
2092}
2093
2094/*
2095 * Leave an IPv4 multicast group on an inpcb, possibly with a source.
2096 */
2097static int
2098inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
2099{
2100	INIT_VNET_NET(curvnet);
2101	INIT_VNET_INET(curvnet);
2102	struct group_source_req		 gsr;
2103	struct ip_mreq_source		 mreqs;
2104	sockunion_t			*gsa, *ssa;
2105	struct ifnet			*ifp;
2106	struct in_mfilter		*imf;
2107	struct ip_moptions		*imo;
2108	struct in_msource		*ims;
2109	struct in_multi			*inm;
2110	size_t				 idx;
2111	int				 error, is_final;
2112
2113	ifp = NULL;
2114	error = 0;
2115	is_final = 1;
2116
2117	memset(&gsr, 0, sizeof(struct group_source_req));
2118	gsa = (sockunion_t *)&gsr.gsr_group;
2119	gsa->ss.ss_family = AF_UNSPEC;
2120	ssa = (sockunion_t *)&gsr.gsr_source;
2121	ssa->ss.ss_family = AF_UNSPEC;
2122
2123	switch (sopt->sopt_name) {
2124	case IP_DROP_MEMBERSHIP:
2125	case IP_DROP_SOURCE_MEMBERSHIP:
2126		if (sopt->sopt_name == IP_DROP_MEMBERSHIP) {
2127			error = sooptcopyin(sopt, &mreqs,
2128			    sizeof(struct ip_mreq),
2129			    sizeof(struct ip_mreq));
2130			/*
2131			 * Swap interface and sourceaddr arguments,
2132			 * as ip_mreq and ip_mreq_source are laid
2133			 * out differently.
2134			 */
2135			mreqs.imr_interface = mreqs.imr_sourceaddr;
2136			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
2137		} else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
2138			error = sooptcopyin(sopt, &mreqs,
2139			    sizeof(struct ip_mreq_source),
2140			    sizeof(struct ip_mreq_source));
2141		}
2142		if (error)
2143			return (error);
2144
2145		gsa->sin.sin_family = AF_INET;
2146		gsa->sin.sin_len = sizeof(struct sockaddr_in);
2147		gsa->sin.sin_addr = mreqs.imr_multiaddr;
2148
2149		if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
2150			ssa->sin.sin_family = AF_INET;
2151			ssa->sin.sin_len = sizeof(struct sockaddr_in);
2152			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
2153		}
2154
2155		if (!in_nullhost(gsa->sin.sin_addr))
2156			INADDR_TO_IFP(mreqs.imr_interface, ifp);
2157
2158		CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
2159		    __func__, inet_ntoa(mreqs.imr_interface), ifp);
2160
2161		break;
2162
2163	case MCAST_LEAVE_GROUP:
2164	case MCAST_LEAVE_SOURCE_GROUP:
2165		if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
2166			error = sooptcopyin(sopt, &gsr,
2167			    sizeof(struct group_req),
2168			    sizeof(struct group_req));
2169		} else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
2170			error = sooptcopyin(sopt, &gsr,
2171			    sizeof(struct group_source_req),
2172			    sizeof(struct group_source_req));
2173		}
2174		if (error)
2175			return (error);
2176
2177		if (gsa->sin.sin_family != AF_INET ||
2178		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
2179			return (EINVAL);
2180
2181		if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
2182			if (ssa->sin.sin_family != AF_INET ||
2183			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
2184				return (EINVAL);
2185		}
2186
2187		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
2188			return (EADDRNOTAVAIL);
2189
2190		ifp = ifnet_byindex(gsr.gsr_interface);
2191		break;
2192
2193	default:
2194		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
2195		    __func__, sopt->sopt_name);
2196		return (EOPNOTSUPP);
2197		break;
2198	}
2199
2200	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
2201		return (EINVAL);
2202
2203	/*
2204	 * Find the membership in the membership array.
2205	 */
2206	imo = inp_findmoptions(inp);
2207	idx = imo_match_group(imo, ifp, &gsa->sa);
2208	if (idx == -1) {
2209		error = EADDRNOTAVAIL;
2210		goto out_inp_locked;
2211	}
2212	inm = imo->imo_membership[idx];
2213	imf = &imo->imo_mfilters[idx];
2214
2215	if (ssa->ss.ss_family != AF_UNSPEC)
2216		is_final = 0;
2217
2218	/*
2219	 * Begin state merge transaction at socket layer.
2220	 */
2221	INP_WLOCK_ASSERT(inp);
2222
2223	/*
2224	 * If we were instructed only to leave a given source, do so.
2225	 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships.
2226	 */
2227	if (is_final) {
2228		imf_leave(imf);
2229	} else {
2230		if (imf->imf_st[0] == MCAST_EXCLUDE) {
2231			error = EADDRNOTAVAIL;
2232			goto out_inp_locked;
2233		}
2234		ims = imo_match_source(imo, idx, &ssa->sa);
2235		if (ims == NULL) {
2236			CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__,
2237			    inet_ntoa(ssa->sin.sin_addr), "not ");
2238			error = EADDRNOTAVAIL;
2239			goto out_inp_locked;
2240		}
2241		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
2242		error = imf_prune(imf, &ssa->sin);
2243		if (error) {
2244			CTR1(KTR_IGMPV3, "%s: merge imf state failed",
2245			    __func__);
2246			goto out_inp_locked;
2247		}
2248	}
2249
2250	/*
2251	 * Begin state merge transaction at IGMP layer.
2252	 */
2253	IN_MULTI_LOCK();
2254
2255	if (is_final) {
2256		/*
2257		 * Give up the multicast address record to which
2258		 * the membership points.
2259		 */
2260		(void)in_leavegroup_locked(inm, imf);
2261	} else {
2262		CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2263		error = inm_merge(inm, imf);
2264		if (error) {
2265			CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
2266			    __func__);
2267			goto out_imf_rollback;
2268		}
2269
2270		CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2271		error = igmp_change_state(inm);
2272		if (error) {
2273			CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
2274			    __func__);
2275		}
2276	}
2277
2278	IN_MULTI_UNLOCK();
2279
2280out_imf_rollback:
2281	if (error)
2282		imf_rollback(imf);
2283	else
2284		imf_commit(imf);
2285
2286	imf_reap(imf);
2287
2288	if (is_final) {
2289		/* Remove the gap in the membership array. */
2290		for (++idx; idx < imo->imo_num_memberships; ++idx)
2291			imo->imo_membership[idx-1] = imo->imo_membership[idx];
2292		imo->imo_num_memberships--;
2293	}
2294
2295out_inp_locked:
2296	INP_WUNLOCK(inp);
2297	return (error);
2298}
2299
2300/*
2301 * Select the interface for transmitting IPv4 multicast datagrams.
2302 *
2303 * Either an instance of struct in_addr or an instance of struct ip_mreqn
2304 * may be passed to this socket option. An address of INADDR_ANY or an
2305 * interface index of 0 is used to remove a previous selection.
2306 * When no interface is selected, one is chosen for every send.
2307 */
2308static int
2309inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
2310{
2311	INIT_VNET_NET(curvnet);
2312	INIT_VNET_INET(curvnet);
2313	struct in_addr		 addr;
2314	struct ip_mreqn		 mreqn;
2315	struct ifnet		*ifp;
2316	struct ip_moptions	*imo;
2317	int			 error;
2318
2319	if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
2320		/*
2321		 * An interface index was specified using the
2322		 * Linux-derived ip_mreqn structure.
2323		 */
2324		error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn),
2325		    sizeof(struct ip_mreqn));
2326		if (error)
2327			return (error);
2328
2329		if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex)
2330			return (EINVAL);
2331
2332		if (mreqn.imr_ifindex == 0) {
2333			ifp = NULL;
2334		} else {
2335			ifp = ifnet_byindex(mreqn.imr_ifindex);
2336			if (ifp == NULL)
2337				return (EADDRNOTAVAIL);
2338		}
2339	} else {
2340		/*
2341		 * An interface was specified by IPv4 address.
2342		 * This is the traditional BSD usage.
2343		 */
2344		error = sooptcopyin(sopt, &addr, sizeof(struct in_addr),
2345		    sizeof(struct in_addr));
2346		if (error)
2347			return (error);
2348		if (in_nullhost(addr)) {
2349			ifp = NULL;
2350		} else {
2351			INADDR_TO_IFP(addr, ifp);
2352			if (ifp == NULL)
2353				return (EADDRNOTAVAIL);
2354		}
2355		CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = %s", __func__, ifp,
2356		    inet_ntoa(addr));
2357	}
2358
2359	/* Reject interfaces which do not support multicast. */
2360	if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0)
2361		return (EOPNOTSUPP);
2362
2363	imo = inp_findmoptions(inp);
2364	imo->imo_multicast_ifp = ifp;
2365	imo->imo_multicast_addr.s_addr = INADDR_ANY;
2366	INP_WUNLOCK(inp);
2367
2368	return (0);
2369}
2370
2371/*
2372 * Atomically set source filters on a socket for an IPv4 multicast group.
2373 *
2374 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
2375 */
2376static int
2377inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
2378{
2379	INIT_VNET_NET(curvnet);
2380	struct __msfilterreq	 msfr;
2381	sockunion_t		*gsa;
2382	struct ifnet		*ifp;
2383	struct in_mfilter	*imf;
2384	struct ip_moptions	*imo;
2385	struct in_multi		*inm;
2386	size_t			 idx;
2387	int			 error;
2388
2389	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
2390	    sizeof(struct __msfilterreq));
2391	if (error)
2392		return (error);
2393
2394	if (msfr.msfr_nsrcs > in_mcast_maxsocksrc ||
2395	    (msfr.msfr_fmode != MCAST_EXCLUDE &&
2396	     msfr.msfr_fmode != MCAST_INCLUDE))
2397		return (EINVAL);
2398
2399	if (msfr.msfr_group.ss_family != AF_INET ||
2400	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
2401		return (EINVAL);
2402
2403	gsa = (sockunion_t *)&msfr.msfr_group;
2404	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
2405		return (EINVAL);
2406
2407	gsa->sin.sin_port = 0;	/* ignore port */
2408
2409	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
2410		return (EADDRNOTAVAIL);
2411
2412	ifp = ifnet_byindex(msfr.msfr_ifindex);
2413	if (ifp == NULL)
2414		return (EADDRNOTAVAIL);
2415
2416	/*
2417	 * Take the INP write lock.
2418	 * Check if this socket is a member of this group.
2419	 */
2420	imo = inp_findmoptions(inp);
2421	idx = imo_match_group(imo, ifp, &gsa->sa);
2422	if (idx == -1 || imo->imo_mfilters == NULL) {
2423		error = EADDRNOTAVAIL;
2424		goto out_inp_locked;
2425	}
2426	inm = imo->imo_membership[idx];
2427	imf = &imo->imo_mfilters[idx];
2428
2429	/*
2430	 * Begin state merge transaction at socket layer.
2431	 */
2432	INP_WLOCK_ASSERT(inp);
2433
2434	imf->imf_st[1] = msfr.msfr_fmode;
2435
2436	/*
2437	 * Apply any new source filters, if present.
2438	 * Make a copy of the user-space source vector so
2439	 * that we may copy them with a single copyin. This
2440	 * allows us to deal with page faults up-front.
2441	 */
2442	if (msfr.msfr_nsrcs > 0) {
2443		struct in_msource	*lims;
2444		struct sockaddr_in	*psin;
2445		struct sockaddr_storage	*kss, *pkss;
2446		int			 i;
2447
2448		INP_WUNLOCK(inp);
2449
2450		CTR2(KTR_IGMPV3, "%s: loading %lu source list entries",
2451		    __func__, (unsigned long)msfr.msfr_nsrcs);
2452		kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
2453		    M_TEMP, M_WAITOK);
2454		error = copyin(msfr.msfr_srcs, kss,
2455		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
2456		if (error) {
2457			free(kss, M_TEMP);
2458			return (error);
2459		}
2460
2461		INP_WLOCK(inp);
2462
2463		/*
2464		 * Mark all source filters as UNDEFINED at t1.
2465		 * Restore new group filter mode, as imf_leave()
2466		 * will set it to INCLUDE.
2467		 */
2468		imf_leave(imf);
2469		imf->imf_st[1] = msfr.msfr_fmode;
2470
2471		/*
2472		 * Update socket layer filters at t1, lazy-allocating
2473		 * new entries. This saves a bunch of memory at the
2474		 * cost of one RB_FIND() per source entry; duplicate
2475		 * entries in the msfr_nsrcs vector are ignored.
2476		 * If we encounter an error, rollback transaction.
2477		 *
2478		 * XXX This too could be replaced with a set-symmetric
2479		 * difference like loop to avoid walking from root
2480		 * every time, as the key space is common.
2481		 */
2482		for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) {
2483			psin = (struct sockaddr_in *)pkss;
2484			if (psin->sin_family != AF_INET) {
2485				error = EAFNOSUPPORT;
2486				break;
2487			}
2488			if (psin->sin_len != sizeof(struct sockaddr_in)) {
2489				error = EINVAL;
2490				break;
2491			}
2492			error = imf_get_source(imf, psin, &lims);
2493			if (error)
2494				break;
2495			lims->imsl_st[1] = imf->imf_st[1];
2496		}
2497		free(kss, M_TEMP);
2498	}
2499
2500	if (error)
2501		goto out_imf_rollback;
2502
2503	INP_WLOCK_ASSERT(inp);
2504	IN_MULTI_LOCK();
2505
2506	/*
2507	 * Begin state merge transaction at IGMP layer.
2508	 */
2509	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2510	error = inm_merge(inm, imf);
2511	if (error) {
2512		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
2513		goto out_imf_rollback;
2514	}
2515
2516	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2517	error = igmp_change_state(inm);
2518	if (error)
2519		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
2520
2521	IN_MULTI_UNLOCK();
2522
2523out_imf_rollback:
2524	if (error)
2525		imf_rollback(imf);
2526	else
2527		imf_commit(imf);
2528
2529	imf_reap(imf);
2530
2531out_inp_locked:
2532	INP_WUNLOCK(inp);
2533	return (error);
2534}
2535
2536/*
2537 * Set the IP multicast options in response to user setsockopt().
2538 *
2539 * Many of the socket options handled in this function duplicate the
2540 * functionality of socket options in the regular unicast API. However,
2541 * it is not possible to merge the duplicate code, because the idempotence
2542 * of the IPv4 multicast part of the BSD Sockets API must be preserved;
2543 * the effects of these options must be treated as separate and distinct.
2544 *
2545 * SMPng: XXX: Unlocked read of inp_socket believed OK.
2546 * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING
2547 * is refactored to no longer use vifs.
2548 */
2549int
2550inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
2551{
2552	struct ip_moptions	*imo;
2553	int			 error;
2554
2555	error = 0;
2556
2557	/*
2558	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
2559	 * or is a divert socket, reject it.
2560	 */
2561	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
2562	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
2563	     inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
2564		return (EOPNOTSUPP);
2565
2566	switch (sopt->sopt_name) {
2567	case IP_MULTICAST_VIF: {
2568		int vifi;
2569		/*
2570		 * Select a multicast VIF for transmission.
2571		 * Only useful if multicast forwarding is active.
2572		 */
2573		if (legal_vif_num == NULL) {
2574			error = EOPNOTSUPP;
2575			break;
2576		}
2577		error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
2578		if (error)
2579			break;
2580		if (!legal_vif_num(vifi) && (vifi != -1)) {
2581			error = EINVAL;
2582			break;
2583		}
2584		imo = inp_findmoptions(inp);
2585		imo->imo_multicast_vif = vifi;
2586		INP_WUNLOCK(inp);
2587		break;
2588	}
2589
2590	case IP_MULTICAST_IF:
2591		error = inp_set_multicast_if(inp, sopt);
2592		break;
2593
2594	case IP_MULTICAST_TTL: {
2595		u_char ttl;
2596
2597		/*
2598		 * Set the IP time-to-live for outgoing multicast packets.
2599		 * The original multicast API required a char argument,
2600		 * which is inconsistent with the rest of the socket API.
2601		 * We allow either a char or an int.
2602		 */
2603		if (sopt->sopt_valsize == sizeof(u_char)) {
2604			error = sooptcopyin(sopt, &ttl, sizeof(u_char),
2605			    sizeof(u_char));
2606			if (error)
2607				break;
2608		} else {
2609			u_int ittl;
2610
2611			error = sooptcopyin(sopt, &ittl, sizeof(u_int),
2612			    sizeof(u_int));
2613			if (error)
2614				break;
2615			if (ittl > 255) {
2616				error = EINVAL;
2617				break;
2618			}
2619			ttl = (u_char)ittl;
2620		}
2621		imo = inp_findmoptions(inp);
2622		imo->imo_multicast_ttl = ttl;
2623		INP_WUNLOCK(inp);
2624		break;
2625	}
2626
2627	case IP_MULTICAST_LOOP: {
2628		u_char loop;
2629
2630		/*
2631		 * Set the loopback flag for outgoing multicast packets.
2632		 * Must be zero or one.  The original multicast API required a
2633		 * char argument, which is inconsistent with the rest
2634		 * of the socket API.  We allow either a char or an int.
2635		 */
2636		if (sopt->sopt_valsize == sizeof(u_char)) {
2637			error = sooptcopyin(sopt, &loop, sizeof(u_char),
2638			    sizeof(u_char));
2639			if (error)
2640				break;
2641		} else {
2642			u_int iloop;
2643
2644			error = sooptcopyin(sopt, &iloop, sizeof(u_int),
2645					    sizeof(u_int));
2646			if (error)
2647				break;
2648			loop = (u_char)iloop;
2649		}
2650		imo = inp_findmoptions(inp);
2651		imo->imo_multicast_loop = !!loop;
2652		INP_WUNLOCK(inp);
2653		break;
2654	}
2655
2656	case IP_ADD_MEMBERSHIP:
2657	case IP_ADD_SOURCE_MEMBERSHIP:
2658	case MCAST_JOIN_GROUP:
2659	case MCAST_JOIN_SOURCE_GROUP:
2660		error = inp_join_group(inp, sopt);
2661		break;
2662
2663	case IP_DROP_MEMBERSHIP:
2664	case IP_DROP_SOURCE_MEMBERSHIP:
2665	case MCAST_LEAVE_GROUP:
2666	case MCAST_LEAVE_SOURCE_GROUP:
2667		error = inp_leave_group(inp, sopt);
2668		break;
2669
2670	case IP_BLOCK_SOURCE:
2671	case IP_UNBLOCK_SOURCE:
2672	case MCAST_BLOCK_SOURCE:
2673	case MCAST_UNBLOCK_SOURCE:
2674		error = inp_block_unblock_source(inp, sopt);
2675		break;
2676
2677	case IP_MSFILTER:
2678		error = inp_set_source_filters(inp, sopt);
2679		break;
2680
2681	default:
2682		error = EOPNOTSUPP;
2683		break;
2684	}
2685
2686	INP_UNLOCK_ASSERT(inp);
2687
2688	return (error);
2689}
2690
2691/*
2692 * Expose IGMP's multicast filter mode and source list(s) to userland,
2693 * keyed by (ifindex, group).
2694 * The filter mode is written out as a uint32_t, followed by
2695 * 0..n of struct in_addr.
2696 * For use by ifmcstat(8).
2697 * SMPng: NOTE: unlocked read of ifindex space.
2698 */
2699static int
2700sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS)
2701{
2702	INIT_VNET_NET(curvnet);
2703	struct in_addr			 src, group;
2704	struct ifnet			*ifp;
2705	struct ifmultiaddr		*ifma;
2706	struct in_multi			*inm;
2707	struct ip_msource		*ims;
2708	int				*name;
2709	int				 retval;
2710	u_int				 namelen;
2711	uint32_t			 fmode, ifindex;
2712
2713	name = (int *)arg1;
2714	namelen = arg2;
2715
2716	if (req->newptr != NULL)
2717		return (EPERM);
2718
2719	if (namelen != 2)
2720		return (EINVAL);
2721
2722	ifindex = name[0];
2723	if (ifindex <= 0 || ifindex > V_if_index) {
2724		CTR2(KTR_IGMPV3, "%s: ifindex %u out of range",
2725		    __func__, ifindex);
2726		return (ENOENT);
2727	}
2728
2729	group.s_addr = name[1];
2730	if (!IN_MULTICAST(ntohl(group.s_addr))) {
2731		CTR2(KTR_IGMPV3, "%s: group %s is not multicast",
2732		    __func__, inet_ntoa(group));
2733		return (EINVAL);
2734	}
2735
2736	ifp = ifnet_byindex(ifindex);
2737	if (ifp == NULL) {
2738		CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u",
2739		    __func__, ifindex);
2740		return (ENOENT);
2741	}
2742
2743	retval = sysctl_wire_old_buffer(req,
2744	    sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr)));
2745	if (retval)
2746		return (retval);
2747
2748	IN_MULTI_LOCK();
2749
2750	IF_ADDR_LOCK(ifp);
2751	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2752		if (ifma->ifma_addr->sa_family != AF_INET ||
2753		    ifma->ifma_protospec == NULL)
2754			continue;
2755		inm = (struct in_multi *)ifma->ifma_protospec;
2756		if (!in_hosteq(inm->inm_addr, group))
2757			continue;
2758		fmode = inm->inm_st[1].iss_fmode;
2759		retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t));
2760		if (retval != 0)
2761			break;
2762		RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
2763#ifdef KTR
2764			struct in_addr ina;
2765			ina.s_addr = htonl(ims->ims_haddr);
2766			CTR2(KTR_IGMPV3, "%s: visit node %s", __func__,
2767			    inet_ntoa(ina));
2768#endif
2769			/*
2770			 * Only copy-out sources which are in-mode.
2771			 */
2772			if (fmode != ims_get_mode(inm, ims, 1)) {
2773				CTR1(KTR_IGMPV3, "%s: skip non-in-mode",
2774				    __func__);
2775				continue;
2776			}
2777			src.s_addr = htonl(ims->ims_haddr);
2778			retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr));
2779			if (retval != 0)
2780				break;
2781		}
2782	}
2783	IF_ADDR_UNLOCK(ifp);
2784
2785	IN_MULTI_UNLOCK();
2786
2787	return (retval);
2788}
2789
2790#ifdef KTR
2791
2792static const char *inm_modestrs[] = { "un", "in", "ex" };
2793
2794static const char *
2795inm_mode_str(const int mode)
2796{
2797
2798	if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE)
2799		return (inm_modestrs[mode]);
2800	return ("??");
2801}
2802
2803static const char *inm_statestrs[] = {
2804	"not-member",
2805	"silent",
2806	"idle",
2807	"lazy",
2808	"sleeping",
2809	"awakening",
2810	"query-pending",
2811	"sg-query-pending",
2812	"leaving"
2813};
2814
2815static const char *
2816inm_state_str(const int state)
2817{
2818
2819	if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER)
2820		return (inm_statestrs[state]);
2821	return ("??");
2822}
2823
2824/*
2825 * Dump an in_multi structure to the console.
2826 */
2827void
2828inm_print(const struct in_multi *inm)
2829{
2830	int t;
2831
2832	if ((ktr_mask & KTR_IGMPV3) == 0)
2833		return;
2834
2835	printf("%s: --- begin inm %p ---\n", __func__, inm);
2836	printf("addr %s ifp %p(%s) ifma %p\n",
2837	    inet_ntoa(inm->inm_addr),
2838	    inm->inm_ifp,
2839	    inm->inm_ifp->if_xname,
2840	    inm->inm_ifma);
2841	printf("timer %u state %s refcount %u scq.len %u\n",
2842	    inm->inm_timer,
2843	    inm_state_str(inm->inm_state),
2844	    inm->inm_refcount,
2845	    inm->inm_scq.ifq_len);
2846	printf("igi %p nsrc %lu sctimer %u scrv %u\n",
2847	    inm->inm_igi,
2848	    inm->inm_nsrc,
2849	    inm->inm_sctimer,
2850	    inm->inm_scrv);
2851	for (t = 0; t < 2; t++) {
2852		printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t,
2853		    inm_mode_str(inm->inm_st[t].iss_fmode),
2854		    inm->inm_st[t].iss_asm,
2855		    inm->inm_st[t].iss_ex,
2856		    inm->inm_st[t].iss_in,
2857		    inm->inm_st[t].iss_rec);
2858	}
2859	printf("%s: --- end inm %p ---\n", __func__, inm);
2860}
2861
2862#else /* !KTR */
2863
/*
 * Stub used when KTR is not compiled in: callers may invoke inm_print()
 * unconditionally and nothing is printed.
 */
void
inm_print(const struct in_multi *inm)
{

	/* Intentionally empty. */
}
2869
2870#endif /* KTR */
2871
/*
 * Instantiate the red-black tree routines for ip_msource_tree: nodes are
 * struct ip_msource, linked through ims_link and ordered by ip_msource_cmp.
 */
RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp);
2873