in_mcast.c revision 189635
1/*-
2 * Copyright (c) 2007-2009 Bruce Simpson.
3 * Copyright (c) 2005 Robert N. M. Watson.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote
15 *    products derived from this software without specific prior written
16 *    permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31/*
32 * IPv4 multicast socket, group, and socket option processing module.
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/netinet/in_mcast.c 189635 2009-03-10 17:48:49Z bms $");
37
38#include "opt_route.h"
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/kernel.h>
43#include <sys/malloc.h>
44#include <sys/mbuf.h>
45#include <sys/protosw.h>
46#include <sys/socket.h>
47#include <sys/socketvar.h>
48#include <sys/protosw.h>
49#include <sys/sysctl.h>
50#include <sys/vimage.h>
51#include <sys/ktr.h>
52#include <sys/tree.h>
53
54#include <net/if.h>
55#include <net/if_dl.h>
56#include <net/route.h>
57#include <net/vnet.h>
58
59#include <netinet/in.h>
60#include <netinet/in_systm.h>
61#include <netinet/in_pcb.h>
62#include <netinet/in_var.h>
63#include <netinet/ip_var.h>
64#include <netinet/igmp_var.h>
65#include <netinet/vinet.h>
66
67#ifndef KTR_IGMPV3
68#define KTR_IGMPV3 KTR_SUBSYS
69#endif
70
71#ifndef __SOCKUNION_DECLARED
72union sockunion {
73	struct sockaddr_storage	ss;
74	struct sockaddr		sa;
75	struct sockaddr_dl	sdl;
76	struct sockaddr_in	sin;
77};
78typedef union sockunion sockunion_t;
79#define __SOCKUNION_DECLARED
80#endif /* __SOCKUNION_DECLARED */
81
82static MALLOC_DEFINE(M_INMFILTER, "in_mfilter",
83    "IPv4 multicast PCB-layer source filter");
84static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
85static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
86static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource",
87    "IPv4 multicast IGMP-layer source filter");
88
89#ifdef VIMAGE_GLOBALS
90struct in_multihead in_multihead;	/* XXX now unused; retain for ABI */
91#endif
92
93/*
94 * Locking:
95 * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
96 * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however
97 *   it can be taken by code in net/if.c also.
98 * - ip_moptions and in_mfilter are covered by the INP_WLOCK.
99 *
100 * struct in_multi is covered by IN_MULTI_LOCK. There isn't strictly
101 * any need for in_multi itself to be virtualized -- it is bound to an ifp
102 * anyway no matter what happens.
103 */
104struct mtx in_multi_mtx;
105MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF);
106
107/*
108 * Functions with non-static linkage defined in this file should be
109 * declared in in_var.h:
110 *  imo_multi_filter()
111 *  in_addmulti()
112 *  in_delmulti()
113 *  in_joingroup()
114 *  in_joingroup_locked()
115 *  in_leavegroup()
116 *  in_leavegroup_locked()
117 * and ip_var.h:
118 *  inp_freemoptions()
119 *  inp_getmoptions()
120 *  inp_setmoptions()
121 *
122 * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti()
123 * and in_delmulti().
124 */
125static void	imf_commit(struct in_mfilter *);
126static int	imf_get_source(struct in_mfilter *imf,
127		    const struct sockaddr_in *psin,
128		    struct in_msource **);
129static struct in_msource *
130		imf_graft(struct in_mfilter *, const uint8_t,
131		    const struct sockaddr_in *);
132static void	imf_leave(struct in_mfilter *);
133static int	imf_prune(struct in_mfilter *, const struct sockaddr_in *);
134static void	imf_purge(struct in_mfilter *);
135static void	imf_rollback(struct in_mfilter *);
136static void	imf_reap(struct in_mfilter *);
137static int	imo_grow(struct ip_moptions *);
138static size_t	imo_match_group(const struct ip_moptions *,
139		    const struct ifnet *, const struct sockaddr *);
140static struct in_msource *
141		imo_match_source(const struct ip_moptions *, const size_t,
142		    const struct sockaddr *);
143static void	ims_merge(struct ip_msource *ims,
144		    const struct in_msource *lims, const int rollback);
145static int	in_getmulti(struct ifnet *, const struct in_addr *,
146		    struct in_multi **);
147static int	inm_get_source(struct in_multi *inm, const in_addr_t haddr,
148		    const int noalloc, struct ip_msource **pims);
149static int	inm_is_ifp_detached(const struct in_multi *);
150static int	inm_merge(struct in_multi *, /*const*/ struct in_mfilter *);
151static void	inm_purge(struct in_multi *);
152static void	inm_reap(struct in_multi *);
153static struct ip_moptions *
154		inp_findmoptions(struct inpcb *);
155static int	inp_get_source_filters(struct inpcb *, struct sockopt *);
156static int	inp_join_group(struct inpcb *, struct sockopt *);
157static int	inp_leave_group(struct inpcb *, struct sockopt *);
158static struct ifnet *
159		inp_lookup_mcast_ifp(const struct inpcb *,
160		    const struct sockaddr_in *, const struct in_addr);
161static int	inp_block_unblock_source(struct inpcb *, struct sockopt *);
162static int	inp_set_multicast_if(struct inpcb *, struct sockopt *);
163static int	inp_set_source_filters(struct inpcb *, struct sockopt *);
164static int	sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS);
165
166SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0, "IPv4 multicast");
167
168static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER;
169SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc,
170    CTLFLAG_RW | CTLFLAG_TUN, &in_mcast_maxgrpsrc, 0,
171    "Max source filters per group");
172TUNABLE_ULONG("net.inet.ip.mcast.maxgrpsrc", &in_mcast_maxgrpsrc);
173
174static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER;
175SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc,
176    CTLFLAG_RW | CTLFLAG_TUN, &in_mcast_maxsocksrc, 0,
177    "Max source filters per socket");
178TUNABLE_ULONG("net.inet.ip.mcast.maxsocksrc", &in_mcast_maxsocksrc);
179
180int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP;
181SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RW | CTLFLAG_TUN,
182    &in_mcast_loop, 0, "Loopback multicast datagrams by default");
183TUNABLE_INT("net.inet.ip.mcast.loop", &in_mcast_loop);
184
185SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters,
186    CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
187    "Per-interface stack-wide source filters");
188
189/*
190 * Inline function which wraps assertions for a valid ifp.
191 * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
192 * is detached.
193 */
194static int __inline
195inm_is_ifp_detached(const struct in_multi *inm)
196{
197	struct ifnet *ifp;
198
199	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
200	ifp = inm->inm_ifma->ifma_ifp;
201	if (ifp != NULL) {
202		/*
203		 * Sanity check that netinet's notion of ifp is the
204		 * same as net's.
205		 */
206		KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
207	}
208
209	return (ifp == NULL);
210}
211
212/*
213 * Initialize an in_mfilter structure to a known state at t0, t1
214 * with an empty source filter list.
215 */
216static __inline void
217imf_init(struct in_mfilter *imf, const int st0, const int st1)
218{
219	memset(imf, 0, sizeof(struct in_mfilter));
220	RB_INIT(&imf->imf_sources);
221	imf->imf_st[0] = st0;
222	imf->imf_st[1] = st1;
223}
224
225/*
226 * Resize the ip_moptions vector to the next power-of-two minus 1.
227 * May be called with locks held; do not sleep.
228 */
229static int
230imo_grow(struct ip_moptions *imo)
231{
232	struct in_multi		**nmships;
233	struct in_multi		**omships;
234	struct in_mfilter	 *nmfilters;
235	struct in_mfilter	 *omfilters;
236	size_t			  idx;
237	size_t			  newmax;
238	size_t			  oldmax;
239
240	nmships = NULL;
241	nmfilters = NULL;
242	omships = imo->imo_membership;
243	omfilters = imo->imo_mfilters;
244	oldmax = imo->imo_max_memberships;
245	newmax = ((oldmax + 1) * 2) - 1;
246
247	if (newmax <= IP_MAX_MEMBERSHIPS) {
248		nmships = (struct in_multi **)realloc(omships,
249		    sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT);
250		nmfilters = (struct in_mfilter *)realloc(omfilters,
251		    sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT);
252		if (nmships != NULL && nmfilters != NULL) {
253			/* Initialize newly allocated source filter heads. */
254			for (idx = oldmax; idx < newmax; idx++) {
255				imf_init(&nmfilters[idx], MCAST_UNDEFINED,
256				    MCAST_EXCLUDE);
257			}
258			imo->imo_max_memberships = newmax;
259			imo->imo_membership = nmships;
260			imo->imo_mfilters = nmfilters;
261		}
262	}
263
264	if (nmships == NULL || nmfilters == NULL) {
265		if (nmships != NULL)
266			free(nmships, M_IPMOPTS);
267		if (nmfilters != NULL)
268			free(nmfilters, M_INMFILTER);
269		return (ETOOMANYREFS);
270	}
271
272	return (0);
273}
274
275/*
276 * Find an IPv4 multicast group entry for this ip_moptions instance
277 * which matches the specified group, and optionally an interface.
278 * Return its index into the array, or -1 if not found.
279 */
280static size_t
281imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp,
282    const struct sockaddr *group)
283{
284	const struct sockaddr_in *gsin;
285	struct in_multi	**pinm;
286	int		  idx;
287	int		  nmships;
288
289	gsin = (const struct sockaddr_in *)group;
290
291	/* The imo_membership array may be lazy allocated. */
292	if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
293		return (-1);
294
295	nmships = imo->imo_num_memberships;
296	pinm = &imo->imo_membership[0];
297	for (idx = 0; idx < nmships; idx++, pinm++) {
298		if (*pinm == NULL)
299			continue;
300		if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) &&
301		    in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) {
302			break;
303		}
304	}
305	if (idx >= nmships)
306		idx = -1;
307
308	return (idx);
309}
310
311/*
312 * Find an IPv4 multicast source entry for this imo which matches
313 * the given group index for this socket, and source address.
314 *
315 * NOTE: This does not check if the entry is in-mode, merely if
316 * it exists, which may not be the desired behaviour.
317 */
318static struct in_msource *
319imo_match_source(const struct ip_moptions *imo, const size_t gidx,
320    const struct sockaddr *src)
321{
322	struct ip_msource	 find;
323	struct in_mfilter	*imf;
324	struct ip_msource	*ims;
325	const sockunion_t	*psa;
326
327	KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
328	KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
329	    ("%s: invalid index %d\n", __func__, (int)gidx));
330
331	/* The imo_mfilters array may be lazy allocated. */
332	if (imo->imo_mfilters == NULL)
333		return (NULL);
334	imf = &imo->imo_mfilters[gidx];
335
336	/* Source trees are keyed in host byte order. */
337	psa = (const sockunion_t *)src;
338	find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr);
339	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
340
341	return ((struct in_msource *)ims);
342}
343
344/*
345 * Perform filtering for multicast datagrams on a socket by group and source.
346 *
347 * Returns 0 if a datagram should be allowed through, or various error codes
348 * if the socket was not a member of the group, or the source was muted, etc.
349 */
350int
351imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp,
352    const struct sockaddr *group, const struct sockaddr *src)
353{
354	size_t gidx;
355	struct in_msource *ims;
356	int mode;
357
358	KASSERT(ifp != NULL, ("%s: null ifp", __func__));
359
360	gidx = imo_match_group(imo, ifp, group);
361	if (gidx == -1)
362		return (MCAST_NOTGMEMBER);
363
364	/*
365	 * Check if the source was included in an (S,G) join.
366	 * Allow reception on exclusive memberships by default,
367	 * reject reception on inclusive memberships by default.
368	 * Exclude source only if an in-mode exclude filter exists.
369	 * Include source only if an in-mode include filter exists.
370	 * NOTE: We are comparing group state here at IGMP t1 (now)
371	 * with socket-layer t0 (since last downcall).
372	 */
373	mode = imo->imo_mfilters[gidx].imf_st[1];
374	ims = imo_match_source(imo, gidx, src);
375
376	if ((ims == NULL && mode == MCAST_INCLUDE) ||
377	    (ims != NULL && ims->imsl_st[0] != mode))
378		return (MCAST_NOTSMEMBER);
379
380	return (MCAST_PASS);
381}
382
383/*
384 * Find and return a reference to an in_multi record for (ifp, group),
385 * and bump its reference count.
386 * If one does not exist, try to allocate it, and update link-layer multicast
387 * filters on ifp to listen for group.
388 * Assumes the IN_MULTI lock is held across the call.
389 * Return 0 if successful, otherwise return an appropriate error code.
390 */
391static int
392in_getmulti(struct ifnet *ifp, const struct in_addr *group,
393    struct in_multi **pinm)
394{
395	INIT_VNET_INET(ifp->if_vnet);
396	struct sockaddr_in	 gsin;
397	struct ifmultiaddr	*ifma;
398	struct in_ifinfo	*ii;
399	struct in_multi		*inm;
400	int error;
401
402#if defined(INVARIANTS) && defined(IFF_ASSERTGIANT)
403	IFF_ASSERTGIANT(ifp);
404#endif
405	IN_MULTI_LOCK_ASSERT();
406
407	ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET];
408
409	inm = inm_lookup(ifp, *group);
410	if (inm != NULL) {
411		/*
412		 * If we already joined this group, just bump the
413		 * refcount and return it.
414		 */
415		KASSERT(inm->inm_refcount >= 1,
416		    ("%s: bad refcount %d", __func__, inm->inm_refcount));
417		++inm->inm_refcount;
418		*pinm = inm;
419		return (0);
420	}
421
422	memset(&gsin, 0, sizeof(gsin));
423	gsin.sin_family = AF_INET;
424	gsin.sin_len = sizeof(struct sockaddr_in);
425	gsin.sin_addr = *group;
426
427	/*
428	 * Check if a link-layer group is already associated
429	 * with this network-layer group on the given ifnet.
430	 */
431	error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma);
432	if (error != 0)
433		return (error);
434
435	/*
436	 * If something other than netinet is occupying the link-layer
437	 * group, print a meaningful error message and back out of
438	 * the allocation.
439	 * Otherwise, bump the refcount on the existing network-layer
440	 * group association and return it.
441	 */
442	if (ifma->ifma_protospec != NULL) {
443		inm = (struct in_multi *)ifma->ifma_protospec;
444#ifdef INVARIANTS
445		KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr",
446		    __func__));
447		KASSERT(ifma->ifma_addr->sa_family == AF_INET,
448		    ("%s: ifma not AF_INET", __func__));
449		KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__));
450		if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
451		    !in_hosteq(inm->inm_addr, *group))
452			panic("%s: ifma %p is inconsistent with %p (%s)",
453			    __func__, ifma, inm, inet_ntoa(*group));
454#endif
455		++inm->inm_refcount;
456		*pinm = inm;
457		return (0);
458	}
459
460	/*
461	 * A new in_multi record is needed; allocate and initialize it.
462	 * We DO NOT perform an IGMP join as the in_ layer may need to
463	 * push an initial source list down to IGMP to support SSM.
464	 *
465	 * The initial source filter state is INCLUDE, {} as per the RFC.
466	 */
467	inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO);
468	if (inm == NULL) {
469		if_delmulti_ifma(ifma);
470		return (ENOMEM);
471	}
472	inm->inm_addr = *group;
473	inm->inm_ifp = ifp;
474	inm->inm_igi = ii->ii_igmp;
475	inm->inm_ifma = ifma;
476	inm->inm_refcount = 1;
477	inm->inm_state = IGMP_NOT_MEMBER;
478
479	/*
480	 * Pending state-changes per group are subject to a bounds check.
481	 */
482	IFQ_SET_MAXLEN(&inm->inm_scq, IGMP_MAX_STATE_CHANGES);
483
484	inm->inm_st[0].iss_fmode = MCAST_UNDEFINED;
485	inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
486	RB_INIT(&inm->inm_srcs);
487
488	ifma->ifma_protospec = inm;
489
490	*pinm = inm;
491
492	return (0);
493}
494
495/*
496 * Drop a reference to an in_multi record.
497 *
498 * If the refcount drops to 0, free the in_multi record and
499 * delete the underlying link-layer membership.
500 */
501void
502inm_release_locked(struct in_multi *inm)
503{
504	struct ifmultiaddr *ifma;
505
506#if defined(INVARIANTS) && defined(IFF_ASSERTGIANT)
507	if (!inm_is_ifp_detached(inm))
508		IFF_ASSERTGIANT(ifp);
509#endif
510
511	IN_MULTI_LOCK_ASSERT();
512
513	CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount);
514
515	if (--inm->inm_refcount > 0) {
516		CTR2(KTR_IGMPV3, "%s: refcount is now %d", __func__,
517		    inm->inm_refcount);
518		return;
519	}
520
521	CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm);
522
523	ifma = inm->inm_ifma;
524
525	CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma);
526	KASSERT(ifma->ifma_protospec == inm,
527	    ("%s: ifma_protospec != inm", __func__));
528	ifma->ifma_protospec = NULL;
529
530	inm_purge(inm);
531
532	free(inm, M_IPMADDR);
533
534	if_delmulti_ifma(ifma);
535}
536
537/*
538 * Clear recorded source entries for a group.
539 * Used by the IGMP code. Caller must hold the IN_MULTI lock.
540 * FIXME: Should reap.
541 */
542void
543inm_clear_recorded(struct in_multi *inm)
544{
545	struct ip_msource	*ims;
546
547	IN_MULTI_LOCK_ASSERT();
548
549	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
550		if (ims->ims_stp) {
551			ims->ims_stp = 0;
552			--inm->inm_st[1].iss_rec;
553		}
554	}
555	KASSERT(inm->inm_st[1].iss_rec == 0,
556	    ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec));
557}
558
559/*
560 * Record a source as pending for a Source-Group IGMPv3 query.
561 * This lives here as it modifies the shared tree.
562 *
563 * inm is the group descriptor.
564 * naddr is the address of the source to record in network-byte order.
565 *
566 * If the net.inet.igmp.sgalloc sysctl is non-zero, we will
567 * lazy-allocate a source node in response to an SG query.
568 * Otherwise, no allocation is performed. This saves some memory
569 * with the trade-off that the source will not be reported to the
570 * router if joined in the window between the query response and
571 * the group actually being joined on the local host.
572 *
573 * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed.
574 * This turns off the allocation of a recorded source entry if
575 * the group has not been joined.
576 *
577 * Return 0 if the source didn't exist or was already marked as recorded.
578 * Return 1 if the source was marked as recorded by this function.
579 * Return <0 if any error occured (negated errno code).
580 */
581int
582inm_record_source(struct in_multi *inm, const in_addr_t naddr)
583{
584	struct ip_msource	 find;
585	struct ip_msource	*ims, *nims;
586
587	IN_MULTI_LOCK_ASSERT();
588
589	find.ims_haddr = ntohl(naddr);
590	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
591	if (ims && ims->ims_stp)
592		return (0);
593	if (ims == NULL) {
594		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
595			return (-ENOSPC);
596		nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
597		    M_NOWAIT | M_ZERO);
598		if (nims == NULL)
599			return (-ENOMEM);
600		nims->ims_haddr = find.ims_haddr;
601		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
602		++inm->inm_nsrc;
603		ims = nims;
604	}
605
606	/*
607	 * Mark the source as recorded and update the recorded
608	 * source count.
609	 */
610	++ims->ims_stp;
611	++inm->inm_st[1].iss_rec;
612
613	return (1);
614}
615
616/*
617 * Return a pointer to an in_msource owned by an in_mfilter,
618 * given its source address.
619 * Lazy-allocate if needed. If this is a new entry its filter state is
620 * undefined at t0.
621 *
622 * imf is the filter set being modified.
623 * haddr is the source address in *host* byte-order.
624 *
625 * SMPng: May be called with locks held; malloc must not block.
626 */
627static int
628imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin,
629    struct in_msource **plims)
630{
631	struct ip_msource	 find;
632	struct ip_msource	*ims, *nims;
633	struct in_msource	*lims;
634	int			 error;
635
636	error = 0;
637	ims = NULL;
638	lims = NULL;
639
640	/* key is host byte order */
641	find.ims_haddr = ntohl(psin->sin_addr.s_addr);
642	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
643	lims = (struct in_msource *)ims;
644	if (lims == NULL) {
645		if (imf->imf_nsrc == in_mcast_maxsocksrc)
646			return (ENOSPC);
647		nims = malloc(sizeof(struct in_msource), M_INMFILTER,
648		    M_NOWAIT | M_ZERO);
649		if (nims == NULL)
650			return (ENOMEM);
651		lims = (struct in_msource *)nims;
652		lims->ims_haddr = find.ims_haddr;
653		lims->imsl_st[0] = MCAST_UNDEFINED;
654		RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
655		++imf->imf_nsrc;
656	}
657
658	*plims = lims;
659
660	return (error);
661}
662
663/*
664 * Graft a source entry into an existing socket-layer filter set,
665 * maintaining any required invariants and checking allocations.
666 *
667 * The source is marked as being in the new filter mode at t1.
668 *
669 * Return the pointer to the new node, otherwise return NULL.
670 */
671static struct in_msource *
672imf_graft(struct in_mfilter *imf, const uint8_t st1,
673    const struct sockaddr_in *psin)
674{
675	struct ip_msource	*nims;
676	struct in_msource	*lims;
677
678	nims = malloc(sizeof(struct in_msource), M_INMFILTER,
679	    M_NOWAIT | M_ZERO);
680	if (nims == NULL)
681		return (NULL);
682	lims = (struct in_msource *)nims;
683	lims->ims_haddr = ntohl(psin->sin_addr.s_addr);
684	lims->imsl_st[0] = MCAST_UNDEFINED;
685	lims->imsl_st[1] = st1;
686	RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
687	++imf->imf_nsrc;
688
689	return (lims);
690}
691
692/*
693 * Prune a source entry from an existing socket-layer filter set,
694 * maintaining any required invariants and checking allocations.
695 *
696 * The source is marked as being left at t1, it is not freed.
697 *
698 * Return 0 if no error occurred, otherwise return an errno value.
699 */
700static int
701imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin)
702{
703	struct ip_msource	 find;
704	struct ip_msource	*ims;
705	struct in_msource	*lims;
706
707	/* key is host byte order */
708	find.ims_haddr = ntohl(psin->sin_addr.s_addr);
709	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
710	if (ims == NULL)
711		return (ENOENT);
712	lims = (struct in_msource *)ims;
713	lims->imsl_st[1] = MCAST_UNDEFINED;
714	return (0);
715}
716
717/*
718 * Revert socket-layer filter set deltas at t1 to t0 state.
719 */
720static void
721imf_rollback(struct in_mfilter *imf)
722{
723	struct ip_msource	*ims, *tims;
724	struct in_msource	*lims;
725
726	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
727		lims = (struct in_msource *)ims;
728		if (lims->imsl_st[0] == lims->imsl_st[1]) {
729			/* no change at t1 */
730			continue;
731		} else if (lims->imsl_st[0] != MCAST_UNDEFINED) {
732			/* revert change to existing source at t1 */
733			lims->imsl_st[1] = lims->imsl_st[0];
734		} else {
735			/* revert source added t1 */
736			CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
737			RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
738			free(ims, M_INMFILTER);
739			imf->imf_nsrc--;
740		}
741	}
742	imf->imf_st[1] = imf->imf_st[0];
743}
744
745/*
746 * Mark socket-layer filter set as INCLUDE {} at t1.
747 */
748static void
749imf_leave(struct in_mfilter *imf)
750{
751	struct ip_msource	*ims;
752	struct in_msource	*lims;
753
754	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
755		lims = (struct in_msource *)ims;
756		lims->imsl_st[1] = MCAST_UNDEFINED;
757	}
758	imf->imf_st[1] = MCAST_INCLUDE;
759}
760
761/*
762 * Mark socket-layer filter set deltas as committed.
763 */
764static void
765imf_commit(struct in_mfilter *imf)
766{
767	struct ip_msource	*ims;
768	struct in_msource	*lims;
769
770	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
771		lims = (struct in_msource *)ims;
772		lims->imsl_st[0] = lims->imsl_st[1];
773	}
774	imf->imf_st[0] = imf->imf_st[1];
775}
776
777/*
778 * Reap unreferenced sources from socket-layer filter set.
779 */
780static void
781imf_reap(struct in_mfilter *imf)
782{
783	struct ip_msource	*ims, *tims;
784	struct in_msource	*lims;
785
786	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
787		lims = (struct in_msource *)ims;
788		if ((lims->imsl_st[0] == MCAST_UNDEFINED) &&
789		    (lims->imsl_st[1] == MCAST_UNDEFINED)) {
790			CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims);
791			RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
792			free(ims, M_INMFILTER);
793			imf->imf_nsrc--;
794		}
795	}
796}
797
798/*
799 * Purge socket-layer filter set.
800 */
801static void
802imf_purge(struct in_mfilter *imf)
803{
804	struct ip_msource	*ims, *tims;
805
806	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
807		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
808		RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
809		free(ims, M_INMFILTER);
810		imf->imf_nsrc--;
811	}
812	imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED;
813	KASSERT(RB_EMPTY(&imf->imf_sources),
814	    ("%s: imf_sources not empty", __func__));
815}
816
817/*
818 * Look up a source filter entry for a multicast group.
819 *
820 * inm is the group descriptor to work with.
821 * haddr is the host-byte-order IPv4 address to look up.
822 * noalloc may be non-zero to suppress allocation of sources.
823 * *pims will be set to the address of the retrieved or allocated source.
824 *
825 * SMPng: NOTE: may be called with locks held.
826 * Return 0 if successful, otherwise return a non-zero error code.
827 */
828static int
829inm_get_source(struct in_multi *inm, const in_addr_t haddr,
830    const int noalloc, struct ip_msource **pims)
831{
832	struct ip_msource	 find;
833	struct ip_msource	*ims, *nims;
834#ifdef KTR
835	struct in_addr ia;
836#endif
837
838	find.ims_haddr = haddr;
839	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
840	if (ims == NULL && !noalloc) {
841		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
842			return (ENOSPC);
843		nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
844		    M_NOWAIT | M_ZERO);
845		if (nims == NULL)
846			return (ENOMEM);
847		nims->ims_haddr = haddr;
848		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
849		++inm->inm_nsrc;
850		ims = nims;
851#ifdef KTR
852		ia.s_addr = htonl(haddr);
853		CTR3(KTR_IGMPV3, "%s: allocated %s as %p", __func__,
854		    inet_ntoa(ia), ims);
855#endif
856	}
857
858	*pims = ims;
859	return (0);
860}
861
862/*
863 * Merge socket-layer source into IGMP-layer source.
864 * If rollback is non-zero, perform the inverse of the merge.
865 */
866static void
867ims_merge(struct ip_msource *ims, const struct in_msource *lims,
868    const int rollback)
869{
870	int n = rollback ? -1 : 1;
871#ifdef KTR
872	struct in_addr ia;
873
874	ia.s_addr = htonl(ims->ims_haddr);
875#endif
876
877	if (lims->imsl_st[0] == MCAST_EXCLUDE) {
878		CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on %s",
879		    __func__, n, inet_ntoa(ia));
880		ims->ims_st[1].ex -= n;
881	} else if (lims->imsl_st[0] == MCAST_INCLUDE) {
882		CTR3(KTR_IGMPV3, "%s: t1 in -= %d on %s",
883		    __func__, n, inet_ntoa(ia));
884		ims->ims_st[1].in -= n;
885	}
886
887	if (lims->imsl_st[1] == MCAST_EXCLUDE) {
888		CTR3(KTR_IGMPV3, "%s: t1 ex += %d on %s",
889		    __func__, n, inet_ntoa(ia));
890		ims->ims_st[1].ex += n;
891	} else if (lims->imsl_st[1] == MCAST_INCLUDE) {
892		CTR3(KTR_IGMPV3, "%s: t1 in += %d on %s",
893		    __func__, n, inet_ntoa(ia));
894		ims->ims_st[1].in += n;
895	}
896}
897
898/*
899 * Atomically update the global in_multi state, when a membership's
900 * filter list is being updated in any way.
901 *
902 * imf is the per-inpcb-membership group filter pointer.
903 * A fake imf may be passed for in-kernel consumers.
904 *
905 * XXX This is a candidate for a set-symmetric-difference style loop
906 * which would eliminate the repeated lookup from root of ims nodes,
907 * as they share the same key space.
908 *
909 * If any error occurred this function will back out of refcounts
910 * and return a non-zero value.
911 */
912static int
913inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
914{
915	struct ip_msource	*ims, *nims;
916	struct in_msource	*lims;
917	int			 schanged, error;
918	int			 nsrc0, nsrc1;
919
920	schanged = 0;
921	error = 0;
922	nsrc1 = nsrc0 = 0;
923
924	/*
925	 * Update the source filters first, as this may fail.
926	 * Maintain count of in-mode filters at t0, t1. These are
927	 * used to work out if we transition into ASM mode or not.
928	 * Maintain a count of source filters whose state was
929	 * actually modified by this operation.
930	 */
931	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
932		lims = (struct in_msource *)ims;
933		if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++;
934		if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++;
935		if (lims->imsl_st[0] == lims->imsl_st[1]) continue;
936		error = inm_get_source(inm, lims->ims_haddr, 0, &nims);
937		++schanged;
938		if (error)
939			break;
940		ims_merge(nims, lims, 0);
941	}
942	if (error) {
943		struct ip_msource *bims;
944
945		RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) {
946			lims = (struct in_msource *)ims;
947			if (lims->imsl_st[0] == lims->imsl_st[1])
948				continue;
949			(void)inm_get_source(inm, lims->ims_haddr, 1, &bims);
950			if (bims == NULL)
951				continue;
952			ims_merge(bims, lims, 1);
953		}
954		goto out_reap;
955	}
956
957	CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1",
958	    __func__, nsrc0, nsrc1);
959
960	/* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */
961	if (imf->imf_st[0] == imf->imf_st[1] &&
962	    imf->imf_st[1] == MCAST_INCLUDE) {
963		if (nsrc1 == 0) {
964			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
965			--inm->inm_st[1].iss_in;
966		}
967	}
968
969	/* Handle filter mode transition on socket. */
970	if (imf->imf_st[0] != imf->imf_st[1]) {
971		CTR3(KTR_IGMPV3, "%s: imf transition %d to %d",
972		    __func__, imf->imf_st[0], imf->imf_st[1]);
973
974		if (imf->imf_st[0] == MCAST_EXCLUDE) {
975			CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__);
976			--inm->inm_st[1].iss_ex;
977		} else if (imf->imf_st[0] == MCAST_INCLUDE) {
978			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
979			--inm->inm_st[1].iss_in;
980		}
981
982		if (imf->imf_st[1] == MCAST_EXCLUDE) {
983			CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__);
984			inm->inm_st[1].iss_ex++;
985		} else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) {
986			CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__);
987			inm->inm_st[1].iss_in++;
988		}
989	}
990
991	/*
992	 * Track inm filter state in terms of listener counts.
993	 * If there are any exclusive listeners, stack-wide
994	 * membership is exclusive.
995	 * Otherwise, if only inclusive listeners, stack-wide is inclusive.
996	 * If no listeners remain, state is undefined at t1,
997	 * and the IGMP lifecycle for this group should finish.
998	 */
999	if (inm->inm_st[1].iss_ex > 0) {
1000		CTR1(KTR_IGMPV3, "%s: transition to EX", __func__);
1001		inm->inm_st[1].iss_fmode = MCAST_EXCLUDE;
1002	} else if (inm->inm_st[1].iss_in > 0) {
1003		CTR1(KTR_IGMPV3, "%s: transition to IN", __func__);
1004		inm->inm_st[1].iss_fmode = MCAST_INCLUDE;
1005	} else {
1006		CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__);
1007		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
1008	}
1009
1010	/* Decrement ASM listener count on transition out of ASM mode. */
1011	if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
1012		if ((imf->imf_st[1] != MCAST_EXCLUDE) ||
1013		    (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0))
1014			CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__);
1015			--inm->inm_st[1].iss_asm;
1016	}
1017
1018	/* Increment ASM listener count on transition to ASM mode. */
1019	if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) {
1020		CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__);
1021		inm->inm_st[1].iss_asm++;
1022	}
1023
1024	CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm);
1025	inm_print(inm);
1026
1027out_reap:
1028	if (schanged > 0) {
1029		CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__);
1030		inm_reap(inm);
1031	}
1032	return (error);
1033}
1034
1035/*
1036 * Mark an in_multi's filter set deltas as committed.
1037 * Called by IGMP after a state change has been enqueued.
1038 */
1039void
1040inm_commit(struct in_multi *inm)
1041{
1042	struct ip_msource	*ims;
1043
1044	CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm);
1045	CTR1(KTR_IGMPV3, "%s: pre commit:", __func__);
1046	inm_print(inm);
1047
1048	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
1049		ims->ims_st[0] = ims->ims_st[1];
1050	}
1051	inm->inm_st[0] = inm->inm_st[1];
1052}
1053
1054/*
1055 * Reap unreferenced nodes from an in_multi's filter set.
1056 */
1057static void
1058inm_reap(struct in_multi *inm)
1059{
1060	struct ip_msource	*ims, *tims;
1061
1062	RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
1063		if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 ||
1064		    ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 ||
1065		    ims->ims_stp != 0)
1066			continue;
1067		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
1068		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
1069		free(ims, M_IPMSOURCE);
1070		inm->inm_nsrc--;
1071	}
1072}
1073
1074/*
1075 * Purge all source nodes from an in_multi's filter set.
1076 */
1077static void
1078inm_purge(struct in_multi *inm)
1079{
1080	struct ip_msource	*ims, *tims;
1081
1082	RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
1083		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
1084		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
1085		free(ims, M_IPMSOURCE);
1086		inm->inm_nsrc--;
1087	}
1088}
1089
/*
 * Join a multicast group; entry point for callers which do not
 * already hold the IN_MULTI lock.
 *
 * Takes Giant (if the interface requires it) and the IN_MULTI lock
 * around in_joingroup_locked(), and returns its result unchanged.
 *
 * SMPng: XXX: in_joingroup() is called from in_control() when Giant
 * is not held. Fortunately, ifp is unlikely to have been detached
 * at this point, so we assume it's OK to recurse.
 */
int
in_joingroup(struct ifnet *ifp, const struct in_addr *gina,
    /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
{
	int rc;

	IFF_LOCKGIANT(ifp);
	IN_MULTI_LOCK();

	rc = in_joingroup_locked(ifp, gina, imf, pinm);

	IN_MULTI_UNLOCK();
	IFF_UNLOCKGIANT(ifp);

	return (rc);
}
1111
1112/*
1113 * Join a multicast group; real entry point.
1114 *
1115 * Only preserves atomicity at inm level.
1116 * NOTE: imf argument cannot be const due to sys/tree.h limitations.
1117 *
1118 * If the IGMP downcall fails, the group is not joined, and an error
1119 * code is returned.
1120 */
1121int
1122in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina,
1123    /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
1124{
1125	struct in_mfilter	 timf;
1126	struct in_multi		*inm;
1127	int			 error;
1128
1129	IN_MULTI_LOCK_ASSERT();
1130
1131	CTR4(KTR_IGMPV3, "%s: join %s on %p(%s))", __func__,
1132	    inet_ntoa(*gina), ifp, ifp->if_xname);
1133
1134	error = 0;
1135	inm = NULL;
1136
1137	/*
1138	 * If no imf was specified (i.e. kernel consumer),
1139	 * fake one up and assume it is an ASM join.
1140	 */
1141	if (imf == NULL) {
1142		imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE);
1143		imf = &timf;
1144	}
1145
1146	error = in_getmulti(ifp, gina, &inm);
1147	if (error) {
1148		CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__);
1149		return (error);
1150	}
1151
1152	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
1153	error = inm_merge(inm, imf);
1154	if (error) {
1155		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
1156		goto out_inm_release;
1157	}
1158
1159	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
1160	error = igmp_change_state(inm);
1161	if (error) {
1162		CTR1(KTR_IGMPV3, "%s: failed to update source", __func__);
1163		goto out_inm_release;
1164	}
1165
1166out_inm_release:
1167	if (error) {
1168		CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
1169		inm_release_locked(inm);
1170	} else {
1171		*pinm = inm;
1172	}
1173
1174	return (error);
1175}
1176
1177/*
1178 * Leave a multicast group; unlocked entry point.
1179 */
1180int
1181in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
1182{
1183	struct ifnet *ifp;
1184	int detached, error;
1185
1186	detached = inm_is_ifp_detached(inm);
1187	ifp = inm->inm_ifp;
1188	if (!detached)
1189		IFF_LOCKGIANT(ifp);
1190
1191	IN_MULTI_LOCK();
1192	error = in_leavegroup_locked(inm, imf);
1193	IN_MULTI_UNLOCK();
1194
1195	if (!detached)
1196		IFF_UNLOCKGIANT(ifp);
1197
1198	return (error);
1199}
1200
1201/*
1202 * Leave a multicast group; real entry point.
1203 * All source filters will be expunged.
1204 *
1205 * Only preserves atomicity at inm level.
1206 *
1207 * Holding the write lock for the INP which contains imf
1208 * is highly advisable. We can't assert for it as imf does not
1209 * contain a back-pointer to the owning inp.
1210 *
1211 * Note: This is not the same as inm_release(*) as this function also
1212 * makes a state change downcall into IGMP.
1213 */
1214int
1215in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
1216{
1217	struct in_mfilter	 timf;
1218	int			 error;
1219
1220	error = 0;
1221
1222#if defined(INVARIANTS) && defined(IFF_ASSERTGIANT)
1223	if (!inm_is_ifp_detached(inm))
1224		IFF_ASSERTGIANT(inm->inm_ifp);
1225#endif
1226
1227	IN_MULTI_LOCK_ASSERT();
1228
1229	CTR5(KTR_IGMPV3, "%s: leave inm %p, %s/%s, imf %p", __func__,
1230	    inm, inet_ntoa(inm->inm_addr),
1231	    (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname),
1232	    imf);
1233
1234	/*
1235	 * If no imf was specified (i.e. kernel consumer),
1236	 * fake one up and assume it is an ASM join.
1237	 */
1238	if (imf == NULL) {
1239		imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED);
1240		imf = &timf;
1241	}
1242
1243	/*
1244	 * Begin state merge transaction at IGMP layer.
1245	 *
1246	 * As this particular invocation should not cause any memory
1247	 * to be allocated, and there is no opportunity to roll back
1248	 * the transaction, it MUST NOT fail.
1249	 */
1250	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
1251	error = inm_merge(inm, imf);
1252	KASSERT(error == 0, ("%s: failed to merge inm state", __func__));
1253
1254	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
1255	error = igmp_change_state(inm);
1256	if (error)
1257		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
1258
1259	CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
1260	inm_release_locked(inm);
1261
1262	return (error);
1263}
1264
1265/*#ifndef BURN_BRIDGES*/
1266/*
1267 * Join an IPv4 multicast group in (*,G) exclusive mode.
1268 * The group must be a 224.0.0.0/24 link-scope group.
1269 * This KPI is for legacy kernel consumers only.
1270 */
1271struct in_multi *
1272in_addmulti(struct in_addr *ap, struct ifnet *ifp)
1273{
1274	struct in_multi *pinm;
1275	int error;
1276
1277	KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)),
1278	    ("%s: %s not in 224.0.0.0/24", __func__, inet_ntoa(*ap)));
1279
1280	error = in_joingroup(ifp, ap, NULL, &pinm);
1281	if (error != 0)
1282		pinm = NULL;
1283
1284	return (pinm);
1285}
1286
1287/*
1288 * Leave an IPv4 multicast group, assumed to be in exclusive (*,G) mode.
1289 * This KPI is for legacy kernel consumers only.
1290 */
1291void
1292in_delmulti(struct in_multi *inm)
1293{
1294
1295	(void)in_leavegroup(inm, NULL);
1296}
1297/*#endif*/
1298
1299/*
1300 * Block or unblock an ASM multicast source on an inpcb.
1301 * This implements the delta-based API described in RFC 3678.
1302 *
1303 * The delta-based API applies only to exclusive-mode memberships.
1304 * An IGMP downcall will be performed.
1305 *
1306 * SMPng: NOTE: Must take Giant as a join may create a new ifma.
1307 *
1308 * Return 0 if successful, otherwise return an appropriate error code.
1309 */
1310static int
1311inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
1312{
1313	INIT_VNET_NET(curvnet);
1314	INIT_VNET_INET(curvnet);
1315	struct group_source_req		 gsr;
1316	sockunion_t			*gsa, *ssa;
1317	struct ifnet			*ifp;
1318	struct in_mfilter		*imf;
1319	struct ip_moptions		*imo;
1320	struct in_msource		*ims;
1321	struct in_multi			*inm;
1322	size_t				 idx;
1323	uint16_t			 fmode;
1324	int				 error, doblock;
1325
1326	ifp = NULL;
1327	error = 0;
1328	doblock = 0;
1329
1330	memset(&gsr, 0, sizeof(struct group_source_req));
1331	gsa = (sockunion_t *)&gsr.gsr_group;
1332	ssa = (sockunion_t *)&gsr.gsr_source;
1333
1334	switch (sopt->sopt_name) {
1335	case IP_BLOCK_SOURCE:
1336	case IP_UNBLOCK_SOURCE: {
1337		struct ip_mreq_source	 mreqs;
1338
1339		error = sooptcopyin(sopt, &mreqs,
1340		    sizeof(struct ip_mreq_source),
1341		    sizeof(struct ip_mreq_source));
1342		if (error)
1343			return (error);
1344
1345		gsa->sin.sin_family = AF_INET;
1346		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1347		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1348
1349		ssa->sin.sin_family = AF_INET;
1350		ssa->sin.sin_len = sizeof(struct sockaddr_in);
1351		ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1352
1353		if (!in_nullhost(mreqs.imr_interface))
1354			INADDR_TO_IFP(mreqs.imr_interface, ifp);
1355
1356		if (sopt->sopt_name == IP_BLOCK_SOURCE)
1357			doblock = 1;
1358
1359		CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
1360		    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1361		break;
1362	    }
1363
1364	case MCAST_BLOCK_SOURCE:
1365	case MCAST_UNBLOCK_SOURCE:
1366		error = sooptcopyin(sopt, &gsr,
1367		    sizeof(struct group_source_req),
1368		    sizeof(struct group_source_req));
1369		if (error)
1370			return (error);
1371
1372		if (gsa->sin.sin_family != AF_INET ||
1373		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1374			return (EINVAL);
1375
1376		if (ssa->sin.sin_family != AF_INET ||
1377		    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1378			return (EINVAL);
1379
1380		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
1381			return (EADDRNOTAVAIL);
1382
1383		ifp = ifnet_byindex(gsr.gsr_interface);
1384
1385		if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
1386			doblock = 1;
1387		break;
1388
1389	default:
1390		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
1391		    __func__, sopt->sopt_name);
1392		return (EOPNOTSUPP);
1393		break;
1394	}
1395
1396	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1397		return (EINVAL);
1398
1399	IFF_LOCKGIANT(ifp);
1400
1401	/*
1402	 * Check if we are actually a member of this group.
1403	 */
1404	imo = inp_findmoptions(inp);
1405	idx = imo_match_group(imo, ifp, &gsa->sa);
1406	if (idx == -1 || imo->imo_mfilters == NULL) {
1407		error = EADDRNOTAVAIL;
1408		goto out_inp_locked;
1409	}
1410
1411	KASSERT(imo->imo_mfilters != NULL,
1412	    ("%s: imo_mfilters not allocated", __func__));
1413	imf = &imo->imo_mfilters[idx];
1414	inm = imo->imo_membership[idx];
1415
1416	/*
1417	 * Attempting to use the delta-based API on an
1418	 * non exclusive-mode membership is an error.
1419	 */
1420	fmode = imf->imf_st[0];
1421	if (fmode != MCAST_EXCLUDE) {
1422		error = EINVAL;
1423		goto out_inp_locked;
1424	}
1425
1426	/*
1427	 * Deal with error cases up-front:
1428	 *  Asked to block, but already blocked; or
1429	 *  Asked to unblock, but nothing to unblock.
1430	 * If adding a new block entry, allocate it.
1431	 */
1432	ims = imo_match_source(imo, idx, &ssa->sa);
1433	if ((ims != NULL && doblock) || (ims == NULL && !doblock)) {
1434		CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__,
1435		    inet_ntoa(ssa->sin.sin_addr), doblock ? "" : "not ");
1436		error = EADDRNOTAVAIL;
1437		goto out_inp_locked;
1438	}
1439
1440	INP_WLOCK_ASSERT(inp);
1441
1442	/*
1443	 * Begin state merge transaction at socket layer.
1444	 */
1445	if (doblock) {
1446		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
1447		ims = imf_graft(imf, fmode, &ssa->sin);
1448		if (ims == NULL)
1449			error = ENOMEM;
1450	} else {
1451		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
1452		error = imf_prune(imf, &ssa->sin);
1453	}
1454
1455	if (error) {
1456		CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__);
1457		goto out_imf_rollback;
1458	}
1459
1460	/*
1461	 * Begin state merge transaction at IGMP layer.
1462	 */
1463	IN_MULTI_LOCK();
1464
1465	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
1466	error = inm_merge(inm, imf);
1467	if (error) {
1468		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
1469		goto out_imf_rollback;
1470	}
1471
1472	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
1473	error = igmp_change_state(inm);
1474	if (error)
1475		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
1476
1477	IN_MULTI_UNLOCK();
1478
1479out_imf_rollback:
1480	if (error)
1481		imf_rollback(imf);
1482	else
1483		imf_commit(imf);
1484
1485	imf_reap(imf);
1486
1487out_inp_locked:
1488	INP_WUNLOCK(inp);
1489	IFF_UNLOCKGIANT(ifp);
1490	return (error);
1491}
1492
1493/*
1494 * Given an inpcb, return its multicast options structure pointer.  Accepts
1495 * an unlocked inpcb pointer, but will return it locked.  May sleep.
1496 *
1497 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
1498 * SMPng: NOTE: Returns with the INP write lock held.
1499 */
1500static struct ip_moptions *
1501inp_findmoptions(struct inpcb *inp)
1502{
1503	struct ip_moptions	 *imo;
1504	struct in_multi		**immp;
1505	struct in_mfilter	 *imfp;
1506	size_t			  idx;
1507
1508	INP_WLOCK(inp);
1509	if (inp->inp_moptions != NULL)
1510		return (inp->inp_moptions);
1511
1512	INP_WUNLOCK(inp);
1513
1514	imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK);
1515	immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS,
1516	    M_WAITOK | M_ZERO);
1517	imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS,
1518	    M_INMFILTER, M_WAITOK);
1519
1520	imo->imo_multicast_ifp = NULL;
1521	imo->imo_multicast_addr.s_addr = INADDR_ANY;
1522	imo->imo_multicast_vif = -1;
1523	imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1524	imo->imo_multicast_loop = in_mcast_loop;
1525	imo->imo_num_memberships = 0;
1526	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
1527	imo->imo_membership = immp;
1528
1529	/* Initialize per-group source filters. */
1530	for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++)
1531		imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE);
1532	imo->imo_mfilters = imfp;
1533
1534	INP_WLOCK(inp);
1535	if (inp->inp_moptions != NULL) {
1536		free(imfp, M_INMFILTER);
1537		free(immp, M_IPMOPTS);
1538		free(imo, M_IPMOPTS);
1539		return (inp->inp_moptions);
1540	}
1541	inp->inp_moptions = imo;
1542	return (imo);
1543}
1544
1545/*
1546 * Discard the IP multicast options (and source filters).
1547 *
1548 * SMPng: NOTE: assumes INP write lock is held.
1549 */
1550void
1551inp_freemoptions(struct ip_moptions *imo)
1552{
1553	struct in_mfilter	*imf;
1554	size_t			 idx, nmships;
1555
1556	KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
1557
1558	nmships = imo->imo_num_memberships;
1559	for (idx = 0; idx < nmships; ++idx) {
1560		imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL;
1561		if (imf)
1562			imf_leave(imf);
1563		(void)in_leavegroup(imo->imo_membership[idx], imf);
1564		if (imf)
1565			imf_purge(imf);
1566	}
1567
1568	if (imo->imo_mfilters)
1569		free(imo->imo_mfilters, M_INMFILTER);
1570	free(imo->imo_membership, M_IPMOPTS);
1571	free(imo, M_IPMOPTS);
1572}
1573
1574/*
1575 * Atomically get source filters on a socket for an IPv4 multicast group.
1576 * Called with INP lock held; returns with lock released.
1577 */
1578static int
1579inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
1580{
1581	INIT_VNET_NET(curvnet);
1582	struct __msfilterreq	 msfr;
1583	sockunion_t		*gsa;
1584	struct ifnet		*ifp;
1585	struct ip_moptions	*imo;
1586	struct in_mfilter	*imf;
1587	struct ip_msource	*ims;
1588	struct in_msource	*lims;
1589	struct sockaddr_in	*psin;
1590	struct sockaddr_storage	*ptss;
1591	struct sockaddr_storage	*tss;
1592	int			 error;
1593	size_t			 idx, nsrcs, ncsrcs;
1594
1595	INP_WLOCK_ASSERT(inp);
1596
1597	imo = inp->inp_moptions;
1598	KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));
1599
1600	INP_WUNLOCK(inp);
1601
1602	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
1603	    sizeof(struct __msfilterreq));
1604	if (error)
1605		return (error);
1606
1607	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
1608		return (EINVAL);
1609
1610	ifp = ifnet_byindex(msfr.msfr_ifindex);
1611	if (ifp == NULL)
1612		return (EINVAL);
1613
1614	INP_WLOCK(inp);
1615
1616	/*
1617	 * Lookup group on the socket.
1618	 */
1619	gsa = (sockunion_t *)&msfr.msfr_group;
1620	idx = imo_match_group(imo, ifp, &gsa->sa);
1621	if (idx == -1 || imo->imo_mfilters == NULL) {
1622		INP_WUNLOCK(inp);
1623		return (EADDRNOTAVAIL);
1624	}
1625	imf = &imo->imo_mfilters[idx];
1626
1627	/*
1628	 * Ignore memberships which are in limbo.
1629	 */
1630	if (imf->imf_st[1] == MCAST_UNDEFINED) {
1631		INP_WUNLOCK(inp);
1632		return (EAGAIN);
1633	}
1634	msfr.msfr_fmode = imf->imf_st[1];
1635
1636	/*
1637	 * If the user specified a buffer, copy out the source filter
1638	 * entries to userland gracefully.
1639	 * We only copy out the number of entries which userland
1640	 * has asked for, but we always tell userland how big the
1641	 * buffer really needs to be.
1642	 */
1643	tss = NULL;
1644	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
1645		tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
1646		    M_TEMP, M_NOWAIT | M_ZERO);
1647		if (tss == NULL) {
1648			INP_WUNLOCK(inp);
1649			return (ENOBUFS);
1650		}
1651	}
1652
1653	/*
1654	 * Count number of sources in-mode at t0.
1655	 * If buffer space exists and remains, copy out source entries.
1656	 */
1657	nsrcs = msfr.msfr_nsrcs;
1658	ncsrcs = 0;
1659	ptss = tss;
1660	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
1661		lims = (struct in_msource *)ims;
1662		if (lims->imsl_st[0] == MCAST_UNDEFINED ||
1663		    lims->imsl_st[0] != imf->imf_st[0])
1664			continue;
1665		++ncsrcs;
1666		if (tss != NULL && nsrcs-- > 0) {
1667			psin = (struct sockaddr_in *)ptss++;
1668			psin->sin_family = AF_INET;
1669			psin->sin_len = sizeof(struct sockaddr_in);
1670			psin->sin_addr.s_addr = htonl(lims->ims_haddr);
1671		}
1672	}
1673
1674	INP_WUNLOCK(inp);
1675
1676	if (tss != NULL) {
1677		error = copyout(tss, msfr.msfr_srcs,
1678		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
1679		free(tss, M_TEMP);
1680		if (error)
1681			return (error);
1682	}
1683
1684	msfr.msfr_nsrcs = ncsrcs;
1685	error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));
1686
1687	return (error);
1688}
1689
1690/*
1691 * Return the IP multicast options in response to user getsockopt().
1692 */
1693int
1694inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
1695{
1696	INIT_VNET_INET(curvnet);
1697	struct ip_mreqn		 mreqn;
1698	struct ip_moptions	*imo;
1699	struct ifnet		*ifp;
1700	struct in_ifaddr	*ia;
1701	int			 error, optval;
1702	u_char			 coptval;
1703
1704	INP_WLOCK(inp);
1705	imo = inp->inp_moptions;
1706	/*
1707	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
1708	 * or is a divert socket, reject it.
1709	 */
1710	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
1711	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
1712	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
1713		INP_WUNLOCK(inp);
1714		return (EOPNOTSUPP);
1715	}
1716
1717	error = 0;
1718	switch (sopt->sopt_name) {
1719	case IP_MULTICAST_VIF:
1720		if (imo != NULL)
1721			optval = imo->imo_multicast_vif;
1722		else
1723			optval = -1;
1724		INP_WUNLOCK(inp);
1725		error = sooptcopyout(sopt, &optval, sizeof(int));
1726		break;
1727
1728	case IP_MULTICAST_IF:
1729		memset(&mreqn, 0, sizeof(struct ip_mreqn));
1730		if (imo != NULL) {
1731			ifp = imo->imo_multicast_ifp;
1732			if (!in_nullhost(imo->imo_multicast_addr)) {
1733				mreqn.imr_address = imo->imo_multicast_addr;
1734			} else if (ifp != NULL) {
1735				mreqn.imr_ifindex = ifp->if_index;
1736				IFP_TO_IA(ifp, ia);
1737				if (ia != NULL) {
1738					mreqn.imr_address =
1739					    IA_SIN(ia)->sin_addr;
1740				}
1741			}
1742		}
1743		INP_WUNLOCK(inp);
1744		if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
1745			error = sooptcopyout(sopt, &mreqn,
1746			    sizeof(struct ip_mreqn));
1747		} else {
1748			error = sooptcopyout(sopt, &mreqn.imr_address,
1749			    sizeof(struct in_addr));
1750		}
1751		break;
1752
1753	case IP_MULTICAST_TTL:
1754		if (imo == 0)
1755			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
1756		else
1757			optval = coptval = imo->imo_multicast_ttl;
1758		INP_WUNLOCK(inp);
1759		if (sopt->sopt_valsize == sizeof(u_char))
1760			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
1761		else
1762			error = sooptcopyout(sopt, &optval, sizeof(int));
1763		break;
1764
1765	case IP_MULTICAST_LOOP:
1766		if (imo == 0)
1767			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
1768		else
1769			optval = coptval = imo->imo_multicast_loop;
1770		INP_WUNLOCK(inp);
1771		if (sopt->sopt_valsize == sizeof(u_char))
1772			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
1773		else
1774			error = sooptcopyout(sopt, &optval, sizeof(int));
1775		break;
1776
1777	case IP_MSFILTER:
1778		if (imo == NULL) {
1779			error = EADDRNOTAVAIL;
1780			INP_WUNLOCK(inp);
1781		} else {
1782			error = inp_get_source_filters(inp, sopt);
1783		}
1784		break;
1785
1786	default:
1787		INP_WUNLOCK(inp);
1788		error = ENOPROTOOPT;
1789		break;
1790	}
1791
1792	INP_UNLOCK_ASSERT(inp);
1793
1794	return (error);
1795}
1796
1797/*
1798 * Look up the ifnet to use for a multicast group membership,
1799 * given the IPv4 address of an interface, and the IPv4 group address.
1800 *
1801 * This routine exists to support legacy multicast applications
1802 * which do not understand that multicast memberships are scoped to
1803 * specific physical links in the networking stack, or which need
1804 * to join link-scope groups before IPv4 addresses are configured.
1805 *
1806 * If inp is non-NULL, use this socket's current FIB number for any
1807 * required FIB lookup.
1808 * If ina is INADDR_ANY, look up the group address in the unicast FIB,
1809 * and use its ifp; usually, this points to the default next-hop.
1810 *
1811 * If the FIB lookup fails, attempt to use the first non-loopback
1812 * interface with multicast capability in the system as a
1813 * last resort. The legacy IPv4 ASM API requires that we do
1814 * this in order to allow groups to be joined when the routing
1815 * table has not yet been populated during boot.
1816 *
1817 * Returns NULL if no ifp could be found.
1818 *
1819 * SMPng: TODO: Acquire the appropriate locks for INADDR_TO_IFP.
1820 * FUTURE: Implement IPv4 source-address selection.
1821 */
1822static struct ifnet *
1823inp_lookup_mcast_ifp(const struct inpcb *inp,
1824    const struct sockaddr_in *gsin, const struct in_addr ina)
1825{
1826	struct ifnet *ifp;
1827
1828	KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__));
1829	KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)),
1830	    ("%s: not multicast", __func__));
1831
1832	ifp = NULL;
1833	if (!in_nullhost(ina)) {
1834		INADDR_TO_IFP(ina, ifp);
1835	} else {
1836		struct route ro;
1837
1838		ro.ro_rt = NULL;
1839		memcpy(&ro.ro_dst, gsin, sizeof(struct sockaddr_in));
1840		in_rtalloc_ign(&ro, 0, inp ? inp->inp_inc.inc_fibnum : 0);
1841		if (ro.ro_rt != NULL) {
1842			ifp = ro.ro_rt->rt_ifp;
1843			KASSERT(ifp != NULL, ("%s: null ifp", __func__));
1844			RTFREE(ro.ro_rt);
1845		} else {
1846			struct in_ifaddr *ia;
1847			struct ifnet *mifp;
1848
1849			mifp = NULL;
1850			TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
1851				mifp = ia->ia_ifp;
1852				if (!(mifp->if_flags & IFF_LOOPBACK) &&
1853				     (mifp->if_flags & IFF_MULTICAST)) {
1854					ifp = mifp;
1855					break;
1856				}
1857			}
1858		}
1859	}
1860
1861	return (ifp);
1862}
1863
1864/*
1865 * Join an IPv4 multicast group, possibly with a source.
1866 */
1867static int
1868inp_join_group(struct inpcb *inp, struct sockopt *sopt)
1869{
1870	INIT_VNET_NET(curvnet);
1871	INIT_VNET_INET(curvnet);
1872	struct group_source_req		 gsr;
1873	sockunion_t			*gsa, *ssa;
1874	struct ifnet			*ifp;
1875	struct in_mfilter		*imf;
1876	struct ip_moptions		*imo;
1877	struct in_multi			*inm;
1878	struct in_msource		*lims;
1879	size_t				 idx;
1880	int				 error, is_new;
1881
1882	ifp = NULL;
1883	imf = NULL;
1884	error = 0;
1885	is_new = 0;
1886
1887	memset(&gsr, 0, sizeof(struct group_source_req));
1888	gsa = (sockunion_t *)&gsr.gsr_group;
1889	gsa->ss.ss_family = AF_UNSPEC;
1890	ssa = (sockunion_t *)&gsr.gsr_source;
1891	ssa->ss.ss_family = AF_UNSPEC;
1892
1893	switch (sopt->sopt_name) {
1894	case IP_ADD_MEMBERSHIP:
1895	case IP_ADD_SOURCE_MEMBERSHIP: {
1896		struct ip_mreq_source	 mreqs;
1897
1898		if (sopt->sopt_name == IP_ADD_MEMBERSHIP) {
1899			error = sooptcopyin(sopt, &mreqs,
1900			    sizeof(struct ip_mreq),
1901			    sizeof(struct ip_mreq));
1902			/*
1903			 * Do argument switcharoo from ip_mreq into
1904			 * ip_mreq_source to avoid using two instances.
1905			 */
1906			mreqs.imr_interface = mreqs.imr_sourceaddr;
1907			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
1908		} else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
1909			error = sooptcopyin(sopt, &mreqs,
1910			    sizeof(struct ip_mreq_source),
1911			    sizeof(struct ip_mreq_source));
1912		}
1913		if (error)
1914			return (error);
1915
1916		gsa->sin.sin_family = AF_INET;
1917		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1918		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1919
1920		if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
1921			ssa->sin.sin_family = AF_INET;
1922			ssa->sin.sin_len = sizeof(struct sockaddr_in);
1923			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1924		}
1925
1926		ifp = inp_lookup_mcast_ifp(inp, &gsa->sin,
1927		    mreqs.imr_interface);
1928		CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
1929		    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1930		break;
1931	}
1932
1933	case MCAST_JOIN_GROUP:
1934	case MCAST_JOIN_SOURCE_GROUP:
1935		if (sopt->sopt_name == MCAST_JOIN_GROUP) {
1936			error = sooptcopyin(sopt, &gsr,
1937			    sizeof(struct group_req),
1938			    sizeof(struct group_req));
1939		} else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1940			error = sooptcopyin(sopt, &gsr,
1941			    sizeof(struct group_source_req),
1942			    sizeof(struct group_source_req));
1943		}
1944		if (error)
1945			return (error);
1946
1947		if (gsa->sin.sin_family != AF_INET ||
1948		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1949			return (EINVAL);
1950
1951		/*
1952		 * Overwrite the port field if present, as the sockaddr
1953		 * being copied in may be matched with a binary comparison.
1954		 */
1955		gsa->sin.sin_port = 0;
1956		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1957			if (ssa->sin.sin_family != AF_INET ||
1958			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1959				return (EINVAL);
1960			ssa->sin.sin_port = 0;
1961		}
1962
1963		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
1964			return (EADDRNOTAVAIL);
1965		ifp = ifnet_byindex(gsr.gsr_interface);
1966		break;
1967
1968	default:
1969		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
1970		    __func__, sopt->sopt_name);
1971		return (EOPNOTSUPP);
1972		break;
1973	}
1974
1975	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1976		return (EINVAL);
1977
1978	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
1979		return (EADDRNOTAVAIL);
1980
1981	IFF_LOCKGIANT(ifp);
1982
1983	/*
1984	 * MCAST_JOIN_SOURCE on an exclusive membership is an error.
1985	 * On an existing inclusive membership, it just adds the
1986	 * source to the filter list.
1987	 */
1988	imo = inp_findmoptions(inp);
1989	idx = imo_match_group(imo, ifp, &gsa->sa);
1990	if (idx == -1) {
1991		is_new = 1;
1992	} else {
1993		inm = imo->imo_membership[idx];
1994		imf = &imo->imo_mfilters[idx];
1995		if (ssa->ss.ss_family != AF_UNSPEC &&
1996		    imf->imf_st[1] != MCAST_INCLUDE) {
1997			error = EINVAL;
1998			goto out_inp_locked;
1999		}
2000		lims = imo_match_source(imo, idx, &ssa->sa);
2001		if (lims != NULL) {
2002			error = EADDRNOTAVAIL;
2003			goto out_inp_locked;
2004		}
2005	}
2006
2007	/*
2008	 * Begin state merge transaction at socket layer.
2009	 */
2010	INP_WLOCK_ASSERT(inp);
2011
2012	if (is_new) {
2013		if (imo->imo_num_memberships == imo->imo_max_memberships) {
2014			error = imo_grow(imo);
2015			if (error)
2016				goto out_inp_locked;
2017		}
2018		/*
2019		 * Allocate the new slot upfront so we can deal with
2020		 * grafting the new source filter in same code path
2021		 * as for join-source on existing membership.
2022		 */
2023		idx = imo->imo_num_memberships;
2024		imo->imo_membership[idx] = NULL;
2025		imo->imo_num_memberships++;
2026		KASSERT(imo->imo_mfilters != NULL,
2027		    ("%s: imf_mfilters vector was not allocated", __func__));
2028		imf = &imo->imo_mfilters[idx];
2029		KASSERT(RB_EMPTY(&imf->imf_sources),
2030		    ("%s: imf_sources not empty", __func__));
2031	}
2032
2033	/*
2034	 * Graft new source into filter list for this inpcb's
2035	 * membership of the group. The in_multi may not have
2036	 * been allocated yet if this is a new membership.
2037	 */
2038	if (ssa->ss.ss_family != AF_UNSPEC) {
2039		/* Membership starts in IN mode */
2040		if (is_new) {
2041			CTR1(KTR_IGMPV3, "%s: new join w/source", __func__);
2042			imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE);
2043		} else {
2044			CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
2045		}
2046		lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin);
2047		if (lims == NULL) {
2048			CTR1(KTR_IGMPV3, "%s: merge imf state failed",
2049			    __func__);
2050			error = ENOMEM;
2051			goto out_imo_free;
2052		}
2053	}
2054
2055	/*
2056	 * Begin state merge transaction at IGMP layer.
2057	 */
2058	IN_MULTI_LOCK();
2059
2060	if (is_new) {
2061		error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf,
2062		    &inm);
2063		if (error)
2064			goto out_imo_free;
2065		imo->imo_membership[idx] = inm;
2066	} else {
2067		CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2068		error = inm_merge(inm, imf);
2069		if (error) {
2070			CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
2071			    __func__);
2072			goto out_imf_rollback;
2073		}
2074		CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2075		error = igmp_change_state(inm);
2076		if (error) {
2077			CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
2078			    __func__);
2079			goto out_imf_rollback;
2080		}
2081	}
2082
2083	IN_MULTI_UNLOCK();
2084
2085out_imf_rollback:
2086	INP_WLOCK_ASSERT(inp);
2087	if (error) {
2088		imf_rollback(imf);
2089		if (is_new)
2090			imf_purge(imf);
2091		else
2092			imf_reap(imf);
2093	} else {
2094		imf_commit(imf);
2095	}
2096
2097out_imo_free:
2098	if (error && is_new) {
2099		imo->imo_membership[idx] = NULL;
2100		--imo->imo_num_memberships;
2101	}
2102
2103out_inp_locked:
2104	INP_WUNLOCK(inp);
2105	IFF_UNLOCKGIANT(ifp);
2106	return (error);
2107}
2108
2109/*
2110 * Leave an IPv4 multicast group on an inpcb, possibly with a source.
2111 */
2112static int
2113inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
2114{
2115	INIT_VNET_NET(curvnet);
2116	INIT_VNET_INET(curvnet);
2117	struct group_source_req		 gsr;
2118	struct ip_mreq_source		 mreqs;
2119	sockunion_t			*gsa, *ssa;
2120	struct ifnet			*ifp;
2121	struct in_mfilter		*imf;
2122	struct ip_moptions		*imo;
2123	struct in_msource		*ims;
2124	struct in_multi			*inm;
2125	size_t				 idx;
2126	int				 error, is_final;
2127
2128	ifp = NULL;
2129	error = 0;
2130	is_final = 1;
2131
2132	memset(&gsr, 0, sizeof(struct group_source_req));
2133	gsa = (sockunion_t *)&gsr.gsr_group;
2134	gsa->ss.ss_family = AF_UNSPEC;
2135	ssa = (sockunion_t *)&gsr.gsr_source;
2136	ssa->ss.ss_family = AF_UNSPEC;
2137
2138	switch (sopt->sopt_name) {
2139	case IP_DROP_MEMBERSHIP:
2140	case IP_DROP_SOURCE_MEMBERSHIP:
2141		if (sopt->sopt_name == IP_DROP_MEMBERSHIP) {
2142			error = sooptcopyin(sopt, &mreqs,
2143			    sizeof(struct ip_mreq),
2144			    sizeof(struct ip_mreq));
2145			/*
2146			 * Swap interface and sourceaddr arguments,
2147			 * as ip_mreq and ip_mreq_source are laid
2148			 * out differently.
2149			 */
2150			mreqs.imr_interface = mreqs.imr_sourceaddr;
2151			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
2152		} else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
2153			error = sooptcopyin(sopt, &mreqs,
2154			    sizeof(struct ip_mreq_source),
2155			    sizeof(struct ip_mreq_source));
2156		}
2157		if (error)
2158			return (error);
2159
2160		gsa->sin.sin_family = AF_INET;
2161		gsa->sin.sin_len = sizeof(struct sockaddr_in);
2162		gsa->sin.sin_addr = mreqs.imr_multiaddr;
2163
2164		if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
2165			ssa->sin.sin_family = AF_INET;
2166			ssa->sin.sin_len = sizeof(struct sockaddr_in);
2167			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
2168		}
2169
2170		if (!in_nullhost(gsa->sin.sin_addr))
2171			INADDR_TO_IFP(mreqs.imr_interface, ifp);
2172
2173		CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
2174		    __func__, inet_ntoa(mreqs.imr_interface), ifp);
2175
2176		break;
2177
2178	case MCAST_LEAVE_GROUP:
2179	case MCAST_LEAVE_SOURCE_GROUP:
2180		if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
2181			error = sooptcopyin(sopt, &gsr,
2182			    sizeof(struct group_req),
2183			    sizeof(struct group_req));
2184		} else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
2185			error = sooptcopyin(sopt, &gsr,
2186			    sizeof(struct group_source_req),
2187			    sizeof(struct group_source_req));
2188		}
2189		if (error)
2190			return (error);
2191
2192		if (gsa->sin.sin_family != AF_INET ||
2193		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
2194			return (EINVAL);
2195
2196		if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
2197			if (ssa->sin.sin_family != AF_INET ||
2198			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
2199				return (EINVAL);
2200		}
2201
2202		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
2203			return (EADDRNOTAVAIL);
2204
2205		ifp = ifnet_byindex(gsr.gsr_interface);
2206		break;
2207
2208	default:
2209		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
2210		    __func__, sopt->sopt_name);
2211		return (EOPNOTSUPP);
2212		break;
2213	}
2214
2215	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
2216		return (EINVAL);
2217
2218	if (ifp)
2219		IFF_LOCKGIANT(ifp);
2220
2221	/*
2222	 * Find the membership in the membership array.
2223	 */
2224	imo = inp_findmoptions(inp);
2225	idx = imo_match_group(imo, ifp, &gsa->sa);
2226	if (idx == -1) {
2227		error = EADDRNOTAVAIL;
2228		goto out_inp_locked;
2229	}
2230	inm = imo->imo_membership[idx];
2231	imf = &imo->imo_mfilters[idx];
2232
2233	if (ssa->ss.ss_family != AF_UNSPEC)
2234		is_final = 0;
2235
2236	/*
2237	 * Begin state merge transaction at socket layer.
2238	 */
2239	INP_WLOCK_ASSERT(inp);
2240
2241	/*
2242	 * If we were instructed only to leave a given source, do so.
2243	 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships.
2244	 */
2245	if (is_final) {
2246		imf_leave(imf);
2247	} else {
2248		if (imf->imf_st[0] == MCAST_EXCLUDE) {
2249			error = EADDRNOTAVAIL;
2250			goto out_inp_locked;
2251		}
2252		ims = imo_match_source(imo, idx, &ssa->sa);
2253		if (ims == NULL) {
2254			CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__,
2255			    inet_ntoa(ssa->sin.sin_addr), "not ");
2256			error = EADDRNOTAVAIL;
2257			goto out_inp_locked;
2258		}
2259		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
2260		error = imf_prune(imf, &ssa->sin);
2261		if (error) {
2262			CTR1(KTR_IGMPV3, "%s: merge imf state failed",
2263			    __func__);
2264			goto out_inp_locked;
2265		}
2266	}
2267
2268	/*
2269	 * Begin state merge transaction at IGMP layer.
2270	 */
2271	IN_MULTI_LOCK();
2272
2273	if (is_final) {
2274		/*
2275		 * Give up the multicast address record to which
2276		 * the membership points.
2277		 */
2278		(void)in_leavegroup_locked(inm, imf);
2279	} else {
2280		CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2281		error = inm_merge(inm, imf);
2282		if (error) {
2283			CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
2284			    __func__);
2285			goto out_imf_rollback;
2286		}
2287
2288		CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2289		error = igmp_change_state(inm);
2290		if (error) {
2291			CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
2292			    __func__);
2293		}
2294	}
2295
2296	IN_MULTI_UNLOCK();
2297
2298out_imf_rollback:
2299	if (error)
2300		imf_rollback(imf);
2301	else
2302		imf_commit(imf);
2303
2304	imf_reap(imf);
2305
2306	if (is_final) {
2307		/* Remove the gap in the membership array. */
2308		for (++idx; idx < imo->imo_num_memberships; ++idx)
2309			imo->imo_membership[idx-1] = imo->imo_membership[idx];
2310		imo->imo_num_memberships--;
2311	}
2312
2313out_inp_locked:
2314	INP_WUNLOCK(inp);
2315	if (ifp)
2316		IFF_UNLOCKGIANT(ifp);
2317	return (error);
2318}
2319
2320/*
2321 * Select the interface for transmitting IPv4 multicast datagrams.
2322 *
2323 * Either an instance of struct in_addr or an instance of struct ip_mreqn
2324 * may be passed to this socket option. An address of INADDR_ANY or an
2325 * interface index of 0 is used to remove a previous selection.
2326 * When no interface is selected, one is chosen for every send.
2327 */
2328static int
2329inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
2330{
2331	INIT_VNET_NET(curvnet);
2332	struct in_addr		 addr;
2333	struct ip_mreqn		 mreqn;
2334	struct ifnet		*ifp;
2335	struct ip_moptions	*imo;
2336	int			 error;
2337
2338	if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
2339		/*
2340		 * An interface index was specified using the
2341		 * Linux-derived ip_mreqn structure.
2342		 */
2343		error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn),
2344		    sizeof(struct ip_mreqn));
2345		if (error)
2346			return (error);
2347
2348		if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex)
2349			return (EINVAL);
2350
2351		if (mreqn.imr_ifindex == 0) {
2352			ifp = NULL;
2353		} else {
2354			ifp = ifnet_byindex(mreqn.imr_ifindex);
2355			if (ifp == NULL)
2356				return (EADDRNOTAVAIL);
2357		}
2358	} else {
2359		/*
2360		 * An interface was specified by IPv4 address.
2361		 * This is the traditional BSD usage.
2362		 */
2363		error = sooptcopyin(sopt, &addr, sizeof(struct in_addr),
2364		    sizeof(struct in_addr));
2365		if (error)
2366			return (error);
2367		if (in_nullhost(addr)) {
2368			ifp = NULL;
2369		} else {
2370			INADDR_TO_IFP(addr, ifp);
2371			if (ifp == NULL)
2372				return (EADDRNOTAVAIL);
2373		}
2374		CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = %s", __func__, ifp,
2375		    inet_ntoa(addr));
2376	}
2377
2378	/* Reject interfaces which do not support multicast. */
2379	if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0)
2380		return (EOPNOTSUPP);
2381
2382	imo = inp_findmoptions(inp);
2383	imo->imo_multicast_ifp = ifp;
2384	imo->imo_multicast_addr.s_addr = INADDR_ANY;
2385	INP_WUNLOCK(inp);
2386
2387	return (0);
2388}
2389
2390/*
2391 * Atomically set source filters on a socket for an IPv4 multicast group.
2392 *
2393 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
2394 */
2395static int
2396inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
2397{
2398	INIT_VNET_NET(curvnet);
2399	struct __msfilterreq	 msfr;
2400	sockunion_t		*gsa;
2401	struct ifnet		*ifp;
2402	struct in_mfilter	*imf;
2403	struct ip_moptions	*imo;
2404	struct in_multi		*inm;
2405	size_t			 idx;
2406	int			 error;
2407
2408	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
2409	    sizeof(struct __msfilterreq));
2410	if (error)
2411		return (error);
2412
2413	if (msfr.msfr_nsrcs > in_mcast_maxsocksrc ||
2414	    (msfr.msfr_fmode != MCAST_EXCLUDE &&
2415	     msfr.msfr_fmode != MCAST_INCLUDE))
2416		return (EINVAL);
2417
2418	if (msfr.msfr_group.ss_family != AF_INET ||
2419	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
2420		return (EINVAL);
2421
2422	gsa = (sockunion_t *)&msfr.msfr_group;
2423	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
2424		return (EINVAL);
2425
2426	gsa->sin.sin_port = 0;	/* ignore port */
2427
2428	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
2429		return (EADDRNOTAVAIL);
2430
2431	ifp = ifnet_byindex(msfr.msfr_ifindex);
2432	if (ifp == NULL)
2433		return (EADDRNOTAVAIL);
2434
2435	IFF_LOCKGIANT(ifp);
2436
2437	/*
2438	 * Take the INP write lock.
2439	 * Check if this socket is a member of this group.
2440	 */
2441	imo = inp_findmoptions(inp);
2442	idx = imo_match_group(imo, ifp, &gsa->sa);
2443	if (idx == -1 || imo->imo_mfilters == NULL) {
2444		error = EADDRNOTAVAIL;
2445		goto out_inp_locked;
2446	}
2447	inm = imo->imo_membership[idx];
2448	imf = &imo->imo_mfilters[idx];
2449
2450	/*
2451	 * Begin state merge transaction at socket layer.
2452	 */
2453	INP_WLOCK_ASSERT(inp);
2454
2455	imf->imf_st[1] = msfr.msfr_fmode;
2456
2457	/*
2458	 * Apply any new source filters, if present.
2459	 * Make a copy of the user-space source vector so
2460	 * that we may copy them with a single copyin. This
2461	 * allows us to deal with page faults up-front.
2462	 */
2463	if (msfr.msfr_nsrcs > 0) {
2464		struct in_msource	*lims;
2465		struct sockaddr_in	*psin;
2466		struct sockaddr_storage	*kss, *pkss;
2467		int			 i;
2468
2469		INP_WUNLOCK(inp);
2470
2471		CTR2(KTR_IGMPV3, "%s: loading %lu source list entries",
2472		    __func__, (unsigned long)msfr.msfr_nsrcs);
2473		kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
2474		    M_TEMP, M_WAITOK);
2475		error = copyin(msfr.msfr_srcs, kss,
2476		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
2477		if (error) {
2478			free(kss, M_TEMP);
2479			return (error);
2480		}
2481
2482		INP_WLOCK(inp);
2483
2484		/*
2485		 * Mark all source filters as UNDEFINED at t1.
2486		 * Restore new group filter mode, as imf_leave()
2487		 * will set it to INCLUDE.
2488		 */
2489		imf_leave(imf);
2490		imf->imf_st[1] = msfr.msfr_fmode;
2491
2492		/*
2493		 * Update socket layer filters at t1, lazy-allocating
2494		 * new entries. This saves a bunch of memory at the
2495		 * cost of one RB_FIND() per source entry; duplicate
2496		 * entries in the msfr_nsrcs vector are ignored.
2497		 * If we encounter an error, rollback transaction.
2498		 *
2499		 * XXX This too could be replaced with a set-symmetric
2500		 * difference like loop to avoid walking from root
2501		 * every time, as the key space is common.
2502		 */
2503		for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) {
2504			psin = (struct sockaddr_in *)pkss;
2505			if (psin->sin_family != AF_INET) {
2506				error = EAFNOSUPPORT;
2507				break;
2508			}
2509			if (psin->sin_len != sizeof(struct sockaddr_in)) {
2510				error = EINVAL;
2511				break;
2512			}
2513			error = imf_get_source(imf, psin, &lims);
2514			if (error)
2515				break;
2516			lims->imsl_st[1] = imf->imf_st[1];
2517		}
2518		free(kss, M_TEMP);
2519	}
2520
2521	if (error)
2522		goto out_imf_rollback;
2523
2524	INP_WLOCK_ASSERT(inp);
2525	IN_MULTI_LOCK();
2526
2527	/*
2528	 * Begin state merge transaction at IGMP layer.
2529	 */
2530	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2531	error = inm_merge(inm, imf);
2532	if (error) {
2533		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
2534		goto out_imf_rollback;
2535	}
2536
2537	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2538	error = igmp_change_state(inm);
2539	if (error)
2540		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
2541
2542	IN_MULTI_UNLOCK();
2543
2544out_imf_rollback:
2545	if (error)
2546		imf_rollback(imf);
2547	else
2548		imf_commit(imf);
2549
2550	imf_reap(imf);
2551
2552out_inp_locked:
2553	INP_WUNLOCK(inp);
2554	IFF_UNLOCKGIANT(ifp);
2555	return (error);
2556}
2557
2558/*
2559 * Set the IP multicast options in response to user setsockopt().
2560 *
2561 * Many of the socket options handled in this function duplicate the
2562 * functionality of socket options in the regular unicast API. However,
2563 * it is not possible to merge the duplicate code, because the idempotence
2564 * of the IPv4 multicast part of the BSD Sockets API must be preserved;
2565 * the effects of these options must be treated as separate and distinct.
2566 *
2567 * SMPng: XXX: Unlocked read of inp_socket believed OK.
2568 * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING
2569 * is refactored to no longer use vifs.
2570 */
2571int
2572inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
2573{
2574	struct ip_moptions	*imo;
2575	int			 error;
2576
2577	error = 0;
2578
2579	/*
2580	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
2581	 * or is a divert socket, reject it.
2582	 */
2583	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
2584	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
2585	     inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
2586		return (EOPNOTSUPP);
2587
2588	switch (sopt->sopt_name) {
2589	case IP_MULTICAST_VIF: {
2590		int vifi;
2591		/*
2592		 * Select a multicast VIF for transmission.
2593		 * Only useful if multicast forwarding is active.
2594		 */
2595		if (legal_vif_num == NULL) {
2596			error = EOPNOTSUPP;
2597			break;
2598		}
2599		error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
2600		if (error)
2601			break;
2602		if (!legal_vif_num(vifi) && (vifi != -1)) {
2603			error = EINVAL;
2604			break;
2605		}
2606		imo = inp_findmoptions(inp);
2607		imo->imo_multicast_vif = vifi;
2608		INP_WUNLOCK(inp);
2609		break;
2610	}
2611
2612	case IP_MULTICAST_IF:
2613		error = inp_set_multicast_if(inp, sopt);
2614		break;
2615
2616	case IP_MULTICAST_TTL: {
2617		u_char ttl;
2618
2619		/*
2620		 * Set the IP time-to-live for outgoing multicast packets.
2621		 * The original multicast API required a char argument,
2622		 * which is inconsistent with the rest of the socket API.
2623		 * We allow either a char or an int.
2624		 */
2625		if (sopt->sopt_valsize == sizeof(u_char)) {
2626			error = sooptcopyin(sopt, &ttl, sizeof(u_char),
2627			    sizeof(u_char));
2628			if (error)
2629				break;
2630		} else {
2631			u_int ittl;
2632
2633			error = sooptcopyin(sopt, &ittl, sizeof(u_int),
2634			    sizeof(u_int));
2635			if (error)
2636				break;
2637			if (ittl > 255) {
2638				error = EINVAL;
2639				break;
2640			}
2641			ttl = (u_char)ittl;
2642		}
2643		imo = inp_findmoptions(inp);
2644		imo->imo_multicast_ttl = ttl;
2645		INP_WUNLOCK(inp);
2646		break;
2647	}
2648
2649	case IP_MULTICAST_LOOP: {
2650		u_char loop;
2651
2652		/*
2653		 * Set the loopback flag for outgoing multicast packets.
2654		 * Must be zero or one.  The original multicast API required a
2655		 * char argument, which is inconsistent with the rest
2656		 * of the socket API.  We allow either a char or an int.
2657		 */
2658		if (sopt->sopt_valsize == sizeof(u_char)) {
2659			error = sooptcopyin(sopt, &loop, sizeof(u_char),
2660			    sizeof(u_char));
2661			if (error)
2662				break;
2663		} else {
2664			u_int iloop;
2665
2666			error = sooptcopyin(sopt, &iloop, sizeof(u_int),
2667					    sizeof(u_int));
2668			if (error)
2669				break;
2670			loop = (u_char)iloop;
2671		}
2672		imo = inp_findmoptions(inp);
2673		imo->imo_multicast_loop = !!loop;
2674		INP_WUNLOCK(inp);
2675		break;
2676	}
2677
2678	case IP_ADD_MEMBERSHIP:
2679	case IP_ADD_SOURCE_MEMBERSHIP:
2680	case MCAST_JOIN_GROUP:
2681	case MCAST_JOIN_SOURCE_GROUP:
2682		error = inp_join_group(inp, sopt);
2683		break;
2684
2685	case IP_DROP_MEMBERSHIP:
2686	case IP_DROP_SOURCE_MEMBERSHIP:
2687	case MCAST_LEAVE_GROUP:
2688	case MCAST_LEAVE_SOURCE_GROUP:
2689		error = inp_leave_group(inp, sopt);
2690		break;
2691
2692	case IP_BLOCK_SOURCE:
2693	case IP_UNBLOCK_SOURCE:
2694	case MCAST_BLOCK_SOURCE:
2695	case MCAST_UNBLOCK_SOURCE:
2696		error = inp_block_unblock_source(inp, sopt);
2697		break;
2698
2699	case IP_MSFILTER:
2700		error = inp_set_source_filters(inp, sopt);
2701		break;
2702
2703	default:
2704		error = EOPNOTSUPP;
2705		break;
2706	}
2707
2708	INP_UNLOCK_ASSERT(inp);
2709
2710	return (error);
2711}
2712
2713/*
2714 * Expose IGMP's multicast filter mode and source list(s) to userland,
2715 * keyed by (ifindex, group).
2716 * The filter mode is written out as a uint32_t, followed by
2717 * 0..n of struct in_addr.
2718 * For use by ifmcstat(8).
2719 * SMPng: NOTE: unlocked read of ifindex space.
2720 */
2721static int
2722sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS)
2723{
2724	INIT_VNET_NET(curvnet);
2725	struct in_addr			 src, group;
2726	struct ifnet			*ifp;
2727	struct ifmultiaddr		*ifma;
2728	struct in_multi			*inm;
2729	struct ip_msource		*ims;
2730	int				*name;
2731	int				 retval;
2732	u_int				 namelen;
2733	uint32_t			 fmode, ifindex;
2734
2735	name = (int *)arg1;
2736	namelen = arg2;
2737
2738	if (req->newptr != NULL)
2739		return (EPERM);
2740
2741	if (namelen != 2)
2742		return (EINVAL);
2743
2744	ifindex = name[0];
2745	if (ifindex <= 0 || ifindex > V_if_index) {
2746		CTR2(KTR_IGMPV3, "%s: ifindex %u out of range",
2747		    __func__, ifindex);
2748		return (ENOENT);
2749	}
2750
2751	group.s_addr = name[1];
2752	if (!IN_MULTICAST(ntohl(group.s_addr))) {
2753		CTR2(KTR_IGMPV3, "%s: group %s is not multicast",
2754		    __func__, inet_ntoa(group));
2755		return (EINVAL);
2756	}
2757
2758	ifp = ifnet_byindex(ifindex);
2759	if (ifp == NULL) {
2760		CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u",
2761		    __func__, ifindex);
2762		return (ENOENT);
2763	}
2764
2765	retval = sysctl_wire_old_buffer(req,
2766	    sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr)));
2767	if (retval)
2768		return (retval);
2769
2770	IN_MULTI_LOCK();
2771
2772	IF_ADDR_LOCK(ifp);
2773	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2774		if (ifma->ifma_addr->sa_family != AF_INET ||
2775		    ifma->ifma_protospec == NULL)
2776			continue;
2777		inm = (struct in_multi *)ifma->ifma_protospec;
2778		if (!in_hosteq(inm->inm_addr, group))
2779			continue;
2780		fmode = inm->inm_st[1].iss_fmode;
2781		retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t));
2782		if (retval != 0)
2783			break;
2784		RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
2785#ifdef KTR
2786			struct in_addr ina;
2787			ina.s_addr = htonl(ims->ims_haddr);
2788			CTR2(KTR_IGMPV3, "%s: visit node %s", __func__,
2789			    inet_ntoa(ina));
2790#endif
2791			/*
2792			 * Only copy-out sources which are in-mode.
2793			 */
2794			if (fmode != ims_get_mode(inm, ims, 1)) {
2795				CTR1(KTR_IGMPV3, "%s: skip non-in-mode",
2796				    __func__);
2797				continue;
2798			}
2799			src.s_addr = htonl(ims->ims_haddr);
2800			retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr));
2801			if (retval != 0)
2802				break;
2803		}
2804	}
2805	IF_ADDR_UNLOCK(ifp);
2806
2807	IN_MULTI_UNLOCK();
2808
2809	return (retval);
2810}
2811
2812#ifdef KTR
2813
2814static const char *inm_modestrs[] = { "un", "in", "ex" };
2815
2816static const char *
2817inm_mode_str(const int mode)
2818{
2819
2820	if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE)
2821		return (inm_modestrs[mode]);
2822	return ("??");
2823}
2824
2825static const char *inm_statestrs[] = {
2826	"not-member",
2827	"silent",
2828	"idle",
2829	"lazy",
2830	"sleeping",
2831	"awakening",
2832	"query-pending",
2833	"sg-query-pending",
2834	"leaving"
2835};
2836
2837static const char *
2838inm_state_str(const int state)
2839{
2840
2841	if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER)
2842		return (inm_statestrs[state]);
2843	return ("??");
2844}
2845
2846/*
2847 * Dump an in_multi structure to the console.
2848 */
2849void
2850inm_print(const struct in_multi *inm)
2851{
2852	int t;
2853
2854	if ((KTR_COMPILE & KTR_IGMPV3) == 0)
2855		return;
2856
2857	printf("%s: --- begin inm %p ---\n", __func__, inm);
2858	printf("addr %s ifp %p(%s) ifma %p\n",
2859	    inet_ntoa(inm->inm_addr),
2860	    inm->inm_ifp,
2861	    inm->inm_ifp->if_xname,
2862	    inm->inm_ifma);
2863	printf("timer %u state %s refcount %u scq.len %u\n",
2864	    inm->inm_timer,
2865	    inm_state_str(inm->inm_state),
2866	    inm->inm_refcount,
2867	    inm->inm_scq.ifq_len);
2868	printf("igi %p nsrc %lu sctimer %u scrv %u\n",
2869	    inm->inm_igi,
2870	    inm->inm_nsrc,
2871	    inm->inm_sctimer,
2872	    inm->inm_scrv);
2873	for (t = 0; t < 2; t++) {
2874		printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t,
2875		    inm_mode_str(inm->inm_st[t].iss_fmode),
2876		    inm->inm_st[t].iss_asm,
2877		    inm->inm_st[t].iss_ex,
2878		    inm->inm_st[t].iss_in,
2879		    inm->inm_st[t].iss_rec);
2880	}
2881	printf("%s: --- end inm %p ---\n", __func__, inm);
2882}
2883
2884#else /* !KTR */
2885
/*
 * Stub used when KTR is not compiled in: accepts the in_multi and
 * does nothing.
 */
void
inm_print(const struct in_multi *inm)
{

	(void)inm;
}
2891
2892#endif /* KTR */
2893
/*
 * Emit the red-black tree routines for ip_msource_tree: nodes are
 * struct ip_msource, linked through ims_link and ordered by
 * ip_msource_cmp.
 */
RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp);
2895