in_mcast.c revision 178888
1/*-
2 * Copyright (c) 2007 Bruce M. Simpson.
3 * Copyright (c) 2005 Robert N. M. Watson.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote
15 *    products derived from this software without specific prior written
16 *    permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31/*
32 * IPv4 multicast socket, group, and socket option processing module.
33 * Until further notice, this file requires INET to compile.
34 * TODO: Make this infrastructure independent of address family.
35 * TODO: Teach netinet6 to use this code.
36 * TODO: Hook up SSM logic to IGMPv3/MLDv2.
37 */
38
39#include <sys/cdefs.h>
40__FBSDID("$FreeBSD: head/sys/netinet/in_mcast.c 178888 2008-05-09 23:03:00Z julian $");
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/malloc.h>
46#include <sys/mbuf.h>
47#include <sys/protosw.h>
48#include <sys/socket.h>
49#include <sys/socketvar.h>
50#include <sys/sysctl.h>
51
52#include <net/if.h>
53#include <net/if_dl.h>
54#include <net/route.h>
55
56#include <netinet/in.h>
57#include <netinet/in_systm.h>
58#include <netinet/in_pcb.h>
59#include <netinet/in_var.h>
60#include <netinet/ip_var.h>
61#include <netinet/igmp_var.h>
62
63#ifndef __SOCKUNION_DECLARED
64union sockunion {
65	struct sockaddr_storage	ss;
66	struct sockaddr		sa;
67	struct sockaddr_dl	sdl;
68	struct sockaddr_in	sin;
69#ifdef INET6
70	struct sockaddr_in6	sin6;
71#endif
72};
73typedef union sockunion sockunion_t;
74#define __SOCKUNION_DECLARED
75#endif /* __SOCKUNION_DECLARED */
76
77static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
78static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
79static MALLOC_DEFINE(M_IPMSOURCE, "in_msource", "IPv4 multicast source filter");
80
81/*
82 * The IPv4 multicast list (in_multihead and associated structures) are
83 * protected by the global in_multi_mtx.  See in_var.h for more details.  For
84 * now, in_multi_mtx is marked as recursible due to IGMP's calling back into
85 * ip_output() to send IGMP packets while holding the lock; this probably is
86 * not quite desirable.
87 */
88struct in_multihead in_multihead;	/* XXX BSS initialization */
89struct mtx in_multi_mtx;
90MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF | MTX_RECURSE);
91
92/*
93 * Functions with non-static linkage defined in this file should be
94 * declared in in_var.h:
95 *  imo_match_group()
96 *  imo_match_source()
97 *  in_addmulti()
98 *  in_delmulti()
99 *  in_delmulti_locked()
100 * and ip_var.h:
101 *  inp_freemoptions()
102 *  inp_getmoptions()
103 *  inp_setmoptions()
104 */
105static int	imo_grow(struct ip_moptions *);
106static int	imo_join_source(struct ip_moptions *, size_t, sockunion_t *);
107static int	imo_leave_source(struct ip_moptions *, size_t, sockunion_t *);
108static int	inp_change_source_filter(struct inpcb *, struct sockopt *);
109static struct ip_moptions *
110		inp_findmoptions(struct inpcb *);
111static int	inp_get_source_filters(struct inpcb *, struct sockopt *);
112static int	inp_join_group(struct inpcb *, struct sockopt *);
113static int	inp_leave_group(struct inpcb *, struct sockopt *);
114static int	inp_set_multicast_if(struct inpcb *, struct sockopt *);
115static int	inp_set_source_filters(struct inpcb *, struct sockopt *);
116
117/*
118 * Resize the ip_moptions vector to the next power-of-two minus 1.
119 * May be called with locks held; do not sleep.
120 */
121static int
122imo_grow(struct ip_moptions *imo)
123{
124	struct in_multi		**nmships;
125	struct in_multi		**omships;
126	struct in_mfilter	 *nmfilters;
127	struct in_mfilter	 *omfilters;
128	size_t			  idx;
129	size_t			  newmax;
130	size_t			  oldmax;
131
132	nmships = NULL;
133	nmfilters = NULL;
134	omships = imo->imo_membership;
135	omfilters = imo->imo_mfilters;
136	oldmax = imo->imo_max_memberships;
137	newmax = ((oldmax + 1) * 2) - 1;
138
139	if (newmax <= IP_MAX_MEMBERSHIPS) {
140		nmships = (struct in_multi **)realloc(omships,
141		    sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT);
142		nmfilters = (struct in_mfilter *)realloc(omfilters,
143		    sizeof(struct in_mfilter) * newmax, M_IPMSOURCE, M_NOWAIT);
144		if (nmships != NULL && nmfilters != NULL) {
145			/* Initialize newly allocated source filter heads. */
146			for (idx = oldmax; idx < newmax; idx++) {
147				nmfilters[idx].imf_fmode = MCAST_EXCLUDE;
148				nmfilters[idx].imf_nsources = 0;
149				TAILQ_INIT(&nmfilters[idx].imf_sources);
150			}
151			imo->imo_max_memberships = newmax;
152			imo->imo_membership = nmships;
153			imo->imo_mfilters = nmfilters;
154		}
155	}
156
157	if (nmships == NULL || nmfilters == NULL) {
158		if (nmships != NULL)
159			free(nmships, M_IPMOPTS);
160		if (nmfilters != NULL)
161			free(nmfilters, M_IPMSOURCE);
162		return (ETOOMANYREFS);
163	}
164
165	return (0);
166}
167
168/*
169 * Add a source to a multicast filter list.
170 * Assumes the associated inpcb is locked.
171 */
172static int
173imo_join_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
174{
175	struct in_msource	*ims, *nims;
176	struct in_mfilter	*imf;
177
178	KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
179	KASSERT(imo->imo_mfilters != NULL,
180	    ("%s: imo_mfilters vector not allocated", __func__));
181
182	imf = &imo->imo_mfilters[gidx];
183	if (imf->imf_nsources == IP_MAX_SOURCE_FILTER)
184		return (ENOBUFS);
185
186	ims = imo_match_source(imo, gidx, &src->sa);
187	if (ims != NULL)
188		return (EADDRNOTAVAIL);
189
190	/* Do not sleep with inp lock held. */
191	MALLOC(nims, struct in_msource *, sizeof(struct in_msource),
192	    M_IPMSOURCE, M_NOWAIT | M_ZERO);
193	if (nims == NULL)
194		return (ENOBUFS);
195
196	nims->ims_addr = src->ss;
197	TAILQ_INSERT_TAIL(&imf->imf_sources, nims, ims_next);
198	imf->imf_nsources++;
199
200	return (0);
201}
202
203static int
204imo_leave_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
205{
206	struct in_msource	*ims;
207	struct in_mfilter	*imf;
208
209	KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
210	KASSERT(imo->imo_mfilters != NULL,
211	    ("%s: imo_mfilters vector not allocated", __func__));
212
213	imf = &imo->imo_mfilters[gidx];
214	if (imf->imf_nsources == IP_MAX_SOURCE_FILTER)
215		return (ENOBUFS);
216
217	ims = imo_match_source(imo, gidx, &src->sa);
218	if (ims == NULL)
219		return (EADDRNOTAVAIL);
220
221	TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
222	FREE(ims, M_IPMSOURCE);
223	imf->imf_nsources--;
224
225	return (0);
226}
227
228/*
229 * Find an IPv4 multicast group entry for this ip_moptions instance
230 * which matches the specified group, and optionally an interface.
231 * Return its index into the array, or -1 if not found.
232 */
233size_t
234imo_match_group(struct ip_moptions *imo, struct ifnet *ifp,
235    struct sockaddr *group)
236{
237	sockunion_t	 *gsa;
238	struct in_multi	**pinm;
239	int		  idx;
240	int		  nmships;
241
242	gsa = (sockunion_t *)group;
243
244	/* The imo_membership array may be lazy allocated. */
245	if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
246		return (-1);
247
248	nmships = imo->imo_num_memberships;
249	pinm = &imo->imo_membership[0];
250	for (idx = 0; idx < nmships; idx++, pinm++) {
251		if (*pinm == NULL)
252			continue;
253#if 0
254		printf("%s: trying ifp = %p, inaddr = %s ", __func__,
255		    ifp, inet_ntoa(gsa->sin.sin_addr));
256		printf("against %p, %s\n",
257		    (*pinm)->inm_ifp, inet_ntoa((*pinm)->inm_addr));
258#endif
259		if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) &&
260		    (*pinm)->inm_addr.s_addr == gsa->sin.sin_addr.s_addr) {
261			break;
262		}
263	}
264	if (idx >= nmships)
265		idx = -1;
266
267	return (idx);
268}
269
270/*
271 * Find a multicast source entry for this imo which matches
272 * the given group index for this socket, and source address.
273 */
274struct in_msource *
275imo_match_source(struct ip_moptions *imo, size_t gidx, struct sockaddr *src)
276{
277	struct in_mfilter	*imf;
278	struct in_msource	*ims, *pims;
279
280	KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
281	KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
282	    ("%s: invalid index %d\n", __func__, (int)gidx));
283
284	/* The imo_mfilters array may be lazy allocated. */
285	if (imo->imo_mfilters == NULL)
286		return (NULL);
287
288	pims = NULL;
289	imf = &imo->imo_mfilters[gidx];
290	TAILQ_FOREACH(ims, &imf->imf_sources, ims_next) {
291		/*
292		 * Perform bitwise comparison of two IPv4 addresses.
293		 * TODO: Do the same for IPv6.
294		 * Do not use sa_equal() for this as it is not aware of
295		 * deeper structure in sockaddr_in or sockaddr_in6.
296		 */
297		if (((struct sockaddr_in *)&ims->ims_addr)->sin_addr.s_addr ==
298		    ((struct sockaddr_in *)src)->sin_addr.s_addr) {
299			pims = ims;
300			break;
301		}
302	}
303
304	return (pims);
305}
306
307/*
308 * Join an IPv4 multicast group.
309 */
310struct in_multi *
311in_addmulti(struct in_addr *ap, struct ifnet *ifp)
312{
313	struct in_multi *inm;
314
315	inm = NULL;
316
317	IFF_LOCKGIANT(ifp);
318	IN_MULTI_LOCK();
319
320	IN_LOOKUP_MULTI(*ap, ifp, inm);
321	if (inm != NULL) {
322		/*
323		 * If we already joined this group, just bump the
324		 * refcount and return it.
325		 */
326		KASSERT(inm->inm_refcount >= 1,
327		    ("%s: bad refcount %d", __func__, inm->inm_refcount));
328		++inm->inm_refcount;
329	} else do {
330		sockunion_t		 gsa;
331		struct ifmultiaddr	*ifma;
332		struct in_multi		*ninm;
333		int			 error;
334
335		memset(&gsa, 0, sizeof(gsa));
336		gsa.sin.sin_family = AF_INET;
337		gsa.sin.sin_len = sizeof(struct sockaddr_in);
338		gsa.sin.sin_addr = *ap;
339
340		/*
341		 * Check if a link-layer group is already associated
342		 * with this network-layer group on the given ifnet.
343		 * If so, bump the refcount on the existing network-layer
344		 * group association and return it.
345		 */
346		error = if_addmulti(ifp, &gsa.sa, &ifma);
347		if (error)
348			break;
349		if (ifma->ifma_protospec != NULL) {
350			inm = (struct in_multi *)ifma->ifma_protospec;
351#ifdef INVARIANTS
352			if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
353			    inm->inm_addr.s_addr != ap->s_addr)
354				panic("%s: ifma is inconsistent", __func__);
355#endif
356			++inm->inm_refcount;
357			break;
358		}
359
360		/*
361		 * A new membership is needed; construct it and
362		 * perform the IGMP join.
363		 */
364		ninm = malloc(sizeof(*ninm), M_IPMADDR, M_NOWAIT | M_ZERO);
365		if (ninm == NULL) {
366			if_delmulti_ifma(ifma);
367			break;
368		}
369		ninm->inm_addr = *ap;
370		ninm->inm_ifp = ifp;
371		ninm->inm_ifma = ifma;
372		ninm->inm_refcount = 1;
373		ifma->ifma_protospec = ninm;
374		LIST_INSERT_HEAD(&in_multihead, ninm, inm_link);
375
376		igmp_joingroup(ninm);
377
378		inm = ninm;
379	} while (0);
380
381	IN_MULTI_UNLOCK();
382	IFF_UNLOCKGIANT(ifp);
383
384	return (inm);
385}
386
387/*
388 * Leave an IPv4 multicast group.
389 * It is OK to call this routine if the underlying ifnet went away.
390 *
391 * XXX: To deal with the ifp going away, we cheat; the link-layer code in net
392 * will set ifma_ifp to NULL when the associated ifnet instance is detached
393 * from the system.
394 *
395 * The only reason we need to violate layers and check ifma_ifp here at all
396 * is because certain hardware drivers still require Giant to be held,
397 * and it must always be taken before other locks.
398 */
399void
400in_delmulti(struct in_multi *inm)
401{
402	struct ifnet *ifp;
403
404	KASSERT(inm != NULL, ("%s: inm is NULL", __func__));
405	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
406	ifp = inm->inm_ifma->ifma_ifp;
407
408	if (ifp != NULL) {
409		/*
410		 * Sanity check that netinet's notion of ifp is the
411		 * same as net's.
412		 */
413		KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
414		IFF_LOCKGIANT(ifp);
415	}
416
417	IN_MULTI_LOCK();
418	in_delmulti_locked(inm);
419	IN_MULTI_UNLOCK();
420
421	if (ifp != NULL)
422		IFF_UNLOCKGIANT(ifp);
423}
424
425/*
426 * Delete a multicast address record, with locks held.
427 *
428 * It is OK to call this routine if the ifp went away.
429 * Assumes that caller holds the IN_MULTI lock, and that
430 * Giant was taken before other locks if required by the hardware.
431 */
432void
433in_delmulti_locked(struct in_multi *inm)
434{
435	struct ifmultiaddr *ifma;
436
437	IN_MULTI_LOCK_ASSERT();
438	KASSERT(inm->inm_refcount >= 1, ("%s: freeing freed inm", __func__));
439
440	if (--inm->inm_refcount == 0) {
441		igmp_leavegroup(inm);
442
443		ifma = inm->inm_ifma;
444#ifdef DIAGNOSTIC
445		if (bootverbose)
446			printf("%s: purging ifma %p\n", __func__, ifma);
447#endif
448		KASSERT(ifma->ifma_protospec == inm,
449		    ("%s: ifma_protospec != inm", __func__));
450		ifma->ifma_protospec = NULL;
451
452		LIST_REMOVE(inm, inm_link);
453		free(inm, M_IPMADDR);
454
455		if_delmulti_ifma(ifma);
456	}
457}
458
459/*
460 * Block or unblock an ASM/SSM multicast source on an inpcb.
461 */
462static int
463inp_change_source_filter(struct inpcb *inp, struct sockopt *sopt)
464{
465	struct group_source_req		 gsr;
466	sockunion_t			*gsa, *ssa;
467	struct ifnet			*ifp;
468	struct in_mfilter		*imf;
469	struct ip_moptions		*imo;
470	struct in_msource		*ims;
471	size_t				 idx;
472	int				 error;
473	int				 block;
474
475	ifp = NULL;
476	error = 0;
477	block = 0;
478
479	memset(&gsr, 0, sizeof(struct group_source_req));
480	gsa = (sockunion_t *)&gsr.gsr_group;
481	ssa = (sockunion_t *)&gsr.gsr_source;
482
483	switch (sopt->sopt_name) {
484	case IP_BLOCK_SOURCE:
485	case IP_UNBLOCK_SOURCE: {
486		struct ip_mreq_source	 mreqs;
487
488		error = sooptcopyin(sopt, &mreqs,
489		    sizeof(struct ip_mreq_source),
490		    sizeof(struct ip_mreq_source));
491		if (error)
492			return (error);
493
494		gsa->sin.sin_family = AF_INET;
495		gsa->sin.sin_len = sizeof(struct sockaddr_in);
496		gsa->sin.sin_addr = mreqs.imr_multiaddr;
497
498		ssa->sin.sin_family = AF_INET;
499		ssa->sin.sin_len = sizeof(struct sockaddr_in);
500		ssa->sin.sin_addr = mreqs.imr_sourceaddr;
501
502		if (mreqs.imr_interface.s_addr != INADDR_ANY)
503			INADDR_TO_IFP(mreqs.imr_interface, ifp);
504
505		if (sopt->sopt_name == IP_BLOCK_SOURCE)
506			block = 1;
507
508#ifdef DIAGNOSTIC
509		if (bootverbose) {
510			printf("%s: imr_interface = %s, ifp = %p\n",
511			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
512		}
513#endif
514		break;
515	    }
516
517	case MCAST_BLOCK_SOURCE:
518	case MCAST_UNBLOCK_SOURCE:
519		error = sooptcopyin(sopt, &gsr,
520		    sizeof(struct group_source_req),
521		    sizeof(struct group_source_req));
522		if (error)
523			return (error);
524
525		if (gsa->sin.sin_family != AF_INET ||
526		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
527			return (EINVAL);
528
529		if (ssa->sin.sin_family != AF_INET ||
530		    ssa->sin.sin_len != sizeof(struct sockaddr_in))
531			return (EINVAL);
532
533		if (gsr.gsr_interface == 0 || if_index < gsr.gsr_interface)
534			return (EADDRNOTAVAIL);
535
536		ifp = ifnet_byindex(gsr.gsr_interface);
537
538		if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
539			block = 1;
540		break;
541
542	default:
543#ifdef DIAGNOSTIC
544		if (bootverbose) {
545			printf("%s: unknown sopt_name %d\n", __func__,
546			    sopt->sopt_name);
547		}
548#endif
549		return (EOPNOTSUPP);
550		break;
551	}
552
553	/* XXX INET6 */
554	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
555		return (EINVAL);
556
557	/*
558	 * Check if we are actually a member of this group.
559	 */
560	imo = inp_findmoptions(inp);
561	idx = imo_match_group(imo, ifp, &gsa->sa);
562	if (idx == -1 || imo->imo_mfilters == NULL) {
563		error = EADDRNOTAVAIL;
564		goto out_locked;
565	}
566
567	KASSERT(imo->imo_mfilters != NULL,
568	    ("%s: imo_mfilters not allocated", __func__));
569	imf = &imo->imo_mfilters[idx];
570
571	/*
572	 * SSM multicast truth table for block/unblock operations.
573	 *
574	 * Operation   Filter Mode  Entry exists?   Action
575	 *
576	 * block       exclude      no              add source to filter
577	 * unblock     include      no              add source to filter
578	 * block       include      no              EINVAL
579	 * unblock     exclude      no              EINVAL
580	 * block       exclude      yes             EADDRNOTAVAIL
581	 * unblock     include      yes             EADDRNOTAVAIL
582	 * block       include      yes             remove source from filter
583	 * unblock     exclude      yes             remove source from filter
584	 *
585	 * FreeBSD does not explicitly distinguish between ASM and SSM
586	 * mode sockets; all sockets are assumed to have a filter list.
587	 */
588#ifdef DIAGNOSTIC
589	if (bootverbose) {
590		printf("%s: imf_fmode is %s\n", __func__,
591		    imf->imf_fmode == MCAST_INCLUDE ? "include" : "exclude");
592	}
593#endif
594	ims = imo_match_source(imo, idx, &ssa->sa);
595	if (ims == NULL) {
596		if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) ||
597		    (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) {
598#ifdef DIAGNOSTIC
599			if (bootverbose) {
600				printf("%s: adding %s to filter list\n",
601				    __func__, inet_ntoa(ssa->sin.sin_addr));
602			}
603#endif
604			error = imo_join_source(imo, idx, ssa);
605		}
606		if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) ||
607		    (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) {
608			/*
609			 * If the socket is in inclusive mode:
610			 *  the source is already blocked as it has no entry.
611			 * If the socket is in exclusive mode:
612			 *  the source is already unblocked as it has no entry.
613			 */
614#ifdef DIAGNOSTIC
615			if (bootverbose) {
616				printf("%s: ims %p; %s already [un]blocked\n",
617				    __func__, ims,
618				    inet_ntoa(ssa->sin.sin_addr));
619			}
620#endif
621			error = EINVAL;
622		}
623	} else {
624		if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) ||
625		    (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) {
626			/*
627			 * If the socket is in exclusive mode:
628			 *  the source is already blocked as it has an entry.
629			 * If the socket is in inclusive mode:
630			 *  the source is already unblocked as it has an entry.
631			 */
632#ifdef DIAGNOSTIC
633			if (bootverbose) {
634				printf("%s: ims %p; %s already [un]blocked\n",
635				    __func__, ims,
636				    inet_ntoa(ssa->sin.sin_addr));
637			}
638#endif
639			error = EADDRNOTAVAIL;
640		}
641		if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) ||
642		    (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) {
643#ifdef DIAGNOSTIC
644			if (bootverbose) {
645				printf("%s: removing %s from filter list\n",
646				    __func__, inet_ntoa(ssa->sin.sin_addr));
647			}
648#endif
649			error = imo_leave_source(imo, idx, ssa);
650		}
651	}
652
653out_locked:
654	INP_WUNLOCK(inp);
655	return (error);
656}
657
658/*
659 * Given an inpcb, return its multicast options structure pointer.  Accepts
660 * an unlocked inpcb pointer, but will return it locked.  May sleep.
661 */
662static struct ip_moptions *
663inp_findmoptions(struct inpcb *inp)
664{
665	struct ip_moptions	 *imo;
666	struct in_multi		**immp;
667	struct in_mfilter	 *imfp;
668	size_t			  idx;
669
670	INP_WLOCK(inp);
671	if (inp->inp_moptions != NULL)
672		return (inp->inp_moptions);
673
674	INP_WUNLOCK(inp);
675
676	imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS,
677	    M_WAITOK);
678	immp = (struct in_multi **)malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS,
679	    M_IPMOPTS, M_WAITOK | M_ZERO);
680	imfp = (struct in_mfilter *)malloc(
681	    sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS,
682	    M_IPMSOURCE, M_WAITOK);
683
684	imo->imo_multicast_ifp = NULL;
685	imo->imo_multicast_addr.s_addr = INADDR_ANY;
686	imo->imo_multicast_vif = -1;
687	imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
688	imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
689	imo->imo_num_memberships = 0;
690	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
691	imo->imo_membership = immp;
692
693	/* Initialize per-group source filters. */
694	for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++) {
695		imfp[idx].imf_fmode = MCAST_EXCLUDE;
696		imfp[idx].imf_nsources = 0;
697		TAILQ_INIT(&imfp[idx].imf_sources);
698	}
699	imo->imo_mfilters = imfp;
700
701	INP_WLOCK(inp);
702	if (inp->inp_moptions != NULL) {
703		free(imfp, M_IPMSOURCE);
704		free(immp, M_IPMOPTS);
705		free(imo, M_IPMOPTS);
706		return (inp->inp_moptions);
707	}
708	inp->inp_moptions = imo;
709	return (imo);
710}
711
712/*
713 * Discard the IP multicast options (and source filters).
714 */
715void
716inp_freemoptions(struct ip_moptions *imo)
717{
718	struct in_mfilter	*imf;
719	struct in_msource	*ims, *tims;
720	size_t			 idx, nmships;
721
722	KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
723
724	nmships = imo->imo_num_memberships;
725	for (idx = 0; idx < nmships; ++idx) {
726		in_delmulti(imo->imo_membership[idx]);
727
728		if (imo->imo_mfilters != NULL) {
729			imf = &imo->imo_mfilters[idx];
730			TAILQ_FOREACH_SAFE(ims, &imf->imf_sources,
731			    ims_next, tims) {
732				TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
733				FREE(ims, M_IPMSOURCE);
734				imf->imf_nsources--;
735			}
736			KASSERT(imf->imf_nsources == 0,
737			    ("%s: did not free all imf_nsources", __func__));
738		}
739	}
740
741	if (imo->imo_mfilters != NULL)
742		free(imo->imo_mfilters, M_IPMSOURCE);
743	free(imo->imo_membership, M_IPMOPTS);
744	free(imo, M_IPMOPTS);
745}
746
747/*
748 * Atomically get source filters on a socket for an IPv4 multicast group.
749 * Called with INP lock held; returns with lock released.
750 */
751static int
752inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
753{
754	struct __msfilterreq	 msfr;
755	sockunion_t		*gsa;
756	struct ifnet		*ifp;
757	struct ip_moptions	*imo;
758	struct in_mfilter	*imf;
759	struct in_msource	*ims;
760	struct sockaddr_storage	*ptss;
761	struct sockaddr_storage	*tss;
762	int			 error;
763	size_t			 idx;
764
765	INP_WLOCK_ASSERT(inp);
766
767	imo = inp->inp_moptions;
768	KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));
769
770	INP_WUNLOCK(inp);
771
772	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
773	    sizeof(struct __msfilterreq));
774	if (error)
775		return (error);
776
777	if (msfr.msfr_ifindex == 0 || if_index < msfr.msfr_ifindex)
778		return (EINVAL);
779
780	ifp = ifnet_byindex(msfr.msfr_ifindex);
781	if (ifp == NULL)
782		return (EINVAL);
783
784	INP_WLOCK(inp);
785
786	/*
787	 * Lookup group on the socket.
788	 */
789	gsa = (sockunion_t *)&msfr.msfr_group;
790	idx = imo_match_group(imo, ifp, &gsa->sa);
791	if (idx == -1 || imo->imo_mfilters == NULL) {
792		INP_WUNLOCK(inp);
793		return (EADDRNOTAVAIL);
794	}
795
796	imf = &imo->imo_mfilters[idx];
797	msfr.msfr_fmode = imf->imf_fmode;
798	msfr.msfr_nsrcs = imf->imf_nsources;
799
800	/*
801	 * If the user specified a buffer, copy out the source filter
802	 * entries to userland gracefully.
803	 * msfr.msfr_nsrcs is always set to the total number of filter
804	 * entries which the kernel currently has for this group.
805	 */
806	tss = NULL;
807	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
808		/*
809		 * Make a copy of the source vector so that we do not
810		 * thrash the inpcb lock whilst copying it out.
811		 * We only copy out the number of entries which userland
812		 * has asked for, but we always tell userland how big the
813		 * buffer really needs to be.
814		 */
815		MALLOC(tss, struct sockaddr_storage *,
816		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
817		    M_TEMP, M_NOWAIT);
818		if (tss == NULL) {
819			error = ENOBUFS;
820		} else {
821			ptss = tss;
822			TAILQ_FOREACH(ims, &imf->imf_sources, ims_next) {
823				memcpy(ptss++, &ims->ims_addr,
824				    sizeof(struct sockaddr_storage));
825			}
826		}
827	}
828
829	INP_WUNLOCK(inp);
830
831	if (tss != NULL) {
832		error = copyout(tss, msfr.msfr_srcs,
833		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
834		FREE(tss, M_TEMP);
835	}
836
837	if (error)
838		return (error);
839
840	error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));
841
842	return (error);
843}
844
845/*
846 * Return the IP multicast options in response to user getsockopt().
847 */
848int
849inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
850{
851	struct ip_mreqn		 mreqn;
852	struct ip_moptions	*imo;
853	struct ifnet		*ifp;
854	struct in_ifaddr	*ia;
855	int			 error, optval;
856	u_char			 coptval;
857
858	INP_WLOCK(inp);
859	imo = inp->inp_moptions;
860	/*
861	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
862	 * or is a divert socket, reject it.
863	 */
864	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
865	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
866	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
867		INP_WUNLOCK(inp);
868		return (EOPNOTSUPP);
869	}
870
871	error = 0;
872	switch (sopt->sopt_name) {
873	case IP_MULTICAST_VIF:
874		if (imo != NULL)
875			optval = imo->imo_multicast_vif;
876		else
877			optval = -1;
878		INP_WUNLOCK(inp);
879		error = sooptcopyout(sopt, &optval, sizeof(int));
880		break;
881
882	case IP_MULTICAST_IF:
883		memset(&mreqn, 0, sizeof(struct ip_mreqn));
884		if (imo != NULL) {
885			ifp = imo->imo_multicast_ifp;
886			if (imo->imo_multicast_addr.s_addr != INADDR_ANY) {
887				mreqn.imr_address = imo->imo_multicast_addr;
888			} else if (ifp != NULL) {
889				mreqn.imr_ifindex = ifp->if_index;
890				IFP_TO_IA(ifp, ia);
891				if (ia != NULL) {
892					mreqn.imr_address =
893					    IA_SIN(ia)->sin_addr;
894				}
895			}
896		}
897		INP_WUNLOCK(inp);
898		if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
899			error = sooptcopyout(sopt, &mreqn,
900			    sizeof(struct ip_mreqn));
901		} else {
902			error = sooptcopyout(sopt, &mreqn.imr_address,
903			    sizeof(struct in_addr));
904		}
905		break;
906
907	case IP_MULTICAST_TTL:
908		if (imo == 0)
909			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
910		else
911			optval = coptval = imo->imo_multicast_ttl;
912		INP_WUNLOCK(inp);
913		if (sopt->sopt_valsize == sizeof(u_char))
914			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
915		else
916			error = sooptcopyout(sopt, &optval, sizeof(int));
917		break;
918
919	case IP_MULTICAST_LOOP:
920		if (imo == 0)
921			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
922		else
923			optval = coptval = imo->imo_multicast_loop;
924		INP_WUNLOCK(inp);
925		if (sopt->sopt_valsize == sizeof(u_char))
926			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
927		else
928			error = sooptcopyout(sopt, &optval, sizeof(int));
929		break;
930
931	case IP_MSFILTER:
932		if (imo == NULL) {
933			error = EADDRNOTAVAIL;
934			INP_WUNLOCK(inp);
935		} else {
936			error = inp_get_source_filters(inp, sopt);
937		}
938		break;
939
940	default:
941		INP_WUNLOCK(inp);
942		error = ENOPROTOOPT;
943		break;
944	}
945
946	INP_UNLOCK_ASSERT(inp);
947
948	return (error);
949}
950
951/*
952 * Join an IPv4 multicast group, possibly with a source.
953 */
954static int
955inp_join_group(struct inpcb *inp, struct sockopt *sopt)
956{
957	struct group_source_req		 gsr;
958	sockunion_t			*gsa, *ssa;
959	struct ifnet			*ifp;
960	struct in_mfilter		*imf;
961	struct ip_moptions		*imo;
962	struct in_multi			*inm;
963	size_t				 idx;
964	int				 error;
965
966	ifp = NULL;
967	error = 0;
968
969	memset(&gsr, 0, sizeof(struct group_source_req));
970	gsa = (sockunion_t *)&gsr.gsr_group;
971	gsa->ss.ss_family = AF_UNSPEC;
972	ssa = (sockunion_t *)&gsr.gsr_source;
973	ssa->ss.ss_family = AF_UNSPEC;
974
975	switch (sopt->sopt_name) {
976	case IP_ADD_MEMBERSHIP:
977	case IP_ADD_SOURCE_MEMBERSHIP: {
978		struct ip_mreq_source	 mreqs;
979
980		if (sopt->sopt_name == IP_ADD_MEMBERSHIP) {
981			error = sooptcopyin(sopt, &mreqs,
982			    sizeof(struct ip_mreq),
983			    sizeof(struct ip_mreq));
984			/*
985			 * Do argument switcharoo from ip_mreq into
986			 * ip_mreq_source to avoid using two instances.
987			 */
988			mreqs.imr_interface = mreqs.imr_sourceaddr;
989			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
990		} else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
991			error = sooptcopyin(sopt, &mreqs,
992			    sizeof(struct ip_mreq_source),
993			    sizeof(struct ip_mreq_source));
994		}
995		if (error)
996			return (error);
997
998		gsa->sin.sin_family = AF_INET;
999		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1000		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1001
1002		if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
1003			ssa->sin.sin_family = AF_INET;
1004			ssa->sin.sin_len = sizeof(struct sockaddr_in);
1005			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1006		}
1007
1008		/*
1009		 * Obtain ifp. If no interface address was provided,
1010		 * use the interface of the route in the unicast FIB for
1011		 * the given multicast destination; usually, this is the
1012		 * default route.
1013		 * If this lookup fails, attempt to use the first non-loopback
1014		 * interface with multicast capability in the system as a
1015		 * last resort. The legacy IPv4 ASM API requires that we do
1016		 * this in order to allow groups to be joined when the routing
1017		 * table has not yet been populated during boot.
1018		 * If all of these conditions fail, return EADDRNOTAVAIL, and
1019		 * reject the IPv4 multicast join.
1020		 */
1021		if (mreqs.imr_interface.s_addr != INADDR_ANY) {
1022			INADDR_TO_IFP(mreqs.imr_interface, ifp);
1023		} else {
1024			struct route ro;
1025
1026			ro.ro_rt = NULL;
1027			*(struct sockaddr_in *)&ro.ro_dst = gsa->sin;
1028			in_rtalloc_ign(&ro, RTF_CLONING,
1029			   inp->inp_inc.inc_fibnum);
1030			if (ro.ro_rt != NULL) {
1031				ifp = ro.ro_rt->rt_ifp;
1032				KASSERT(ifp != NULL, ("%s: null ifp",
1033				    __func__));
1034				RTFREE(ro.ro_rt);
1035			} else {
1036				struct in_ifaddr *ia;
1037				struct ifnet *mfp = NULL;
1038				TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
1039					mfp = ia->ia_ifp;
1040					if (!(mfp->if_flags & IFF_LOOPBACK) &&
1041					     (mfp->if_flags & IFF_MULTICAST)) {
1042						ifp = mfp;
1043						break;
1044					}
1045				}
1046			}
1047		}
1048#ifdef DIAGNOSTIC
1049		if (bootverbose) {
1050			printf("%s: imr_interface = %s, ifp = %p\n",
1051			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1052		}
1053#endif
1054		break;
1055	}
1056
1057	case MCAST_JOIN_GROUP:
1058	case MCAST_JOIN_SOURCE_GROUP:
1059		if (sopt->sopt_name == MCAST_JOIN_GROUP) {
1060			error = sooptcopyin(sopt, &gsr,
1061			    sizeof(struct group_req),
1062			    sizeof(struct group_req));
1063		} else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1064			error = sooptcopyin(sopt, &gsr,
1065			    sizeof(struct group_source_req),
1066			    sizeof(struct group_source_req));
1067		}
1068		if (error)
1069			return (error);
1070
1071		if (gsa->sin.sin_family != AF_INET ||
1072		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1073			return (EINVAL);
1074
1075		/*
1076		 * Overwrite the port field if present, as the sockaddr
1077		 * being copied in may be matched with a binary comparison.
1078		 * XXX INET6
1079		 */
1080		gsa->sin.sin_port = 0;
1081		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1082			if (ssa->sin.sin_family != AF_INET ||
1083			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1084				return (EINVAL);
1085			ssa->sin.sin_port = 0;
1086		}
1087
1088		/*
1089		 * Obtain the ifp.
1090		 */
1091		if (gsr.gsr_interface == 0 || if_index < gsr.gsr_interface)
1092			return (EADDRNOTAVAIL);
1093		ifp = ifnet_byindex(gsr.gsr_interface);
1094
1095		break;
1096
1097	default:
1098#ifdef DIAGNOSTIC
1099		if (bootverbose) {
1100			printf("%s: unknown sopt_name %d\n", __func__,
1101			    sopt->sopt_name);
1102		}
1103#endif
1104		return (EOPNOTSUPP);
1105		break;
1106	}
1107
1108	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1109		return (EINVAL);
1110
1111	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
1112		return (EADDRNOTAVAIL);
1113
1114	/*
1115	 * Check if we already hold membership of this group for this inpcb.
1116	 * If so, we do not need to perform the initial join.
1117	 */
1118	imo = inp_findmoptions(inp);
1119	idx = imo_match_group(imo, ifp, &gsa->sa);
1120	if (idx != -1) {
1121		if (ssa->ss.ss_family != AF_UNSPEC) {
1122			/*
1123			 * Attempting to join an ASM group (when already
1124			 * an ASM or SSM member) is an error.
1125			 */
1126			error = EADDRNOTAVAIL;
1127		} else {
1128			imf = &imo->imo_mfilters[idx];
1129			if (imf->imf_nsources == 0) {
1130				/*
1131				 * Attempting to join an SSM group (when
1132				 * already an ASM member) is an error.
1133				 */
1134				error = EINVAL;
1135			} else {
1136				/*
1137				 * Attempting to join an SSM group (when
1138				 * already an SSM member) means "add this
1139				 * source to the inclusive filter list".
1140				 */
1141				error = imo_join_source(imo, idx, ssa);
1142			}
1143		}
1144		goto out_locked;
1145	}
1146
1147	/*
1148	 * Call imo_grow() to reallocate the membership and source filter
1149	 * vectors if they are full. If the size would exceed the hard limit,
1150	 * then we know we've really run out of entries. We keep the INP
1151	 * lock held to avoid introducing a race condition.
1152	 */
1153	if (imo->imo_num_memberships == imo->imo_max_memberships) {
1154		error = imo_grow(imo);
1155		if (error)
1156			goto out_locked;
1157	}
1158
1159	/*
1160	 * So far, so good: perform the layer 3 join, layer 2 join,
1161	 * and make an IGMP announcement if needed.
1162	 */
1163	inm = in_addmulti(&gsa->sin.sin_addr, ifp);
1164	if (inm == NULL) {
1165		error = ENOBUFS;
1166		goto out_locked;
1167	}
1168	idx = imo->imo_num_memberships;
1169	imo->imo_membership[idx] = inm;
1170	imo->imo_num_memberships++;
1171
1172	KASSERT(imo->imo_mfilters != NULL,
1173	    ("%s: imf_mfilters vector was not allocated", __func__));
1174	imf = &imo->imo_mfilters[idx];
1175	KASSERT(TAILQ_EMPTY(&imf->imf_sources),
1176	    ("%s: imf_sources not empty", __func__));
1177
1178	/*
1179	 * If this is a new SSM group join (i.e. a source was specified
1180	 * with this group), add this source to the filter list.
1181	 */
1182	if (ssa->ss.ss_family != AF_UNSPEC) {
1183		/*
1184		 * An initial SSM join implies that this socket's membership
1185		 * of the multicast group is now in inclusive mode.
1186		 */
1187		imf->imf_fmode = MCAST_INCLUDE;
1188
1189		error = imo_join_source(imo, idx, ssa);
1190		if (error) {
1191			/*
1192			 * Drop inp lock before calling in_delmulti(),
1193			 * to prevent a lock order reversal.
1194			 */
1195			--imo->imo_num_memberships;
1196			INP_WUNLOCK(inp);
1197			in_delmulti(inm);
1198			return (error);
1199		}
1200	}
1201
1202out_locked:
1203	INP_WUNLOCK(inp);
1204	return (error);
1205}
1206
1207/*
1208 * Leave an IPv4 multicast group on an inpcb, possibly with a source.
1209 */
1210static int
1211inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
1212{
1213	struct group_source_req		 gsr;
1214	struct ip_mreq_source		 mreqs;
1215	sockunion_t			*gsa, *ssa;
1216	struct ifnet			*ifp;
1217	struct in_mfilter		*imf;
1218	struct ip_moptions		*imo;
1219	struct in_msource		*ims, *tims;
1220	struct in_multi			*inm;
1221	size_t				 idx;
1222	int				 error;
1223
1224	ifp = NULL;
1225	error = 0;
1226
1227	memset(&gsr, 0, sizeof(struct group_source_req));
1228	gsa = (sockunion_t *)&gsr.gsr_group;
1229	gsa->ss.ss_family = AF_UNSPEC;
1230	ssa = (sockunion_t *)&gsr.gsr_source;
1231	ssa->ss.ss_family = AF_UNSPEC;
1232
1233	switch (sopt->sopt_name) {
1234	case IP_DROP_MEMBERSHIP:
1235	case IP_DROP_SOURCE_MEMBERSHIP:
1236		if (sopt->sopt_name == IP_DROP_MEMBERSHIP) {
1237			error = sooptcopyin(sopt, &mreqs,
1238			    sizeof(struct ip_mreq),
1239			    sizeof(struct ip_mreq));
1240			/*
1241			 * Swap interface and sourceaddr arguments,
1242			 * as ip_mreq and ip_mreq_source are laid
1243			 * out differently.
1244			 */
1245			mreqs.imr_interface = mreqs.imr_sourceaddr;
1246			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
1247		} else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
1248			error = sooptcopyin(sopt, &mreqs,
1249			    sizeof(struct ip_mreq_source),
1250			    sizeof(struct ip_mreq_source));
1251		}
1252		if (error)
1253			return (error);
1254
1255		gsa->sin.sin_family = AF_INET;
1256		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1257		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1258
1259		if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
1260			ssa->sin.sin_family = AF_INET;
1261			ssa->sin.sin_len = sizeof(struct sockaddr_in);
1262			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1263		}
1264
1265		if (gsa->sin.sin_addr.s_addr != INADDR_ANY)
1266			INADDR_TO_IFP(mreqs.imr_interface, ifp);
1267
1268#ifdef DIAGNOSTIC
1269		if (bootverbose) {
1270			printf("%s: imr_interface = %s, ifp = %p\n",
1271			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1272		}
1273#endif
1274		break;
1275
1276	case MCAST_LEAVE_GROUP:
1277	case MCAST_LEAVE_SOURCE_GROUP:
1278		if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
1279			error = sooptcopyin(sopt, &gsr,
1280			    sizeof(struct group_req),
1281			    sizeof(struct group_req));
1282		} else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
1283			error = sooptcopyin(sopt, &gsr,
1284			    sizeof(struct group_source_req),
1285			    sizeof(struct group_source_req));
1286		}
1287		if (error)
1288			return (error);
1289
1290		if (gsa->sin.sin_family != AF_INET ||
1291		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1292			return (EINVAL);
1293
1294		if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
1295			if (ssa->sin.sin_family != AF_INET ||
1296			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1297				return (EINVAL);
1298		}
1299
1300		if (gsr.gsr_interface == 0 || if_index < gsr.gsr_interface)
1301			return (EADDRNOTAVAIL);
1302
1303		ifp = ifnet_byindex(gsr.gsr_interface);
1304		break;
1305
1306	default:
1307#ifdef DIAGNOSTIC
1308		if (bootverbose) {
1309			printf("%s: unknown sopt_name %d\n", __func__,
1310			    sopt->sopt_name);
1311		}
1312#endif
1313		return (EOPNOTSUPP);
1314		break;
1315	}
1316
1317	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1318		return (EINVAL);
1319
1320	/*
1321	 * Find the membership in the membership array.
1322	 */
1323	imo = inp_findmoptions(inp);
1324	idx = imo_match_group(imo, ifp, &gsa->sa);
1325	if (idx == -1) {
1326		error = EADDRNOTAVAIL;
1327		goto out_locked;
1328	}
1329	imf = &imo->imo_mfilters[idx];
1330
1331	/*
1332	 * If we were instructed only to leave a given source, do so.
1333	 */
1334	if (ssa->ss.ss_family != AF_UNSPEC) {
1335		if (imf->imf_nsources == 0 ||
1336		    imf->imf_fmode == MCAST_EXCLUDE) {
1337			/*
1338			 * Attempting to SSM leave an ASM group
1339			 * is an error; should use *_BLOCK_SOURCE instead.
1340			 * Attempting to SSM leave a source in a group when
1341			 * the socket is in 'exclude mode' is also an error.
1342			 */
1343			error = EINVAL;
1344		} else {
1345			error = imo_leave_source(imo, idx, ssa);
1346		}
1347		/*
1348		 * If an error occurred, or this source is not the last
1349		 * source in the group, do not leave the whole group.
1350		 */
1351		if (error || imf->imf_nsources > 0)
1352			goto out_locked;
1353	}
1354
1355	/*
1356	 * Give up the multicast address record to which the membership points.
1357	 */
1358	inm = imo->imo_membership[idx];
1359	in_delmulti(inm);
1360
1361	/*
1362	 * Free any source filters for this group if they exist.
1363	 * Revert inpcb to the default MCAST_EXCLUDE state.
1364	 */
1365	if (imo->imo_mfilters != NULL) {
1366		TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) {
1367			TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
1368			FREE(ims, M_IPMSOURCE);
1369			imf->imf_nsources--;
1370		}
1371		KASSERT(imf->imf_nsources == 0,
1372		    ("%s: imf_nsources not 0", __func__));
1373		KASSERT(TAILQ_EMPTY(&imf->imf_sources),
1374		    ("%s: imf_sources not empty", __func__));
1375		imf->imf_fmode = MCAST_EXCLUDE;
1376	}
1377
1378	/*
1379	 * Remove the gap in the membership array.
1380	 */
1381	for (++idx; idx < imo->imo_num_memberships; ++idx)
1382		imo->imo_membership[idx-1] = imo->imo_membership[idx];
1383	imo->imo_num_memberships--;
1384
1385out_locked:
1386	INP_WUNLOCK(inp);
1387	return (error);
1388}
1389
1390/*
1391 * Select the interface for transmitting IPv4 multicast datagrams.
1392 *
1393 * Either an instance of struct in_addr or an instance of struct ip_mreqn
1394 * may be passed to this socket option. An address of INADDR_ANY or an
1395 * interface index of 0 is used to remove a previous selection.
1396 * When no interface is selected, one is chosen for every send.
1397 */
1398static int
1399inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
1400{
1401	struct in_addr		 addr;
1402	struct ip_mreqn		 mreqn;
1403	struct ifnet		*ifp;
1404	struct ip_moptions	*imo;
1405	int			 error;
1406
1407	if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
1408		/*
1409		 * An interface index was specified using the
1410		 * Linux-derived ip_mreqn structure.
1411		 */
1412		error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn),
1413		    sizeof(struct ip_mreqn));
1414		if (error)
1415			return (error);
1416
1417		if (mreqn.imr_ifindex < 0 || if_index < mreqn.imr_ifindex)
1418			return (EINVAL);
1419
1420		if (mreqn.imr_ifindex == 0) {
1421			ifp = NULL;
1422		} else {
1423			ifp = ifnet_byindex(mreqn.imr_ifindex);
1424			if (ifp == NULL)
1425				return (EADDRNOTAVAIL);
1426		}
1427	} else {
1428		/*
1429		 * An interface was specified by IPv4 address.
1430		 * This is the traditional BSD usage.
1431		 */
1432		error = sooptcopyin(sopt, &addr, sizeof(struct in_addr),
1433		    sizeof(struct in_addr));
1434		if (error)
1435			return (error);
1436		if (addr.s_addr == INADDR_ANY) {
1437			ifp = NULL;
1438		} else {
1439			INADDR_TO_IFP(addr, ifp);
1440			if (ifp == NULL)
1441				return (EADDRNOTAVAIL);
1442		}
1443#ifdef DIAGNOSTIC
1444		if (bootverbose) {
1445			printf("%s: ifp = %p, addr = %s\n",
1446			    __func__, ifp, inet_ntoa(addr)); /* XXX INET6 */
1447		}
1448#endif
1449	}
1450
1451	/* Reject interfaces which do not support multicast. */
1452	if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0)
1453		return (EOPNOTSUPP);
1454
1455	imo = inp_findmoptions(inp);
1456	imo->imo_multicast_ifp = ifp;
1457	imo->imo_multicast_addr.s_addr = INADDR_ANY;
1458	INP_WUNLOCK(inp);
1459
1460	return (0);
1461}
1462
1463/*
1464 * Atomically set source filters on a socket for an IPv4 multicast group.
1465 */
1466static int
1467inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
1468{
1469	struct __msfilterreq	 msfr;
1470	sockunion_t		*gsa;
1471	struct ifnet		*ifp;
1472	struct in_mfilter	*imf;
1473	struct ip_moptions	*imo;
1474	struct in_msource	*ims, *tims;
1475	size_t			 idx;
1476	int			 error;
1477
1478	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
1479	    sizeof(struct __msfilterreq));
1480	if (error)
1481		return (error);
1482
1483	if (msfr.msfr_nsrcs > IP_MAX_SOURCE_FILTER ||
1484	    (msfr.msfr_fmode != MCAST_EXCLUDE &&
1485	     msfr.msfr_fmode != MCAST_INCLUDE))
1486		return (EINVAL);
1487
1488	if (msfr.msfr_group.ss_family != AF_INET ||
1489	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
1490		return (EINVAL);
1491
1492	gsa = (sockunion_t *)&msfr.msfr_group;
1493	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1494		return (EINVAL);
1495
1496	gsa->sin.sin_port = 0;	/* ignore port */
1497
1498	if (msfr.msfr_ifindex == 0 || if_index < msfr.msfr_ifindex)
1499		return (EADDRNOTAVAIL);
1500
1501	ifp = ifnet_byindex(msfr.msfr_ifindex);
1502	if (ifp == NULL)
1503		return (EADDRNOTAVAIL);
1504
1505	/*
1506	 * Take the INP lock.
1507	 * Check if this socket is a member of this group.
1508	 */
1509	imo = inp_findmoptions(inp);
1510	idx = imo_match_group(imo, ifp, &gsa->sa);
1511	if (idx == -1 || imo->imo_mfilters == NULL) {
1512		error = EADDRNOTAVAIL;
1513		goto out_locked;
1514	}
1515	imf = &imo->imo_mfilters[idx];
1516
1517#ifdef DIAGNOSTIC
1518	if (bootverbose)
1519		printf("%s: clearing source list\n", __func__);
1520#endif
1521
1522	/*
1523	 * Remove any existing source filters.
1524	 */
1525	TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) {
1526		TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
1527		FREE(ims, M_IPMSOURCE);
1528		imf->imf_nsources--;
1529	}
1530	KASSERT(imf->imf_nsources == 0,
1531	    ("%s: source list not cleared", __func__));
1532
1533	/*
1534	 * Apply any new source filters, if present.
1535	 */
1536	if (msfr.msfr_nsrcs > 0) {
1537		struct in_msource	**pnims;
1538		struct in_msource	*nims;
1539		struct sockaddr_storage	*kss;
1540		struct sockaddr_storage	*pkss;
1541		sockunion_t		*psu;
1542		int			 i, j;
1543
1544		/*
1545		 * Drop the inp lock so we may sleep if we need to
1546		 * in order to satisfy a malloc request.
1547		 * We will re-take it before changing socket state.
1548		 */
1549		INP_WUNLOCK(inp);
1550#ifdef DIAGNOSTIC
1551		if (bootverbose) {
1552			printf("%s: loading %lu source list entries\n",
1553			    __func__, (unsigned long)msfr.msfr_nsrcs);
1554		}
1555#endif
1556		/*
1557		 * Make a copy of the user-space source vector so
1558		 * that we may copy them with a single copyin. This
1559		 * allows us to deal with page faults up-front.
1560		 */
1561		MALLOC(kss, struct sockaddr_storage *,
1562		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
1563		    M_TEMP, M_WAITOK);
1564		error = copyin(msfr.msfr_srcs, kss,
1565		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
1566		if (error) {
1567			FREE(kss, M_TEMP);
1568			return (error);
1569		}
1570
1571		/*
1572		 * Perform argument checking on every sockaddr_storage
1573		 * structure in the vector provided to us. Overwrite
1574		 * fields which should not apply to source entries.
1575		 * TODO: Check for duplicate sources on this pass.
1576		 */
1577		psu = (sockunion_t *)kss;
1578		for (i = 0; i < msfr.msfr_nsrcs; i++, psu++) {
1579			switch (psu->ss.ss_family) {
1580			case AF_INET:
1581				if (psu->sin.sin_len !=
1582				    sizeof(struct sockaddr_in)) {
1583					error = EINVAL;
1584				} else {
1585					psu->sin.sin_port = 0;
1586				}
1587				break;
1588#ifdef notyet
1589			case AF_INET6;
1590				if (psu->sin6.sin6_len !=
1591				    sizeof(struct sockaddr_in6)) {
1592					error = EINVAL;
1593				} else {
1594					psu->sin6.sin6_port = 0;
1595					psu->sin6.sin6_flowinfo = 0;
1596				}
1597				break;
1598#endif
1599			default:
1600				error = EAFNOSUPPORT;
1601				break;
1602			}
1603			if (error)
1604				break;
1605		}
1606		if (error) {
1607			FREE(kss, M_TEMP);
1608			return (error);
1609		}
1610
1611		/*
1612		 * Allocate a block to track all the in_msource
1613		 * entries we are about to allocate, in case we
1614		 * abruptly need to free them.
1615		 */
1616		MALLOC(pnims, struct in_msource **,
1617		    sizeof(struct in_msource *) * msfr.msfr_nsrcs,
1618		    M_TEMP, M_WAITOK | M_ZERO);
1619
1620		/*
1621		 * Allocate up to nsrcs individual chunks.
1622		 * If we encounter an error, backtrack out of
1623		 * all allocations cleanly; updates must be atomic.
1624		 */
1625		pkss = kss;
1626		nims = NULL;
1627		for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) {
1628			MALLOC(nims, struct in_msource *,
1629			    sizeof(struct in_msource) * msfr.msfr_nsrcs,
1630			    M_IPMSOURCE, M_WAITOK | M_ZERO);
1631			pnims[i] = nims;
1632		}
1633		if (i < msfr.msfr_nsrcs) {
1634			for (j = 0; j < i; j++) {
1635				if (pnims[j] != NULL)
1636					FREE(pnims[j], M_IPMSOURCE);
1637			}
1638			FREE(pnims, M_TEMP);
1639			FREE(kss, M_TEMP);
1640			return (ENOBUFS);
1641		}
1642
1643		INP_UNLOCK_ASSERT(inp);
1644
1645		/*
1646		 * Finally, apply the filters to the socket.
1647		 * Re-take the inp lock; we are changing socket state.
1648		 */
1649		pkss = kss;
1650		INP_WLOCK(inp);
1651		for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) {
1652			memcpy(&(pnims[i]->ims_addr), pkss,
1653			    sizeof(struct sockaddr_storage));
1654			TAILQ_INSERT_TAIL(&imf->imf_sources, pnims[i],
1655			    ims_next);
1656			imf->imf_nsources++;
1657		}
1658		FREE(pnims, M_TEMP);
1659		FREE(kss, M_TEMP);
1660	}
1661
1662	/*
1663	 * Update the filter mode on the socket before releasing the inpcb.
1664	 */
1665	INP_WLOCK_ASSERT(inp);
1666	imf->imf_fmode = msfr.msfr_fmode;
1667
1668out_locked:
1669	INP_WUNLOCK(inp);
1670	return (error);
1671}
1672
1673/*
1674 * Set the IP multicast options in response to user setsockopt().
1675 *
1676 * Many of the socket options handled in this function duplicate the
1677 * functionality of socket options in the regular unicast API. However,
1678 * it is not possible to merge the duplicate code, because the idempotence
1679 * of the IPv4 multicast part of the BSD Sockets API must be preserved;
1680 * the effects of these options must be treated as separate and distinct.
1681 */
1682int
1683inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
1684{
1685	struct ip_moptions	*imo;
1686	int			 error;
1687
1688	error = 0;
1689
1690	/*
1691	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
1692	 * or is a divert socket, reject it.
1693	 * XXX Unlocked read of inp_socket believed OK.
1694	 */
1695	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
1696	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
1697	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
1698		return (EOPNOTSUPP);
1699
1700	switch (sopt->sopt_name) {
1701	case IP_MULTICAST_VIF: {
1702		int vifi;
1703		/*
1704		 * Select a multicast VIF for transmission.
1705		 * Only useful if multicast forwarding is active.
1706		 */
1707		if (legal_vif_num == NULL) {
1708			error = EOPNOTSUPP;
1709			break;
1710		}
1711		error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
1712		if (error)
1713			break;
1714		if (!legal_vif_num(vifi) && (vifi != -1)) {
1715			error = EINVAL;
1716			break;
1717		}
1718		imo = inp_findmoptions(inp);
1719		imo->imo_multicast_vif = vifi;
1720		INP_WUNLOCK(inp);
1721		break;
1722	}
1723
1724	case IP_MULTICAST_IF:
1725		error = inp_set_multicast_if(inp, sopt);
1726		break;
1727
1728	case IP_MULTICAST_TTL: {
1729		u_char ttl;
1730
1731		/*
1732		 * Set the IP time-to-live for outgoing multicast packets.
1733		 * The original multicast API required a char argument,
1734		 * which is inconsistent with the rest of the socket API.
1735		 * We allow either a char or an int.
1736		 */
1737		if (sopt->sopt_valsize == sizeof(u_char)) {
1738			error = sooptcopyin(sopt, &ttl, sizeof(u_char),
1739			    sizeof(u_char));
1740			if (error)
1741				break;
1742		} else {
1743			u_int ittl;
1744
1745			error = sooptcopyin(sopt, &ittl, sizeof(u_int),
1746			    sizeof(u_int));
1747			if (error)
1748				break;
1749			if (ittl > 255) {
1750				error = EINVAL;
1751				break;
1752			}
1753			ttl = (u_char)ittl;
1754		}
1755		imo = inp_findmoptions(inp);
1756		imo->imo_multicast_ttl = ttl;
1757		INP_WUNLOCK(inp);
1758		break;
1759	}
1760
1761	case IP_MULTICAST_LOOP: {
1762		u_char loop;
1763
1764		/*
1765		 * Set the loopback flag for outgoing multicast packets.
1766		 * Must be zero or one.  The original multicast API required a
1767		 * char argument, which is inconsistent with the rest
1768		 * of the socket API.  We allow either a char or an int.
1769		 */
1770		if (sopt->sopt_valsize == sizeof(u_char)) {
1771			error = sooptcopyin(sopt, &loop, sizeof(u_char),
1772			    sizeof(u_char));
1773			if (error)
1774				break;
1775		} else {
1776			u_int iloop;
1777
1778			error = sooptcopyin(sopt, &iloop, sizeof(u_int),
1779					    sizeof(u_int));
1780			if (error)
1781				break;
1782			loop = (u_char)iloop;
1783		}
1784		imo = inp_findmoptions(inp);
1785		imo->imo_multicast_loop = !!loop;
1786		INP_WUNLOCK(inp);
1787		break;
1788	}
1789
1790	case IP_ADD_MEMBERSHIP:
1791	case IP_ADD_SOURCE_MEMBERSHIP:
1792	case MCAST_JOIN_GROUP:
1793	case MCAST_JOIN_SOURCE_GROUP:
1794		error = inp_join_group(inp, sopt);
1795		break;
1796
1797	case IP_DROP_MEMBERSHIP:
1798	case IP_DROP_SOURCE_MEMBERSHIP:
1799	case MCAST_LEAVE_GROUP:
1800	case MCAST_LEAVE_SOURCE_GROUP:
1801		error = inp_leave_group(inp, sopt);
1802		break;
1803
1804	case IP_BLOCK_SOURCE:
1805	case IP_UNBLOCK_SOURCE:
1806	case MCAST_BLOCK_SOURCE:
1807	case MCAST_UNBLOCK_SOURCE:
1808		error = inp_change_source_filter(inp, sopt);
1809		break;
1810
1811	case IP_MSFILTER:
1812		error = inp_set_source_filters(inp, sopt);
1813		break;
1814
1815	default:
1816		error = EOPNOTSUPP;
1817		break;
1818	}
1819
1820	INP_UNLOCK_ASSERT(inp);
1821
1822	return (error);
1823}
1824