1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 2010 Bjoern A. Zeeb <bz@FreeBSD.org>
5 * Copyright (c) 1980, 1986, 1993
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#include "opt_bpf.h"
34#include "opt_inet6.h"
35#include "opt_inet.h"
36#include "opt_ddb.h"
37
38#include <sys/param.h>
39#include <sys/capsicum.h>
40#include <sys/conf.h>
41#include <sys/eventhandler.h>
42#include <sys/malloc.h>
43#include <sys/domainset.h>
44#include <sys/sbuf.h>
45#include <sys/bus.h>
46#include <sys/epoch.h>
47#include <sys/mbuf.h>
48#include <sys/systm.h>
49#include <sys/priv.h>
50#include <sys/proc.h>
51#include <sys/socket.h>
52#include <sys/socketvar.h>
53#include <sys/protosw.h>
54#include <sys/kernel.h>
55#include <sys/lock.h>
56#include <sys/refcount.h>
57#include <sys/module.h>
58#include <sys/nv.h>
59#include <sys/rwlock.h>
60#include <sys/sockio.h>
61#include <sys/syslog.h>
62#include <sys/sysctl.h>
63#include <sys/sysent.h>
64#include <sys/taskqueue.h>
65#include <sys/domain.h>
66#include <sys/jail.h>
67#include <sys/priv.h>
68
69#ifdef DDB
70#include <ddb/ddb.h>
71#endif
72
73#include <machine/stdarg.h>
74#include <vm/uma.h>
75
76#include <net/bpf.h>
77#include <net/ethernet.h>
78#include <net/if.h>
79#include <net/if_arp.h>
80#include <net/if_clone.h>
81#include <net/if_dl.h>
82#include <net/if_strings.h>
83#include <net/if_types.h>
84#include <net/if_var.h>
85#include <net/if_media.h>
86#include <net/if_mib.h>
87#include <net/if_private.h>
88#include <net/if_vlan_var.h>
89#include <net/radix.h>
90#include <net/route.h>
91#include <net/route/route_ctl.h>
92#include <net/vnet.h>
93
94#if defined(INET) || defined(INET6)
95#include <net/ethernet.h>
96#include <netinet/in.h>
97#include <netinet/in_var.h>
98#include <netinet/ip.h>
99#include <netinet/ip_carp.h>
100#ifdef INET
101#include <net/debugnet.h>
102#include <netinet/if_ether.h>
103#endif /* INET */
104#ifdef INET6
105#include <netinet6/in6_var.h>
106#include <netinet6/in6_ifattach.h>
107#endif /* INET6 */
108#endif /* INET || INET6 */
109
110#include <security/mac/mac_framework.h>
111
112/*
113 * Consumers of struct ifreq such as tcpdump assume no pad between ifr_name
114 * and ifr_ifru when it is used in SIOCGIFCONF.
115 */
116_Static_assert(sizeof(((struct ifreq *)0)->ifr_name) ==
117    offsetof(struct ifreq, ifr_ifru), "gap between ifr_name and ifr_ifru");
118
119__read_mostly epoch_t net_epoch_preempt;
#ifdef COMPAT_FREEBSD32
#include <sys/mount.h>
#include <compat/freebsd32/freebsd32.h>

/*
 * Fixed-width layouts of the interface ioctl arguments, compiled in only
 * with COMPAT_FREEBSD32.  Members annotated as pointers or size_t are
 * stored as uint32_t.
 */

/* Length/buffer pair referenced from struct ifreq32 (ifru_buffer). */
struct ifreq_buffer32 {
	uint32_t	length;		/* (size_t) */
	uint32_t	buffer;		/* (void *) */
};

/*
 * Request structure for socket ioctls.  Every interface ioctl argument
 * starts with ifr_name; what follows may be specific to the interface.
 */
struct ifreq32 {
	char	ifr_name[IFNAMSIZ];		/* if name, e.g. "en0" */
	union {
		struct sockaddr	ifru_addr;
		struct sockaddr	ifru_dstaddr;
		struct sockaddr	ifru_broadaddr;
		struct ifreq_buffer32 ifru_buffer;
		short		ifru_flags[2];
		short		ifru_index;
		int		ifru_jid;
		int		ifru_metric;
		int		ifru_mtu;
		int		ifru_phys;
		int		ifru_media;
		uint32_t	ifru_data;
		int		ifru_cap[2];
		u_int		ifru_fib;
		u_char		ifru_vlan_pcp;
	} ifr_ifru;
};
/* Size and ifr_ifru offset must match the native struct ifreq. */
CTASSERT(sizeof(struct ifreq32) == sizeof(struct ifreq));
CTASSERT(__offsetof(struct ifreq32, ifr_ifru) ==
    __offsetof(struct ifreq, ifr_ifru));

/* Argument of SIOCGIFCONF32. */
struct ifconf32 {
	int32_t	ifc_len;
	union {
		uint32_t	ifcu_buf;
		uint32_t	ifcu_req;
	} ifc_ifcu;
};
#define	SIOCGIFCONF32	_IOWR('i', 36, struct ifconf32)

/* Argument of SIOC[SG]DRVSPEC32. */
struct ifdrv32 {
	char		ifd_name[IFNAMSIZ];
	uint32_t	ifd_cmd;
	uint32_t	ifd_len;
	uint32_t	ifd_data;
};
#define SIOCSDRVSPEC32	_IOC_NEWTYPE(SIOCSDRVSPEC, struct ifdrv32)
#define SIOCGDRVSPEC32	_IOC_NEWTYPE(SIOCGDRVSPEC, struct ifdrv32)

/* Argument of the interface group ioctls below. */
struct ifgroupreq32 {
	char	ifgr_name[IFNAMSIZ];
	u_int	ifgr_len;
	union {
		char		ifgru_group[IFNAMSIZ];
		uint32_t	ifgru_groups;
	} ifgr_ifgru;
};
#define	SIOCAIFGROUP32	_IOC_NEWTYPE(SIOCAIFGROUP, struct ifgroupreq32)
#define	SIOCGIFGROUP32	_IOC_NEWTYPE(SIOCGIFGROUP, struct ifgroupreq32)
#define	SIOCDIFGROUP32	_IOC_NEWTYPE(SIOCDIFGROUP, struct ifgroupreq32)
#define	SIOCGIFGMEMB32	_IOC_NEWTYPE(SIOCGIFGMEMB, struct ifgroupreq32)

/* Argument of SIOCGIF[X]MEDIA32. */
struct ifmediareq32 {
	char		ifm_name[IFNAMSIZ];
	int		ifm_current;
	int		ifm_mask;
	int		ifm_status;
	int		ifm_active;
	int		ifm_count;
	uint32_t	ifm_ulist;	/* (int *) */
};
#define	SIOCGIFMEDIA32	_IOC_NEWTYPE(SIOCGIFMEDIA, struct ifmediareq32)
#define	SIOCGIFXMEDIA32	_IOC_NEWTYPE(SIOCGIFXMEDIA, struct ifmediareq32)
#endif /* COMPAT_FREEBSD32 */
202
203union ifreq_union {
204	struct ifreq	ifr;
205#ifdef COMPAT_FREEBSD32
206	struct ifreq32	ifr32;
207#endif
208};
209
210SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
211    "Link layers");
212SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
213    "Generic link-management");
214
215SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN,
216    &ifqmaxlen, 0, "max send queue size");
217
218/* Log link state change events */
219static int log_link_state_change = 1;
220
221SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
222	&log_link_state_change, 0,
223	"log interface link state change events");
224
225/* Log promiscuous mode change events */
226static int log_promisc_mode_change = 1;
227
228SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN,
229	&log_promisc_mode_change, 1,
230	"log promiscuous mode change events");
231
232/* Interface description */
233static unsigned int ifdescr_maxlen = 1024;
234SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW,
235	&ifdescr_maxlen, 0,
236	"administrative maximum length for interface description");
237
238static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions");
239
240/* global sx for non-critical path ifdescr */
241static struct sx ifdescr_sx;
242SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr");
243
244void	(*ng_ether_link_state_p)(struct ifnet *ifp, int state);
245void	(*lagg_linkstate_p)(struct ifnet *ifp, int state);
246/* These are external hooks for CARP. */
247void	(*carp_linkstate_p)(struct ifnet *ifp);
248void	(*carp_demote_adj_p)(int, char *);
249int	(*carp_master_p)(struct ifaddr *);
250#if defined(INET) || defined(INET6)
251int	(*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
252int	(*carp_output_p)(struct ifnet *ifp, struct mbuf *m,
253    const struct sockaddr *sa);
254int	(*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);
255int	(*carp_attach_p)(struct ifaddr *, int);
256void	(*carp_detach_p)(struct ifaddr *, bool);
257#endif
258#ifdef INET
259int	(*carp_iamatch_p)(struct ifaddr *, uint8_t **);
260#endif
261#ifdef INET6
262struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6);
263caddr_t	(*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
264    const struct in6_addr *taddr);
265#endif
266
267struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
268
269/*
270 * XXX: Style; these should be sorted alphabetically, and unprototyped
271 * static functions should be prototyped. Currently they are sorted by
272 * declaration order.
273 */
274static void	if_attachdomain(void *);
275static void	if_attachdomain1(struct ifnet *);
276static int	ifconf(u_long, caddr_t);
277static void	if_input_default(struct ifnet *, struct mbuf *);
278static int	if_requestencap_default(struct ifnet *, struct if_encap_req *);
279static int	if_setflag(struct ifnet *, int, int, int *, int);
280static int	if_transmit_default(struct ifnet *ifp, struct mbuf *m);
281static void	if_unroute(struct ifnet *, int flag, int fam);
282static int	if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
283static void	do_link_state_change(void *, int);
284static int	if_getgroup(struct ifgroupreq *, struct ifnet *);
285static int	if_getgroupmembers(struct ifgroupreq *);
286static void	if_delgroups(struct ifnet *);
287static void	if_attach_internal(struct ifnet *, bool);
288static int	if_detach_internal(struct ifnet *, bool);
289static void	if_siocaddmulti(void *, int);
290static void	if_link_ifnet(struct ifnet *);
291static bool	if_unlink_ifnet(struct ifnet *, bool);
292#ifdef VIMAGE
293static int	if_vmove(struct ifnet *, struct vnet *);
294#endif
295
296#ifdef INET6
297/*
298 * XXX: declare here to avoid to include many inet6 related files..
299 * should be more generalized?
300 */
301extern void	nd6_setmtu(struct ifnet *);
302#endif
303
304/* ipsec helper hooks */
305VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]);
306VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]);
307
308int	ifqmaxlen = IFQ_MAXLEN;
309VNET_DEFINE(struct ifnethead, ifnet);	/* depend on static init XXX */
310VNET_DEFINE(struct ifgrouphead, ifg_head);
311
312/* Table of ifnet by index. */
313static int if_index;
314static int if_indexlim = 8;
315static struct ifindex_entry {
316	struct ifnet	*ife_ifnet;
317	uint16_t	ife_gencnt;
318} *ifindex_table;
319
320SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system,
321    CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
322    "Variables global to all interfaces");
323static int
324sysctl_ifcount(SYSCTL_HANDLER_ARGS)
325{
326	int rv = 0;
327
328	IFNET_RLOCK();
329	for (int i = 1; i <= if_index; i++)
330		if (ifindex_table[i].ife_ifnet != NULL &&
331		    ifindex_table[i].ife_ifnet->if_vnet == curvnet)
332			rv = i;
333	IFNET_RUNLOCK();
334
335	return (sysctl_handle_int(oidp, &rv, 0, req));
336}
337SYSCTL_PROC(_net_link_generic_system, IFMIB_IFCOUNT, ifcount,
338    CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RD, NULL, 0, sysctl_ifcount, "I",
339    "Maximum known interface index");
340
341/*
342 * The global network interface list (V_ifnet) and related state (such as
343 * if_index, if_indexlim, and ifindex_table) are protected by an sxlock.
344 * This may be acquired to stabilise the list, or we may rely on NET_EPOCH.
345 */
346struct sx ifnet_sxlock;
347SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE);
348
349struct sx ifnet_detach_sxlock;
350SX_SYSINIT_FLAGS(ifnet_detach, &ifnet_detach_sxlock, "ifnet_detach_sx",
351    SX_RECURSE);
352
353#ifdef VIMAGE
354#define	VNET_IS_SHUTTING_DOWN(_vnet)					\
355    ((_vnet)->vnet_shutdown && (_vnet)->vnet_state < SI_SUB_VNET_DONE)
356#endif
357
358static	if_com_alloc_t *if_com_alloc[256];
359static	if_com_free_t *if_com_free[256];
360
361static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
362MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
363MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
364
365struct ifnet *
366ifnet_byindex(u_int idx)
367{
368	struct ifnet *ifp;
369
370	NET_EPOCH_ASSERT();
371
372	if (__predict_false(idx > if_index))
373		return (NULL);
374
375	ifp = ck_pr_load_ptr(&ifindex_table[idx].ife_ifnet);
376
377	if (curvnet != NULL && ifp != NULL && ifp->if_vnet != curvnet)
378		ifp = NULL;
379
380	return (ifp);
381}
382
383struct ifnet *
384ifnet_byindex_ref(u_int idx)
385{
386	struct ifnet *ifp;
387
388	ifp = ifnet_byindex(idx);
389	if (ifp == NULL || (ifp->if_flags & IFF_DYING))
390		return (NULL);
391	if (!if_try_ref(ifp))
392		return (NULL);
393	return (ifp);
394}
395
396struct ifnet *
397ifnet_byindexgen(uint16_t idx, uint16_t gen)
398{
399	struct ifnet *ifp;
400
401	NET_EPOCH_ASSERT();
402
403	if (__predict_false(idx > if_index))
404		return (NULL);
405
406	ifp = ck_pr_load_ptr(&ifindex_table[idx].ife_ifnet);
407
408	if (ifindex_table[idx].ife_gencnt == gen)
409		return (ifp);
410	else
411		return (NULL);
412}
413
414/*
415 * Network interface utility routines.
416 *
417 * Routines with ifa_ifwith* names take sockaddr *'s as
418 * parameters.
419 */
420
421static void
422if_init_idxtable(void *arg __unused)
423{
424
425	ifindex_table = malloc(if_indexlim * sizeof(*ifindex_table),
426	    M_IFNET, M_WAITOK | M_ZERO);
427}
428SYSINIT(if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, if_init_idxtable, NULL);
429
430static void
431vnet_if_init(const void *unused __unused)
432{
433
434	CK_STAILQ_INIT(&V_ifnet);
435	CK_STAILQ_INIT(&V_ifg_head);
436	vnet_if_clone_init();
437}
438VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init,
439    NULL);
440
441static void
442if_link_ifnet(struct ifnet *ifp)
443{
444
445	IFNET_WLOCK();
446	CK_STAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link);
447#ifdef VIMAGE
448	curvnet->vnet_ifcnt++;
449#endif
450	IFNET_WUNLOCK();
451}
452
453static bool
454if_unlink_ifnet(struct ifnet *ifp, bool vmove)
455{
456	struct ifnet *iter;
457	int found = 0;
458
459	IFNET_WLOCK();
460	CK_STAILQ_FOREACH(iter, &V_ifnet, if_link)
461		if (iter == ifp) {
462			CK_STAILQ_REMOVE(&V_ifnet, ifp, ifnet, if_link);
463			if (!vmove)
464				ifp->if_flags |= IFF_DYING;
465			found = 1;
466			break;
467		}
468#ifdef VIMAGE
469	curvnet->vnet_ifcnt--;
470#endif
471	IFNET_WUNLOCK();
472
473	return (found);
474}
475
#ifdef VIMAGE
/*
 * Per-vnet SYSUNINIT hook: hand every interface whose home vnet differs
 * from its current vnet back to its home vnet.
 */
static void
vnet_if_return(const void *unused __unused)
{
	struct ifnet *ifp, *next;
	struct ifnet **moving;
	int unlinked __diagused;
	int nmoving = 0;

	/*
	 * Access to the V_ifnet tailq has to be protected.  Entering
	 * NET_EPOCH is not an option: if_vmove() calls
	 * if_detach_internal(), which waits for NET_EPOCH callbacks to
	 * complete, and that cannot be done from within NET_EPOCH.
	 *
	 * The IFNET_xLOCK, the V_ifnet read/write lock, does the job
	 * instead.  It must not be held across if_vmove() though, as that
	 * presents a LOR w.r.t. ifnet_sx, in_multi_sx and the iflib ctx
	 * lock.  So collect the interfaces under the lock and move them
	 * after dropping it.
	 */
	IFNET_WLOCK();

	moving = malloc(sizeof(*moving) * curvnet->vnet_ifcnt,
	    M_IFNET, M_WAITOK | M_ZERO);

	/* Unlink all inherited interfaces and remember them. */
	CK_STAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, next) {
		if (ifp->if_home_vnet == ifp->if_vnet)
			continue;

		unlinked = if_unlink_ifnet(ifp, true);
		MPASS(unlinked);

		moving[nmoving++] = ifp;
	}
	IFNET_WUNLOCK();

	/* Return each of them to its parent vnet. */
	for (int k = 0; k < nmoving; k++) {
		ifp = moving[k];
		sx_xlock(&ifnet_detach_sxlock);
		if_vmove(ifp, ifp->if_home_vnet);
		sx_xunlock(&ifnet_detach_sxlock);
	}

	free(moving, M_IFNET);
}
VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY,
    vnet_if_return, NULL);
#endif
525
526/*
527 * Allocate a struct ifnet and an index for an interface.  A layer 2
528 * common structure will also be allocated if an allocation routine is
529 * registered for the passed type.
530 */
531static struct ifnet *
532if_alloc_domain(u_char type, int numa_domain)
533{
534	struct ifnet *ifp;
535	u_short idx;
536
537	KASSERT(numa_domain <= IF_NODOM, ("numa_domain too large"));
538	if (numa_domain == IF_NODOM)
539		ifp = malloc(sizeof(struct ifnet), M_IFNET,
540		    M_WAITOK | M_ZERO);
541	else
542		ifp = malloc_domainset(sizeof(struct ifnet), M_IFNET,
543		    DOMAINSET_PREF(numa_domain), M_WAITOK | M_ZERO);
544	ifp->if_type = type;
545	ifp->if_alloctype = type;
546	ifp->if_numa_domain = numa_domain;
547#ifdef VIMAGE
548	ifp->if_vnet = curvnet;
549#endif
550	if (if_com_alloc[type] != NULL) {
551		ifp->if_l2com = if_com_alloc[type](type, ifp);
552		KASSERT(ifp->if_l2com, ("%s: if_com_alloc[%u] failed", __func__,
553		    type));
554	}
555
556	IF_ADDR_LOCK_INIT(ifp);
557	TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
558	TASK_INIT(&ifp->if_addmultitask, 0, if_siocaddmulti, ifp);
559	ifp->if_afdata_initialized = 0;
560	IF_AFDATA_LOCK_INIT(ifp);
561	CK_STAILQ_INIT(&ifp->if_addrhead);
562	CK_STAILQ_INIT(&ifp->if_multiaddrs);
563	CK_STAILQ_INIT(&ifp->if_groups);
564#ifdef MAC
565	mac_ifnet_init(ifp);
566#endif
567	ifq_init(&ifp->if_snd, ifp);
568
569	refcount_init(&ifp->if_refcount, 1);	/* Index reference. */
570	for (int i = 0; i < IFCOUNTERS; i++)
571		ifp->if_counters[i] = counter_u64_alloc(M_WAITOK);
572	ifp->if_get_counter = if_get_counter_default;
573	ifp->if_pcp = IFNET_PCP_NONE;
574
575	/* Allocate an ifindex array entry. */
576	IFNET_WLOCK();
577	/*
578	 * Try to find an empty slot below if_index.  If we fail, take the
579	 * next slot.
580	 */
581	for (idx = 1; idx <= if_index; idx++) {
582		if (ifindex_table[idx].ife_ifnet == NULL)
583			break;
584	}
585
586	/* Catch if_index overflow. */
587	if (idx >= if_indexlim) {
588		struct ifindex_entry *new, *old;
589		int newlim;
590
591		newlim = if_indexlim * 2;
592		new = malloc(newlim * sizeof(*new), M_IFNET, M_WAITOK | M_ZERO);
593		memcpy(new, ifindex_table, if_indexlim * sizeof(*new));
594		old = ifindex_table;
595		ck_pr_store_ptr(&ifindex_table, new);
596		if_indexlim = newlim;
597		epoch_wait_preempt(net_epoch_preempt);
598		free(old, M_IFNET);
599	}
600	if (idx > if_index)
601		if_index = idx;
602
603	ifp->if_index = idx;
604	ifp->if_idxgen = ifindex_table[idx].ife_gencnt;
605	ck_pr_store_ptr(&ifindex_table[idx].ife_ifnet, ifp);
606	IFNET_WUNLOCK();
607
608	return (ifp);
609}
610
611struct ifnet *
612if_alloc_dev(u_char type, device_t dev)
613{
614	int numa_domain;
615
616	if (dev == NULL || bus_get_domain(dev, &numa_domain) != 0)
617		return (if_alloc_domain(type, IF_NODOM));
618	return (if_alloc_domain(type, numa_domain));
619}
620
621struct ifnet *
622if_alloc(u_char type)
623{
624
625	return (if_alloc_domain(type, IF_NODOM));
626}
627/*
628 * Do the actual work of freeing a struct ifnet, and layer 2 common
629 * structure.  This call is made when the network epoch guarantees
630 * us that nobody holds a pointer to the interface.
631 */
632static void
633if_free_deferred(epoch_context_t ctx)
634{
635	struct ifnet *ifp = __containerof(ctx, struct ifnet, if_epoch_ctx);
636
637	KASSERT((ifp->if_flags & IFF_DYING),
638	    ("%s: interface not dying", __func__));
639
640	if (if_com_free[ifp->if_alloctype] != NULL)
641		if_com_free[ifp->if_alloctype](ifp->if_l2com,
642		    ifp->if_alloctype);
643
644#ifdef MAC
645	mac_ifnet_destroy(ifp);
646#endif /* MAC */
647	IF_AFDATA_DESTROY(ifp);
648	IF_ADDR_LOCK_DESTROY(ifp);
649	ifq_delete(&ifp->if_snd);
650
651	for (int i = 0; i < IFCOUNTERS; i++)
652		counter_u64_free(ifp->if_counters[i]);
653
654	if_freedescr(ifp->if_description);
655	free(ifp->if_hw_addr, M_IFADDR);
656	free(ifp, M_IFNET);
657}
658
659/*
660 * Deregister an interface and free the associated storage.
661 */
662void
663if_free(struct ifnet *ifp)
664{
665
666	ifp->if_flags |= IFF_DYING;			/* XXX: Locking */
667
668	/*
669	 * XXXGL: An interface index is really an alias to ifp pointer.
670	 * Why would we clear the alias now, and not in the deferred
671	 * context?  Indeed there is nothing wrong with some network
672	 * thread obtaining ifp via ifnet_byindex() inside the network
673	 * epoch and then dereferencing ifp while we perform if_free(),
674	 * and after if_free() finished, too.
675	 *
676	 * This early index freeing was important back when ifindex was
677	 * virtualized and interface would outlive the vnet.
678	 */
679	IFNET_WLOCK();
680	MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp);
681	ck_pr_store_ptr(&ifindex_table[ifp->if_index].ife_ifnet, NULL);
682	ifindex_table[ifp->if_index].ife_gencnt++;
683	while (if_index > 0 && ifindex_table[if_index].ife_ifnet == NULL)
684		if_index--;
685	IFNET_WUNLOCK();
686
687	if (refcount_release(&ifp->if_refcount))
688		NET_EPOCH_CALL(if_free_deferred, &ifp->if_epoch_ctx);
689}
690
691/*
692 * Interfaces to keep an ifnet type-stable despite the possibility of the
693 * driver calling if_free().  If there are additional references, we defer
694 * freeing the underlying data structure.
695 */
696void
697if_ref(struct ifnet *ifp)
698{
699	u_int old __diagused;
700
701	/* We don't assert the ifnet list lock here, but arguably should. */
702	old = refcount_acquire(&ifp->if_refcount);
703	KASSERT(old > 0, ("%s: ifp %p has 0 refs", __func__, ifp));
704}
705
706bool
707if_try_ref(struct ifnet *ifp)
708{
709	NET_EPOCH_ASSERT();
710	return (refcount_acquire_if_not_zero(&ifp->if_refcount));
711}
712
713void
714if_rele(struct ifnet *ifp)
715{
716
717	if (!refcount_release(&ifp->if_refcount))
718		return;
719	NET_EPOCH_CALL(if_free_deferred, &ifp->if_epoch_ctx);
720}
721
722void
723ifq_init(struct ifaltq *ifq, struct ifnet *ifp)
724{
725
726	mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
727
728	if (ifq->ifq_maxlen == 0)
729		ifq->ifq_maxlen = ifqmaxlen;
730
731	ifq->altq_type = 0;
732	ifq->altq_disc = NULL;
733	ifq->altq_flags &= ALTQF_CANTCHANGE;
734	ifq->altq_tbr  = NULL;
735	ifq->altq_ifp  = ifp;
736}
737
738void
739ifq_delete(struct ifaltq *ifq)
740{
741	mtx_destroy(&ifq->ifq_mtx);
742}
743
744/*
745 * Perform generic interface initialization tasks and attach the interface
746 * to the list of "active" interfaces.  If vmove flag is set on entry
747 * to if_attach_internal(), perform only a limited subset of initialization
748 * tasks, given that we are moving from one vnet to another an ifnet which
749 * has already been fully initialized.
750 *
751 * Note that if_detach_internal() removes group membership unconditionally
752 * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL.
753 * Thus, when if_vmove() is applied to a cloned interface, group membership
754 * is lost while a cloned one always joins a group whose name is
755 * ifc->ifc_name.  To recover this after if_detach_internal() and
756 * if_attach_internal(), the cloner should be specified to
757 * if_attach_internal() via ifc.  If it is non-NULL, if_attach_internal()
758 * attempts to join a group whose name is ifc->ifc_name.
759 *
760 * XXX:
761 *  - The decision to return void and thus require this function to
762 *    succeed is questionable.
763 *  - We should probably do more sanity checking.  For instance we don't
764 *    do anything to insure if_xname is unique or non-empty.
765 */
766void
767if_attach(struct ifnet *ifp)
768{
769
770	if_attach_internal(ifp, false);
771}
772
773/*
774 * Compute the least common TSO limit.
775 */
776void
777if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax)
778{
779	/*
780	 * 1) If there is no limit currently, take the limit from
781	 * the network adapter.
782	 *
783	 * 2) If the network adapter has a limit below the current
784	 * limit, apply it.
785	 */
786	if (pmax->tsomaxbytes == 0 || (ifp->if_hw_tsomax != 0 &&
787	    ifp->if_hw_tsomax < pmax->tsomaxbytes)) {
788		pmax->tsomaxbytes = ifp->if_hw_tsomax;
789	}
790	if (pmax->tsomaxsegcount == 0 || (ifp->if_hw_tsomaxsegcount != 0 &&
791	    ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) {
792		pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
793	}
794	if (pmax->tsomaxsegsize == 0 || (ifp->if_hw_tsomaxsegsize != 0 &&
795	    ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) {
796		pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
797	}
798}
799
800/*
801 * Update TSO limit of a network adapter.
802 *
803 * Returns zero if no change. Else non-zero.
804 */
805int
806if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax)
807{
808	int retval = 0;
809	if (ifp->if_hw_tsomax != pmax->tsomaxbytes) {
810		ifp->if_hw_tsomax = pmax->tsomaxbytes;
811		retval++;
812	}
813	if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) {
814		ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize;
815		retval++;
816	}
817	if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) {
818		ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount;
819		retval++;
820	}
821	return (retval);
822}
823
824static void
825if_attach_internal(struct ifnet *ifp, bool vmove)
826{
827	unsigned socksize, ifasize;
828	int namelen, masklen;
829	struct sockaddr_dl *sdl;
830	struct ifaddr *ifa;
831
832	MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp);
833
834#ifdef VIMAGE
835	ifp->if_vnet = curvnet;
836	if (ifp->if_home_vnet == NULL)
837		ifp->if_home_vnet = curvnet;
838#endif
839
840	if_addgroup(ifp, IFG_ALL);
841
842#ifdef VIMAGE
843	/* Restore group membership for cloned interface. */
844	if (vmove)
845		if_clone_restoregroup(ifp);
846#endif
847
848	getmicrotime(&ifp->if_lastchange);
849	ifp->if_epoch = time_uptime;
850
851	KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) ||
852	    (ifp->if_transmit != NULL && ifp->if_qflush != NULL),
853	    ("transmit and qflush must both either be set or both be NULL"));
854	if (ifp->if_transmit == NULL) {
855		ifp->if_transmit = if_transmit_default;
856		ifp->if_qflush = if_qflush;
857	}
858	if (ifp->if_input == NULL)
859		ifp->if_input = if_input_default;
860
861	if (ifp->if_requestencap == NULL)
862		ifp->if_requestencap = if_requestencap_default;
863
864	if (!vmove) {
865#ifdef MAC
866		mac_ifnet_create(ifp);
867#endif
868
869		/*
870		 * Create a Link Level name for this device.
871		 */
872		namelen = strlen(ifp->if_xname);
873		/*
874		 * Always save enough space for any possiable name so we
875		 * can do a rename in place later.
876		 */
877		masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
878		socksize = masklen + ifp->if_addrlen;
879		if (socksize < sizeof(*sdl))
880			socksize = sizeof(*sdl);
881		socksize = roundup2(socksize, sizeof(long));
882		ifasize = sizeof(*ifa) + 2 * socksize;
883		ifa = ifa_alloc(ifasize, M_WAITOK);
884		sdl = (struct sockaddr_dl *)(ifa + 1);
885		sdl->sdl_len = socksize;
886		sdl->sdl_family = AF_LINK;
887		bcopy(ifp->if_xname, sdl->sdl_data, namelen);
888		sdl->sdl_nlen = namelen;
889		sdl->sdl_index = ifp->if_index;
890		sdl->sdl_type = ifp->if_type;
891		ifp->if_addr = ifa;
892		ifa->ifa_ifp = ifp;
893		ifa->ifa_addr = (struct sockaddr *)sdl;
894		sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
895		ifa->ifa_netmask = (struct sockaddr *)sdl;
896		sdl->sdl_len = masklen;
897		while (namelen != 0)
898			sdl->sdl_data[--namelen] = 0xff;
899		CK_STAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
900		/* Reliably crash if used uninitialized. */
901		ifp->if_broadcastaddr = NULL;
902
903		if (ifp->if_type == IFT_ETHER) {
904			ifp->if_hw_addr = malloc(ifp->if_addrlen, M_IFADDR,
905			    M_WAITOK | M_ZERO);
906		}
907
908#if defined(INET) || defined(INET6)
909		/* Use defaults for TSO, if nothing is set */
910		if (ifp->if_hw_tsomax == 0 &&
911		    ifp->if_hw_tsomaxsegcount == 0 &&
912		    ifp->if_hw_tsomaxsegsize == 0) {
913			/*
914			 * The TSO defaults needs to be such that an
915			 * NFS mbuf list of 35 mbufs totalling just
916			 * below 64K works and that a chain of mbufs
917			 * can be defragged into at most 32 segments:
918			 */
919			ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) -
920			    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
921			ifp->if_hw_tsomaxsegcount = 35;
922			ifp->if_hw_tsomaxsegsize = 2048;	/* 2K */
923
924			/* XXX some drivers set IFCAP_TSO after ethernet attach */
925			if (ifp->if_capabilities & IFCAP_TSO) {
926				if_printf(ifp, "Using defaults for TSO: %u/%u/%u\n",
927				    ifp->if_hw_tsomax,
928				    ifp->if_hw_tsomaxsegcount,
929				    ifp->if_hw_tsomaxsegsize);
930			}
931		}
932#endif
933	}
934#ifdef VIMAGE
935	else {
936		/*
937		 * Update the interface index in the link layer address
938		 * of the interface.
939		 */
940		for (ifa = ifp->if_addr; ifa != NULL;
941		    ifa = CK_STAILQ_NEXT(ifa, ifa_link)) {
942			if (ifa->ifa_addr->sa_family == AF_LINK) {
943				sdl = (struct sockaddr_dl *)ifa->ifa_addr;
944				sdl->sdl_index = ifp->if_index;
945			}
946		}
947	}
948#endif
949
950	if_link_ifnet(ifp);
951
952	if (domain_init_status >= 2)
953		if_attachdomain1(ifp);
954
955	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
956	if (IS_DEFAULT_VNET(curvnet))
957		devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
958}
959
960static void
961if_epochalloc(void *dummy __unused)
962{
963
964	net_epoch_preempt = epoch_alloc("Net preemptible", EPOCH_PREEMPT);
965}
966SYSINIT(ifepochalloc, SI_SUB_EPOCH, SI_ORDER_ANY, if_epochalloc, NULL);
967
968static void
969if_attachdomain(void *dummy)
970{
971	struct ifnet *ifp;
972
973	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link)
974		if_attachdomain1(ifp);
975}
976SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
977    if_attachdomain, NULL);
978
979static void
980if_attachdomain1(struct ifnet *ifp)
981{
982	struct domain *dp;
983
984	/*
985	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
986	 * cannot lock ifp->if_afdata initialization, entirely.
987	 */
988	IF_AFDATA_LOCK(ifp);
989	if (ifp->if_afdata_initialized >= domain_init_status) {
990		IF_AFDATA_UNLOCK(ifp);
991		log(LOG_WARNING, "%s called more than once on %s\n",
992		    __func__, ifp->if_xname);
993		return;
994	}
995	ifp->if_afdata_initialized = domain_init_status;
996	IF_AFDATA_UNLOCK(ifp);
997
998	/* address family dependent data region */
999	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
1000	SLIST_FOREACH(dp, &domains, dom_next) {
1001		if (dp->dom_ifattach)
1002			ifp->if_afdata[dp->dom_family] =
1003			    (*dp->dom_ifattach)(ifp);
1004	}
1005}
1006
/*
 * Remove any unicast or broadcast network addresses from an interface.
 *
 * Repeatedly picks the first entry on ifp->if_addrhead whose family is not
 * AF_LINK and removes it, until no such entry is left.  AF_LINK entries are
 * never selected, so they remain on the list.
 */
void
if_purgeaddrs(struct ifnet *ifp)
{
	struct ifaddr *ifa;

#ifdef INET6
	/*
	 * Need to leave multicast addresses of proxy NDP llentries
	 * before in6_purgeifaddr() because the llentries are keys
	 * for in6_multi objects of proxy NDP entries.
	 * in6_purgeifaddr()s clean up llentries including proxy NDPs
	 * then we would lose the keys if they are called earlier.
	 */
	in6_purge_proxy_ndp(ifp);
#endif
	while (1) {
		struct epoch_tracker et;

		/*
		 * Find the first non-AF_LINK address.  When the list holds
		 * none, the FOREACH runs to completion and leaves ifa NULL.
		 */
		NET_EPOCH_ENTER(et);
		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			if (ifa->ifa_addr->sa_family != AF_LINK)
				break;
		}
		NET_EPOCH_EXIT(et);

		if (ifa == NULL)
			break;
#ifdef INET
		/* XXX: Ugly!! ad hoc just for INET */
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct ifaliasreq ifr;

			/* Build a delete request mirroring this address. */
			bzero(&ifr, sizeof(ifr));
			ifr.ifra_addr = *ifa->ifa_addr;
			if (ifa->ifa_dstaddr)
				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
			/*
			 * On success rescan from the top; on failure fall
			 * through to the generic removal at the bottom.
			 */
			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
			    NULL) == 0)
				continue;
		}
#endif /* INET */
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
			in6_purgeifaddr((struct in6_ifaddr *)ifa);
			/* ifp_addrhead is already updated */
			continue;
		}
#endif /* INET6 */
		/* Generic path: unlink under the write lock, then drop a ref. */
		IF_ADDR_WLOCK(ifp);
		CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link);
		IF_ADDR_WUNLOCK(ifp);
		ifa_free(ifa);
	}
}
1064
1065/*
1066 * Remove any multicast network addresses from an interface when an ifnet
1067 * is going away.
1068 */
1069static void
1070if_purgemaddrs(struct ifnet *ifp)
1071{
1072	struct ifmultiaddr *ifma;
1073
1074	IF_ADDR_WLOCK(ifp);
1075	while (!CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) {
1076		ifma = CK_STAILQ_FIRST(&ifp->if_multiaddrs);
1077		CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
1078		if_delmulti_locked(ifp, ifma, 1);
1079	}
1080	IF_ADDR_WUNLOCK(ifp);
1081}
1082
1083/*
1084 * Detach an interface, removing it from the list of "active" interfaces.
1085 * If vmove flag is set on entry to if_detach_internal(), perform only a
1086 * limited subset of cleanup tasks, given that we are moving an ifnet from
1087 * one vnet to another, where it must be fully operational.
1088 *
1089 * XXXRW: There are some significant questions about event ordering, and
1090 * how to prevent things from starting to use the interface during detach.
1091 */
1092void
1093if_detach(struct ifnet *ifp)
1094{
1095	bool found;
1096
1097	CURVNET_SET_QUIET(ifp->if_vnet);
1098	found = if_unlink_ifnet(ifp, false);
1099	if (found) {
1100		sx_xlock(&ifnet_detach_sxlock);
1101		if_detach_internal(ifp, false);
1102		sx_xunlock(&ifnet_detach_sxlock);
1103	}
1104	CURVNET_RESTORE();
1105}
1106
/*
 * Common detach path; the caller must hold ifnet_detach_sxlock exclusively.
 *
 * The vmove flag, if set, indicates that we are called from a callpath
 * that is moving an interface to a different vnet instance.  In that case
 * the interface is not marked dead and its first address entry is kept.
 *
 * The local shutdown flag (VIMAGE only) is taken from
 * VNET_IS_SHUTTING_DOWN(ifp->if_vnet) and indicates that the vnet
 * instance is being shut down.  When it is set, only the common early
 * steps and the per-domain detach at the end are performed.  Note: we can
 * be called on a vnet instance shutdown without this flag being set, e.g.,
 * when the cloned interfaces are destroyed as first thing of teardown.
 *
 * Every path visible here returns 0.
 */
static int
if_detach_internal(struct ifnet *ifp, bool vmove)
{
	struct ifaddr *ifa;
	int i;
	struct domain *dp;
#ifdef VIMAGE
	bool shutdown;

	shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet);
#endif

	sx_assert(&ifnet_detach_sxlock, SX_XLOCKED);

	/*
	 * At this point we know the interface still was on the ifnet list
	 * and we removed it so we are in a stable state.
	 */
	epoch_wait_preempt(net_epoch_preempt);

	/*
	 * Ensure all pending EPOCH(9) callbacks have been executed. This
	 * fixes issues about late destruction of multicast options
	 * which lead to leave group calls, which in turn access the
	 * belonging ifnet structure:
	 */
	NET_EPOCH_DRAIN_CALLBACKS();

	/*
	 * In any case (destroy or vmove) detach us from the groups
	 * and remove/wait for pending events on the taskq.
	 * XXX-BZ in theory an interface could still enqueue a taskq change?
	 */
	if_delgroups(ifp);

	taskqueue_drain(taskqueue_swi, &ifp->if_linktask);
	taskqueue_drain(taskqueue_swi, &ifp->if_addmultitask);

	if_down(ifp);

#ifdef VIMAGE
	/*
	 * On VNET shutdown abort here as the stack teardown will do all
	 * the work top-down for us.
	 */
	if (shutdown) {
		/* Give interface users the chance to clean up. */
		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);

		/*
		 * In case of a vmove we are done here without error.
		 * If we would signal an error it would lead to the same
		 * abort as if we did not find the ifnet anymore.
		 * if_detach() calls us in void context and does not care
		 * about an early abort notification, so life is splendid :)
		 */
		goto finish_vnet_shutdown;
	}
#endif

	/*
	 * At this point we are not tearing down a VNET and are either
	 * going to destroy or vmove the interface and have to cleanup
	 * accordingly.
	 */

	/*
	 * Remove routes and flush queues.
	 */
#ifdef ALTQ
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
		altq_disable(&ifp->if_snd);
	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
		altq_detach(&ifp->if_snd);
#endif

	if_purgeaddrs(ifp);

#ifdef INET
	in_ifdetach(ifp);
#endif

#ifdef INET6
	/*
	 * Remove all IPv6 kernel structs related to ifp.  This should be done
	 * before removing routing entries below, since IPv6 interface direct
	 * routes are expected to be removed by the IPv6-specific kernel API.
	 * Otherwise, the kernel will detect some inconsistency and bark it.
	 */
	in6_ifdetach(ifp);
#endif
	if_purgemaddrs(ifp);

	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
	if (IS_DEFAULT_VNET(curvnet))
		devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);

	if (!vmove) {
		/*
		 * Prevent further calls into the device driver via ifnet.
		 */
		if_dead(ifp);

		/*
		 * Remove the first entry still on the address list, if any.
		 * Only one entry is removed here.
		 * NOTE(review): presumably this is the link-level address
		 * that if_purgeaddrs() leaves behind -- confirm.
		 */
		IF_ADDR_WLOCK(ifp);
		if (!CK_STAILQ_EMPTY(&ifp->if_addrhead)) {
			ifa = CK_STAILQ_FIRST(&ifp->if_addrhead);
			CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link);
			IF_ADDR_WUNLOCK(ifp);
			ifa_free(ifa);
		} else
			IF_ADDR_WUNLOCK(ifp);
	}

	rt_flushifroutes(ifp);

#ifdef VIMAGE
finish_vnet_shutdown:
#endif
	/*
	 * We cannot hold the lock over dom_ifdetach calls as they might
	 * sleep, for example trying to drain a callout, thus open up the
	 * theoretical race with re-attaching.
	 */
	IF_AFDATA_LOCK(ifp);
	i = ifp->if_afdata_initialized;
	ifp->if_afdata_initialized = 0;
	IF_AFDATA_UNLOCK(ifp);
	/* Nothing was initialized, so there is no per-domain data to detach. */
	if (i == 0)
		return (0);
	/* Hand each domain its per-interface data back and clear the slot. */
	SLIST_FOREACH(dp, &domains, dom_next) {
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) {
			(*dp->dom_ifdetach)(ifp,
			    ifp->if_afdata[dp->dom_family]);
			ifp->if_afdata[dp->dom_family] = NULL;
		}
	}

	return (0);
}
1259
1260#ifdef VIMAGE
/*
 * if_vmove() performs a limited version of if_detach() in current
 * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg.
 *
 * Returns the error from if_detach_internal() if the detach step fails,
 * otherwise 0.  The callers visible in this file hold
 * ifnet_detach_sxlock exclusively around the call, which
 * if_detach_internal() asserts.
 */
static int
if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
{
#ifdef DEV_BPF
	u_int bif_dlt, bif_hdrlen;
#endif
	int rc;

#ifdef DEV_BPF
	/*
	 * if_detach_internal() will call the eventhandler to notify
	 * interface departure.  That will detach if_bpf.  We need to
	 * save the dlt and hdrlen so we can re-attach it later.
	 */
	bpf_get_bp_params(ifp->if_bpf, &bif_dlt, &bif_hdrlen);
#endif

	/*
	 * Detach from current vnet, but preserve LLADDR info, do not
	 * mark as dead etc. so that the ifnet can be reattached later.
	 * If we cannot find it, we lost the race to someone else.
	 */
	rc = if_detach_internal(ifp, true);
	if (rc != 0)
		return (rc);

	/*
	 * Perform interface-specific reassignment tasks, if provided by
	 * the driver.
	 */
	if (ifp->if_reassign != NULL)
		ifp->if_reassign(ifp, new_vnet, NULL);

	/*
	 * Switch to the context of the target vnet.
	 */
	CURVNET_SET_QUIET(new_vnet);
	if_attach_internal(ifp, true);

#ifdef DEV_BPF
	/* Re-create the bpf attachment with the saved parameters if needed. */
	if (ifp->if_bpf == NULL)
		bpfattach(ifp, bif_dlt, bif_hdrlen);
#endif

	CURVNET_RESTORE();
	return (0);
}
1312
1313/*
1314 * Move an ifnet to or from another child prison/vnet, specified by the jail id.
1315 */
1316static int
1317if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid)
1318{
1319	struct prison *pr;
1320	struct ifnet *difp;
1321	int error;
1322	bool found __diagused;
1323	bool shutdown;
1324
1325	MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp);
1326
1327	/* Try to find the prison within our visibility. */
1328	sx_slock(&allprison_lock);
1329	pr = prison_find_child(td->td_ucred->cr_prison, jid);
1330	sx_sunlock(&allprison_lock);
1331	if (pr == NULL)
1332		return (ENXIO);
1333	prison_hold_locked(pr);
1334	mtx_unlock(&pr->pr_mtx);
1335
1336	/* Do not try to move the iface from and to the same prison. */
1337	if (pr->pr_vnet == ifp->if_vnet) {
1338		prison_free(pr);
1339		return (EEXIST);
1340	}
1341
1342	/* Make sure the named iface does not exists in the dst. prison/vnet. */
1343	/* XXX Lock interfaces to avoid races. */
1344	CURVNET_SET_QUIET(pr->pr_vnet);
1345	difp = ifunit(ifname);
1346	if (difp != NULL) {
1347		CURVNET_RESTORE();
1348		prison_free(pr);
1349		return (EEXIST);
1350	}
1351	sx_xlock(&ifnet_detach_sxlock);
1352
1353	/* Make sure the VNET is stable. */
1354	shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet);
1355	if (shutdown) {
1356		sx_xunlock(&ifnet_detach_sxlock);
1357		CURVNET_RESTORE();
1358		prison_free(pr);
1359		return (EBUSY);
1360	}
1361	CURVNET_RESTORE();
1362
1363	found = if_unlink_ifnet(ifp, true);
1364	if (! found) {
1365		sx_xunlock(&ifnet_detach_sxlock);
1366		CURVNET_RESTORE();
1367		prison_free(pr);
1368		return (ENODEV);
1369	}
1370
1371	/* Move the interface into the child jail/vnet. */
1372	error = if_vmove(ifp, pr->pr_vnet);
1373
1374	/* Report the new if_xname back to the userland on success. */
1375	if (error == 0)
1376		sprintf(ifname, "%s", ifp->if_xname);
1377
1378	sx_xunlock(&ifnet_detach_sxlock);
1379
1380	prison_free(pr);
1381	return (error);
1382}
1383
1384static int
1385if_vmove_reclaim(struct thread *td, char *ifname, int jid)
1386{
1387	struct prison *pr;
1388	struct vnet *vnet_dst;
1389	struct ifnet *ifp;
1390	int error, found __diagused;
1391 	bool shutdown;
1392
1393	/* Try to find the prison within our visibility. */
1394	sx_slock(&allprison_lock);
1395	pr = prison_find_child(td->td_ucred->cr_prison, jid);
1396	sx_sunlock(&allprison_lock);
1397	if (pr == NULL)
1398		return (ENXIO);
1399	prison_hold_locked(pr);
1400	mtx_unlock(&pr->pr_mtx);
1401
1402	/* Make sure the named iface exists in the source prison/vnet. */
1403	CURVNET_SET(pr->pr_vnet);
1404	ifp = ifunit(ifname);		/* XXX Lock to avoid races. */
1405	if (ifp == NULL) {
1406		CURVNET_RESTORE();
1407		prison_free(pr);
1408		return (ENXIO);
1409	}
1410
1411	/* Do not try to move the iface from and to the same prison. */
1412	vnet_dst = TD_TO_VNET(td);
1413	if (vnet_dst == ifp->if_vnet) {
1414		CURVNET_RESTORE();
1415		prison_free(pr);
1416		return (EEXIST);
1417	}
1418
1419	/* Make sure the VNET is stable. */
1420	shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet);
1421	if (shutdown) {
1422		CURVNET_RESTORE();
1423		prison_free(pr);
1424		return (EBUSY);
1425	}
1426
1427	/* Get interface back from child jail/vnet. */
1428	found = if_unlink_ifnet(ifp, true);
1429	MPASS(found);
1430	sx_xlock(&ifnet_detach_sxlock);
1431	error = if_vmove(ifp, vnet_dst);
1432	sx_xunlock(&ifnet_detach_sxlock);
1433	CURVNET_RESTORE();
1434
1435	/* Report the new if_xname back to the userland on success. */
1436	if (error == 0)
1437		sprintf(ifname, "%s", ifp->if_xname);
1438
1439	prison_free(pr);
1440	return (error);
1441}
1442#endif /* VIMAGE */
1443
1444/*
1445 * Add a group to an interface
1446 */
1447int
1448if_addgroup(struct ifnet *ifp, const char *groupname)
1449{
1450	struct ifg_list		*ifgl;
1451	struct ifg_group	*ifg = NULL;
1452	struct ifg_member	*ifgm;
1453	int 			 new = 0;
1454
1455	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
1456	    groupname[strlen(groupname) - 1] <= '9')
1457		return (EINVAL);
1458
1459	IFNET_WLOCK();
1460	CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1461		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) {
1462			IFNET_WUNLOCK();
1463			return (EEXIST);
1464		}
1465
1466	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL) {
1467	    	IFNET_WUNLOCK();
1468		return (ENOMEM);
1469	}
1470
1471	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
1472		free(ifgl, M_TEMP);
1473		IFNET_WUNLOCK();
1474		return (ENOMEM);
1475	}
1476
1477	CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
1478		if (!strcmp(ifg->ifg_group, groupname))
1479			break;
1480
1481	if (ifg == NULL) {
1482		if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL) {
1483			free(ifgl, M_TEMP);
1484			free(ifgm, M_TEMP);
1485			IFNET_WUNLOCK();
1486			return (ENOMEM);
1487		}
1488		strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
1489		ifg->ifg_refcnt = 0;
1490		CK_STAILQ_INIT(&ifg->ifg_members);
1491		CK_STAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next);
1492		new = 1;
1493	}
1494
1495	ifg->ifg_refcnt++;
1496	ifgl->ifgl_group = ifg;
1497	ifgm->ifgm_ifp = ifp;
1498
1499	IF_ADDR_WLOCK(ifp);
1500	CK_STAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
1501	CK_STAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
1502	IF_ADDR_WUNLOCK(ifp);
1503
1504	IFNET_WUNLOCK();
1505
1506	if (new)
1507		EVENTHANDLER_INVOKE(group_attach_event, ifg);
1508	EVENTHANDLER_INVOKE(group_change_event, groupname);
1509
1510	return (0);
1511}
1512
1513/*
1514 * Helper function to remove a group out of an interface.  Expects the global
1515 * ifnet lock to be write-locked, and drops it before returning.
1516 */
1517static void
1518_if_delgroup_locked(struct ifnet *ifp, struct ifg_list *ifgl,
1519    const char *groupname)
1520{
1521	struct ifg_member *ifgm;
1522	bool freeifgl;
1523
1524	IFNET_WLOCK_ASSERT();
1525
1526	IF_ADDR_WLOCK(ifp);
1527	CK_STAILQ_REMOVE(&ifp->if_groups, ifgl, ifg_list, ifgl_next);
1528	IF_ADDR_WUNLOCK(ifp);
1529
1530	CK_STAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) {
1531		if (ifgm->ifgm_ifp == ifp) {
1532			CK_STAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm,
1533			    ifg_member, ifgm_next);
1534			break;
1535		}
1536	}
1537
1538	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
1539		CK_STAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_group,
1540		    ifg_next);
1541		freeifgl = true;
1542	} else {
1543		freeifgl = false;
1544	}
1545	IFNET_WUNLOCK();
1546
1547	epoch_wait_preempt(net_epoch_preempt);
1548	EVENTHANDLER_INVOKE(group_change_event, groupname);
1549	if (freeifgl) {
1550		EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
1551		free(ifgl->ifgl_group, M_TEMP);
1552	}
1553	free(ifgm, M_TEMP);
1554	free(ifgl, M_TEMP);
1555}
1556
1557/*
1558 * Remove a group from an interface
1559 */
1560int
1561if_delgroup(struct ifnet *ifp, const char *groupname)
1562{
1563	struct ifg_list *ifgl;
1564
1565	IFNET_WLOCK();
1566	CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1567		if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0)
1568			break;
1569	if (ifgl == NULL) {
1570		IFNET_WUNLOCK();
1571		return (ENOENT);
1572	}
1573
1574	_if_delgroup_locked(ifp, ifgl, groupname);
1575
1576	return (0);
1577}
1578
1579/*
1580 * Remove an interface from all groups
1581 */
1582static void
1583if_delgroups(struct ifnet *ifp)
1584{
1585	struct ifg_list *ifgl;
1586	char groupname[IFNAMSIZ];
1587
1588	IFNET_WLOCK();
1589	while ((ifgl = CK_STAILQ_FIRST(&ifp->if_groups)) != NULL) {
1590		strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ);
1591		_if_delgroup_locked(ifp, ifgl, groupname);
1592		IFNET_WLOCK();
1593	}
1594	IFNET_WUNLOCK();
1595}
1596
1597/*
1598 * Stores all groups from an interface in memory pointed to by ifgr.
1599 */
1600static int
1601if_getgroup(struct ifgroupreq *ifgr, struct ifnet *ifp)
1602{
1603	int			 len, error;
1604	struct ifg_list		*ifgl;
1605	struct ifg_req		 ifgrq, *ifgp;
1606
1607	NET_EPOCH_ASSERT();
1608
1609	if (ifgr->ifgr_len == 0) {
1610		CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1611			ifgr->ifgr_len += sizeof(struct ifg_req);
1612		return (0);
1613	}
1614
1615	len = ifgr->ifgr_len;
1616	ifgp = ifgr->ifgr_groups;
1617	/* XXX: wire */
1618	CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
1619		if (len < sizeof(ifgrq))
1620			return (EINVAL);
1621		bzero(&ifgrq, sizeof ifgrq);
1622		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
1623		    sizeof(ifgrq.ifgrq_group));
1624		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req))))
1625			return (error);
1626		len -= sizeof(ifgrq);
1627		ifgp++;
1628	}
1629
1630	return (0);
1631}
1632
1633/*
1634 * Stores all members of a group in memory pointed to by igfr
1635 */
1636static int
1637if_getgroupmembers(struct ifgroupreq *ifgr)
1638{
1639	struct ifg_group	*ifg;
1640	struct ifg_member	*ifgm;
1641	struct ifg_req		 ifgrq, *ifgp;
1642	int			 len, error;
1643
1644	IFNET_RLOCK();
1645	CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
1646		if (strcmp(ifg->ifg_group, ifgr->ifgr_name) == 0)
1647			break;
1648	if (ifg == NULL) {
1649		IFNET_RUNLOCK();
1650		return (ENOENT);
1651	}
1652
1653	if (ifgr->ifgr_len == 0) {
1654		CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
1655			ifgr->ifgr_len += sizeof(ifgrq);
1656		IFNET_RUNLOCK();
1657		return (0);
1658	}
1659
1660	len = ifgr->ifgr_len;
1661	ifgp = ifgr->ifgr_groups;
1662	CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
1663		if (len < sizeof(ifgrq)) {
1664			IFNET_RUNLOCK();
1665			return (EINVAL);
1666		}
1667		bzero(&ifgrq, sizeof ifgrq);
1668		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
1669		    sizeof(ifgrq.ifgrq_member));
1670		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
1671			IFNET_RUNLOCK();
1672			return (error);
1673		}
1674		len -= sizeof(ifgrq);
1675		ifgp++;
1676	}
1677	IFNET_RUNLOCK();
1678
1679	return (0);
1680}
1681
1682/*
1683 * Return counter values from counter(9)s stored in ifnet.
1684 */
1685uint64_t
1686if_get_counter_default(struct ifnet *ifp, ift_counter cnt)
1687{
1688
1689	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
1690
1691	return (counter_u64_fetch(ifp->if_counters[cnt]));
1692}
1693
1694/*
1695 * Increase an ifnet counter. Usually used for counters shared
1696 * between the stack and a driver, but function supports them all.
1697 */
1698void
1699if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc)
1700{
1701
1702	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
1703
1704	counter_u64_add(ifp->if_counters[cnt], inc);
1705}
1706
1707/*
1708 * Copy data from ifnet to userland API structure if_data.
1709 */
1710void
1711if_data_copy(struct ifnet *ifp, struct if_data *ifd)
1712{
1713
1714	ifd->ifi_type = ifp->if_type;
1715	ifd->ifi_physical = 0;
1716	ifd->ifi_addrlen = ifp->if_addrlen;
1717	ifd->ifi_hdrlen = ifp->if_hdrlen;
1718	ifd->ifi_link_state = ifp->if_link_state;
1719	ifd->ifi_vhid = 0;
1720	ifd->ifi_datalen = sizeof(struct if_data);
1721	ifd->ifi_mtu = ifp->if_mtu;
1722	ifd->ifi_metric = ifp->if_metric;
1723	ifd->ifi_baudrate = ifp->if_baudrate;
1724	ifd->ifi_hwassist = ifp->if_hwassist;
1725	ifd->ifi_epoch = ifp->if_epoch;
1726	ifd->ifi_lastchange = ifp->if_lastchange;
1727
1728	ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS);
1729	ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS);
1730	ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS);
1731	ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS);
1732	ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS);
1733	ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES);
1734	ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES);
1735	ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS);
1736	ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS);
1737	ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS);
1738	ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
1739	ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO);
1740}
1741
1742/*
1743 * Initialization, destruction and refcounting functions for ifaddrs.
1744 */
1745struct ifaddr *
1746ifa_alloc(size_t size, int flags)
1747{
1748	struct ifaddr *ifa;
1749
1750	KASSERT(size >= sizeof(struct ifaddr),
1751	    ("%s: invalid size %zu", __func__, size));
1752
1753	ifa = malloc(size, M_IFADDR, M_ZERO | flags);
1754	if (ifa == NULL)
1755		return (NULL);
1756
1757	if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL)
1758		goto fail;
1759	if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL)
1760		goto fail;
1761	if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL)
1762		goto fail;
1763	if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL)
1764		goto fail;
1765
1766	refcount_init(&ifa->ifa_refcnt, 1);
1767
1768	return (ifa);
1769
1770fail:
1771	/* free(NULL) is okay */
1772	counter_u64_free(ifa->ifa_opackets);
1773	counter_u64_free(ifa->ifa_ipackets);
1774	counter_u64_free(ifa->ifa_obytes);
1775	counter_u64_free(ifa->ifa_ibytes);
1776	free(ifa, M_IFADDR);
1777
1778	return (NULL);
1779}
1780
1781void
1782ifa_ref(struct ifaddr *ifa)
1783{
1784	u_int old __diagused;
1785
1786	old = refcount_acquire(&ifa->ifa_refcnt);
1787	KASSERT(old > 0, ("%s: ifa %p has 0 refs", __func__, ifa));
1788}
1789
1790int
1791ifa_try_ref(struct ifaddr *ifa)
1792{
1793
1794	NET_EPOCH_ASSERT();
1795	return (refcount_acquire_if_not_zero(&ifa->ifa_refcnt));
1796}
1797
1798static void
1799ifa_destroy(epoch_context_t ctx)
1800{
1801	struct ifaddr *ifa;
1802
1803	ifa = __containerof(ctx, struct ifaddr, ifa_epoch_ctx);
1804	counter_u64_free(ifa->ifa_opackets);
1805	counter_u64_free(ifa->ifa_ipackets);
1806	counter_u64_free(ifa->ifa_obytes);
1807	counter_u64_free(ifa->ifa_ibytes);
1808	free(ifa, M_IFADDR);
1809}
1810
1811void
1812ifa_free(struct ifaddr *ifa)
1813{
1814
1815	if (refcount_release(&ifa->ifa_refcnt))
1816		NET_EPOCH_CALL(ifa_destroy, &ifa->ifa_epoch_ctx);
1817}
1818
/*
 * XXX: Because sockaddr_dl has deeper structure than the sockaddr
 * structs used to represent other address families, it is necessary
 * to perform a different comparison.
 *
 * sa_dl_equal(a1, a2) is true when both sockaddr_dl structures have the
 * same sdl_len and their link-layer address bytes (as located by
 * CLLADDR()) match over the first sdl_alen bytes of a1.  Both arguments
 * are evaluated more than once, so they must be free of side effects.
 */

#define	sa_dl_equal(a1, a2)	\
	((((const struct sockaddr_dl *)(a1))->sdl_len ==		\
	 ((const struct sockaddr_dl *)(a2))->sdl_len) &&		\
	 (bcmp(CLLADDR((const struct sockaddr_dl *)(a1)),		\
	       CLLADDR((const struct sockaddr_dl *)(a2)),		\
	       ((const struct sockaddr_dl *)(a1))->sdl_alen) == 0))
1831
1832/*
1833 * Locate an interface based on a complete address.
1834 */
1835/*ARGSUSED*/
1836struct ifaddr *
1837ifa_ifwithaddr(const struct sockaddr *addr)
1838{
1839	struct ifnet *ifp;
1840	struct ifaddr *ifa;
1841
1842	NET_EPOCH_ASSERT();
1843
1844	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1845		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1846			if (ifa->ifa_addr->sa_family != addr->sa_family)
1847				continue;
1848			if (sa_equal(addr, ifa->ifa_addr)) {
1849				goto done;
1850			}
1851			/* IP6 doesn't have broadcast */
1852			if ((ifp->if_flags & IFF_BROADCAST) &&
1853			    ifa->ifa_broadaddr &&
1854			    ifa->ifa_broadaddr->sa_len != 0 &&
1855			    sa_equal(ifa->ifa_broadaddr, addr)) {
1856				goto done;
1857			}
1858		}
1859	}
1860	ifa = NULL;
1861done:
1862	return (ifa);
1863}
1864
1865int
1866ifa_ifwithaddr_check(const struct sockaddr *addr)
1867{
1868	struct epoch_tracker et;
1869	int rc;
1870
1871	NET_EPOCH_ENTER(et);
1872	rc = (ifa_ifwithaddr(addr) != NULL);
1873	NET_EPOCH_EXIT(et);
1874	return (rc);
1875}
1876
1877/*
1878 * Locate an interface based on the broadcast address.
1879 */
1880/* ARGSUSED */
1881struct ifaddr *
1882ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum)
1883{
1884	struct ifnet *ifp;
1885	struct ifaddr *ifa;
1886
1887	NET_EPOCH_ASSERT();
1888	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1889		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
1890			continue;
1891		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1892			if (ifa->ifa_addr->sa_family != addr->sa_family)
1893				continue;
1894			if ((ifp->if_flags & IFF_BROADCAST) &&
1895			    ifa->ifa_broadaddr &&
1896			    ifa->ifa_broadaddr->sa_len != 0 &&
1897			    sa_equal(ifa->ifa_broadaddr, addr)) {
1898				goto done;
1899			}
1900		}
1901	}
1902	ifa = NULL;
1903done:
1904	return (ifa);
1905}
1906
1907/*
1908 * Locate the point to point interface with a given destination address.
1909 */
1910/*ARGSUSED*/
1911struct ifaddr *
1912ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum)
1913{
1914	struct ifnet *ifp;
1915	struct ifaddr *ifa;
1916
1917	NET_EPOCH_ASSERT();
1918	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1919		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
1920			continue;
1921		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
1922			continue;
1923		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1924			if (ifa->ifa_addr->sa_family != addr->sa_family)
1925				continue;
1926			if (ifa->ifa_dstaddr != NULL &&
1927			    sa_equal(addr, ifa->ifa_dstaddr)) {
1928				goto done;
1929			}
1930		}
1931	}
1932	ifa = NULL;
1933done:
1934	return (ifa);
1935}
1936
/*
 * Find an interface on a specific network.  If many, choice
 * is most specific found.
 *
 * addr       - address to look up.
 * ignore_ptp - when non-zero, AF_INET point-to-point interfaces are
 *              matched by netmask like any other interface instead of by
 *              their destination address.
 * fibnum     - restrict the search to interfaces in this fib, or
 *              RT_ALL_FIBS for no restriction.
 *
 * Returns the matching ifaddr or NULL.  Must be called inside the network
 * epoch.
 */
struct ifaddr *
ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;
	const char *addr_data = addr->sa_data, *cplim;

	NET_EPOCH_ASSERT();
	/*
	 * AF_LINK addresses can be looked up directly by their index number,
	 * so do that if we can.
	 */
	if (af == AF_LINK) {
		ifp = ifnet_byindex(
		    ((const struct sockaddr_dl *)addr)->sdl_index);
		return (ifp ? ifp->if_addr : NULL);
	}

	/*
	 * Scan through each interface, looking for ones that have addresses
	 * in this address family and the requested fib.
	 */
	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
			continue;
		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			const char *cp, *cp2, *cp3;

			/*
			 * The "next" label sits on the continue so that the
			 * netmask comparison below can skip to the next
			 * address from inside its own loop.
			 */
			if (ifa->ifa_addr->sa_family != af)
next:				continue;
			if (af == AF_INET &&
			    ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) {
				/*
				 * This is a bit broken as it doesn't
				 * take into account that the remote end may
				 * be a single node in the network we are
				 * looking for.
				 * The trouble is that we don't know the
				 * netmask for the remote end.
				 */
				if (ifa->ifa_dstaddr != NULL &&
				    sa_equal(addr, ifa->ifa_dstaddr)) {
					goto done;
				}
			} else {
				/*
				 * Scan all the bits in the ifa's address.
				 * If a bit disagrees with what we are
				 * looking for, mask it with the netmask
				 * to see if it really matters.
				 * (A byte at a time)
				 */
				if (ifa->ifa_netmask == 0)
					continue;
				cp = addr_data;
				cp2 = ifa->ifa_addr->sa_data;
				cp3 = ifa->ifa_netmask->sa_data;
				/* One past the last byte of the netmask. */
				cplim = ifa->ifa_netmask->sa_len
					+ (char *)ifa->ifa_netmask;
				while (cp3 < cplim)
					if ((*cp++ ^ *cp2++) & *cp3++)
						goto next; /* next address! */
				/*
				 * If the netmask of what we just found
				 * is more specific than what we had before
				 * (if we had one), or if the virtual status
				 * of new prefix is better than of the old one,
				 * then remember the new one before continuing
				 * to search for an even better one.
				 */
				if (ifa_maybe == NULL ||
				    ifa_preferred(ifa_maybe, ifa) ||
				    rn_refines((caddr_t)ifa->ifa_netmask,
				    (caddr_t)ifa_maybe->ifa_netmask)) {
					ifa_maybe = ifa;
				}
			}
		}
	}
	/* No exact point-to-point hit: return the best netmask match, if any. */
	ifa = ifa_maybe;
	ifa_maybe = NULL;
done:
	return (ifa);
}
2027
2028/*
2029 * Find an interface address specific to an interface best matching
2030 * a given address.
2031 */
2032struct ifaddr *
2033ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
2034{
2035	struct ifaddr *ifa;
2036	const char *cp, *cp2, *cp3;
2037	char *cplim;
2038	struct ifaddr *ifa_maybe = NULL;
2039	u_int af = addr->sa_family;
2040
2041	if (af >= AF_MAX)
2042		return (NULL);
2043
2044	NET_EPOCH_ASSERT();
2045	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
2046		if (ifa->ifa_addr->sa_family != af)
2047			continue;
2048		if (ifa_maybe == NULL)
2049			ifa_maybe = ifa;
2050		if (ifa->ifa_netmask == 0) {
2051			if (sa_equal(addr, ifa->ifa_addr) ||
2052			    (ifa->ifa_dstaddr &&
2053			    sa_equal(addr, ifa->ifa_dstaddr)))
2054				goto done;
2055			continue;
2056		}
2057		if (ifp->if_flags & IFF_POINTOPOINT) {
2058			if (ifa->ifa_dstaddr && sa_equal(addr, ifa->ifa_dstaddr))
2059				goto done;
2060		} else {
2061			cp = addr->sa_data;
2062			cp2 = ifa->ifa_addr->sa_data;
2063			cp3 = ifa->ifa_netmask->sa_data;
2064			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
2065			for (; cp3 < cplim; cp3++)
2066				if ((*cp++ ^ *cp2++) & *cp3)
2067					break;
2068			if (cp3 == cplim)
2069				goto done;
2070		}
2071	}
2072	ifa = ifa_maybe;
2073done:
2074	return (ifa);
2075}
2076
2077/*
2078 * See whether new ifa is better than current one:
2079 * 1) A non-virtual one is preferred over virtual.
2080 * 2) A virtual in master state preferred over any other state.
2081 *
2082 * Used in several address selecting functions.
2083 */
2084int
2085ifa_preferred(struct ifaddr *cur, struct ifaddr *next)
2086{
2087
2088	return (cur->ifa_carp && (!next->ifa_carp ||
2089	    ((*carp_master_p)(next) && !(*carp_master_p)(cur))));
2090}
2091
2092struct sockaddr_dl *
2093link_alloc_sdl(size_t size, int flags)
2094{
2095
2096	return (malloc(size, M_TEMP, flags));
2097}
2098
2099void
2100link_free_sdl(struct sockaddr *sa)
2101{
2102	free(sa, M_TEMP);
2103}
2104
2105/*
2106 * Fills in given sdl with interface basic info.
2107 * Returns pointer to filled sdl.
2108 */
2109struct sockaddr_dl *
2110link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype)
2111{
2112	struct sockaddr_dl *sdl;
2113
2114	sdl = (struct sockaddr_dl *)paddr;
2115	memset(sdl, 0, sizeof(struct sockaddr_dl));
2116	sdl->sdl_len = sizeof(struct sockaddr_dl);
2117	sdl->sdl_family = AF_LINK;
2118	sdl->sdl_index = ifp->if_index;
2119	sdl->sdl_type = iftype;
2120
2121	return (sdl);
2122}
2123
2124/*
2125 * Mark an interface down and notify protocols of
2126 * the transition.
2127 */
2128static void
2129if_unroute(struct ifnet *ifp, int flag, int fam)
2130{
2131
2132	KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP"));
2133
2134	ifp->if_flags &= ~flag;
2135	getmicrotime(&ifp->if_lastchange);
2136	ifp->if_qflush(ifp);
2137
2138	if (ifp->if_carp)
2139		(*carp_linkstate_p)(ifp);
2140	rt_ifmsg(ifp, IFF_UP);
2141}
2142
/*
 * Function-pointer hooks into the vlan code.  They are defined here
 * without an initializer.
 * NOTE(review): presumably filled in by if_vlan when it is present --
 * confirm against that driver.
 */
void	(*vlan_link_state_p)(struct ifnet *);	/* XXX: private from if_vlan */
void	(*vlan_trunk_cap_p)(struct ifnet *);		/* XXX: private from if_vlan */
struct ifnet *(*vlan_trunkdev_p)(struct ifnet *);
struct	ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t);
int	(*vlan_tag_p)(struct ifnet *, uint16_t *);
int	(*vlan_pcp_p)(struct ifnet *, uint16_t *);
int	(*vlan_setcookie_p)(struct ifnet *, void *);
void	*(*vlan_cookie_p)(struct ifnet *);
2151
2152/*
2153 * Handle a change in the interface link state. To avoid LORs
2154 * between driver lock and upper layer locks, as well as possible
2155 * recursions, we post event to taskqueue, and all job
2156 * is done in static do_link_state_change().
2157 */
2158void
2159if_link_state_change(struct ifnet *ifp, int link_state)
2160{
2161	/* Return if state hasn't changed. */
2162	if (ifp->if_link_state == link_state)
2163		return;
2164
2165	ifp->if_link_state = link_state;
2166
2167	/* XXXGL: reference ifp? */
2168	taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
2169}
2170
2171static void
2172do_link_state_change(void *arg, int pending)
2173{
2174	struct ifnet *ifp;
2175	int link_state;
2176
2177	ifp = arg;
2178	link_state = ifp->if_link_state;
2179
2180	CURVNET_SET(ifp->if_vnet);
2181	rt_ifmsg(ifp, 0);
2182	if (ifp->if_vlantrunk != NULL)
2183		(*vlan_link_state_p)(ifp);
2184
2185	if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
2186	    ifp->if_l2com != NULL)
2187		(*ng_ether_link_state_p)(ifp, link_state);
2188	if (ifp->if_carp)
2189		(*carp_linkstate_p)(ifp);
2190	if (ifp->if_bridge)
2191		ifp->if_bridge_linkstate(ifp);
2192	if (ifp->if_lagg)
2193		(*lagg_linkstate_p)(ifp, link_state);
2194
2195	if (IS_DEFAULT_VNET(curvnet))
2196		devctl_notify("IFNET", ifp->if_xname,
2197		    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN",
2198		    NULL);
2199	if (pending > 1)
2200		if_printf(ifp, "%d link states coalesced\n", pending);
2201	if (log_link_state_change)
2202		if_printf(ifp, "link state changed to %s\n",
2203		    (link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
2204	EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state);
2205	CURVNET_RESTORE();
2206}
2207
2208/*
2209 * Mark an interface down and notify protocols of
2210 * the transition.
2211 */
2212void
2213if_down(struct ifnet *ifp)
2214{
2215
2216	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN);
2217	if_unroute(ifp, IFF_UP, AF_UNSPEC);
2218}
2219
2220/*
2221 * Mark an interface up and notify protocols of
2222 * the transition.
2223 */
2224void
2225if_up(struct ifnet *ifp)
2226{
2227
2228	ifp->if_flags |= IFF_UP;
2229	getmicrotime(&ifp->if_lastchange);
2230	if (ifp->if_carp)
2231		(*carp_linkstate_p)(ifp);
2232	rt_ifmsg(ifp, IFF_UP);
2233	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP);
2234}
2235
2236/*
2237 * Flush an interface queue.
2238 */
2239void
2240if_qflush(struct ifnet *ifp)
2241{
2242	struct mbuf *m, *n;
2243	struct ifaltq *ifq;
2244
2245	ifq = &ifp->if_snd;
2246	IFQ_LOCK(ifq);
2247#ifdef ALTQ
2248	if (ALTQ_IS_ENABLED(ifq))
2249		ALTQ_PURGE(ifq);
2250#endif
2251	n = ifq->ifq_head;
2252	while ((m = n) != NULL) {
2253		n = m->m_nextpkt;
2254		m_freem(m);
2255	}
2256	ifq->ifq_head = 0;
2257	ifq->ifq_tail = 0;
2258	ifq->ifq_len = 0;
2259	IFQ_UNLOCK(ifq);
2260}
2261
2262/*
2263 * Map interface name to interface structure pointer, with or without
2264 * returning a reference.
2265 */
2266struct ifnet *
2267ifunit_ref(const char *name)
2268{
2269	struct epoch_tracker et;
2270	struct ifnet *ifp;
2271
2272	NET_EPOCH_ENTER(et);
2273	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2274		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 &&
2275		    !(ifp->if_flags & IFF_DYING))
2276			break;
2277	}
2278	if (ifp != NULL) {
2279		if_ref(ifp);
2280		MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp);
2281	}
2282
2283	NET_EPOCH_EXIT(et);
2284	return (ifp);
2285}
2286
2287struct ifnet *
2288ifunit(const char *name)
2289{
2290	struct epoch_tracker et;
2291	struct ifnet *ifp;
2292
2293	NET_EPOCH_ENTER(et);
2294	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2295		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
2296			break;
2297	}
2298	NET_EPOCH_EXIT(et);
2299	return (ifp);
2300}
2301
2302void *
2303ifr_buffer_get_buffer(void *data)
2304{
2305	union ifreq_union *ifrup;
2306
2307	ifrup = data;
2308#ifdef COMPAT_FREEBSD32
2309	if (SV_CURPROC_FLAG(SV_ILP32))
2310		return ((void *)(uintptr_t)
2311		    ifrup->ifr32.ifr_ifru.ifru_buffer.buffer);
2312#endif
2313	return (ifrup->ifr.ifr_ifru.ifru_buffer.buffer);
2314}
2315
2316static void
2317ifr_buffer_set_buffer_null(void *data)
2318{
2319	union ifreq_union *ifrup;
2320
2321	ifrup = data;
2322#ifdef COMPAT_FREEBSD32
2323	if (SV_CURPROC_FLAG(SV_ILP32))
2324		ifrup->ifr32.ifr_ifru.ifru_buffer.buffer = 0;
2325	else
2326#endif
2327		ifrup->ifr.ifr_ifru.ifru_buffer.buffer = NULL;
2328}
2329
2330size_t
2331ifr_buffer_get_length(void *data)
2332{
2333	union ifreq_union *ifrup;
2334
2335	ifrup = data;
2336#ifdef COMPAT_FREEBSD32
2337	if (SV_CURPROC_FLAG(SV_ILP32))
2338		return (ifrup->ifr32.ifr_ifru.ifru_buffer.length);
2339#endif
2340	return (ifrup->ifr.ifr_ifru.ifru_buffer.length);
2341}
2342
2343static void
2344ifr_buffer_set_length(void *data, size_t len)
2345{
2346	union ifreq_union *ifrup;
2347
2348	ifrup = data;
2349#ifdef COMPAT_FREEBSD32
2350	if (SV_CURPROC_FLAG(SV_ILP32))
2351		ifrup->ifr32.ifr_ifru.ifru_buffer.length = len;
2352	else
2353#endif
2354		ifrup->ifr.ifr_ifru.ifru_buffer.length = len;
2355}
2356
2357void *
2358ifr_data_get_ptr(void *ifrp)
2359{
2360	union ifreq_union *ifrup;
2361
2362	ifrup = ifrp;
2363#ifdef COMPAT_FREEBSD32
2364	if (SV_CURPROC_FLAG(SV_ILP32))
2365		return ((void *)(uintptr_t)
2366		    ifrup->ifr32.ifr_ifru.ifru_data);
2367#endif
2368		return (ifrup->ifr.ifr_ifru.ifru_data);
2369}
2370
2371struct ifcap_nv_bit_name {
2372	uint64_t cap_bit;
2373	const char *cap_name;
2374};
2375#define CAPNV(x) {.cap_bit = IFCAP_##x, \
2376    .cap_name = __CONCAT(IFCAP_, __CONCAT(x, _NAME)) }
2377const struct ifcap_nv_bit_name ifcap_nv_bit_names[] = {
2378	CAPNV(RXCSUM),
2379	CAPNV(TXCSUM),
2380	CAPNV(NETCONS),
2381	CAPNV(VLAN_MTU),
2382	CAPNV(VLAN_HWTAGGING),
2383	CAPNV(JUMBO_MTU),
2384	CAPNV(POLLING),
2385	CAPNV(VLAN_HWCSUM),
2386	CAPNV(TSO4),
2387	CAPNV(TSO6),
2388	CAPNV(LRO),
2389	CAPNV(WOL_UCAST),
2390	CAPNV(WOL_MCAST),
2391	CAPNV(WOL_MAGIC),
2392	CAPNV(TOE4),
2393	CAPNV(TOE6),
2394	CAPNV(VLAN_HWFILTER),
2395	CAPNV(VLAN_HWTSO),
2396	CAPNV(LINKSTATE),
2397	CAPNV(NETMAP),
2398	CAPNV(RXCSUM_IPV6),
2399	CAPNV(TXCSUM_IPV6),
2400	CAPNV(HWSTATS),
2401	CAPNV(TXRTLMT),
2402	CAPNV(HWRXTSTMP),
2403	CAPNV(MEXTPG),
2404	CAPNV(TXTLS4),
2405	CAPNV(TXTLS6),
2406	CAPNV(VXLAN_HWCSUM),
2407	CAPNV(VXLAN_HWTSO),
2408	CAPNV(TXTLS_RTLMT),
2409	{0, NULL}
2410};
2411#define CAP2NV(x) {.cap_bit = IFCAP2_BIT(IFCAP2_##x), \
2412    .cap_name = __CONCAT(IFCAP2_, __CONCAT(x, _NAME)) }
2413const struct ifcap_nv_bit_name ifcap2_nv_bit_names[] = {
2414	CAP2NV(RXTLS4),
2415	CAP2NV(RXTLS6),
2416	{0, NULL}
2417};
2418#undef CAPNV
2419#undef CAP2NV
2420
2421int
2422if_capnv_to_capint(const nvlist_t *nv, int *old_cap,
2423    const struct ifcap_nv_bit_name *nn, bool all)
2424{
2425	int i, res;
2426
2427	res = 0;
2428	for (i = 0; nn[i].cap_name != NULL; i++) {
2429		if (nvlist_exists_bool(nv, nn[i].cap_name)) {
2430			if (all || nvlist_get_bool(nv, nn[i].cap_name))
2431				res |= nn[i].cap_bit;
2432		} else {
2433			res |= *old_cap & nn[i].cap_bit;
2434		}
2435	}
2436	return (res);
2437}
2438
2439void
2440if_capint_to_capnv(nvlist_t *nv, const struct ifcap_nv_bit_name *nn,
2441    int ifr_cap, int ifr_req)
2442{
2443	int i;
2444
2445	for (i = 0; nn[i].cap_name != NULL; i++) {
2446		if ((nn[i].cap_bit & ifr_cap) != 0) {
2447			nvlist_add_bool(nv, nn[i].cap_name,
2448			    (nn[i].cap_bit & ifr_req) != 0);
2449		}
2450	}
2451}
2452
2453/*
2454 * Hardware specific interface ioctls.
2455 */
2456int
2457ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
2458{
2459	struct ifreq *ifr;
2460	int error = 0, do_ifup = 0;
2461	int new_flags, temp_flags;
2462	size_t descrlen, nvbuflen;
2463	char *descrbuf;
2464	char new_name[IFNAMSIZ];
2465	void *buf;
2466	nvlist_t *nvcap;
2467	struct siocsifcapnv_driver_data drv_ioctl_data;
2468
2469	ifr = (struct ifreq *)data;
2470	switch (cmd) {
2471	case SIOCGIFINDEX:
2472		ifr->ifr_index = ifp->if_index;
2473		break;
2474
2475	case SIOCGIFFLAGS:
2476		temp_flags = ifp->if_flags | ifp->if_drv_flags;
2477		ifr->ifr_flags = temp_flags & 0xffff;
2478		ifr->ifr_flagshigh = temp_flags >> 16;
2479		break;
2480
2481	case SIOCGIFCAP:
2482		ifr->ifr_reqcap = ifp->if_capabilities;
2483		ifr->ifr_curcap = ifp->if_capenable;
2484		break;
2485
2486	case SIOCGIFCAPNV:
2487		if ((ifp->if_capabilities & IFCAP_NV) == 0) {
2488			error = EINVAL;
2489			break;
2490		}
2491		buf = NULL;
2492		nvcap = nvlist_create(0);
2493		for (;;) {
2494			if_capint_to_capnv(nvcap, ifcap_nv_bit_names,
2495			    ifp->if_capabilities, ifp->if_capenable);
2496			if_capint_to_capnv(nvcap, ifcap2_nv_bit_names,
2497			    ifp->if_capabilities2, ifp->if_capenable2);
2498			error = (*ifp->if_ioctl)(ifp, SIOCGIFCAPNV,
2499			    __DECONST(caddr_t, nvcap));
2500			if (error != 0) {
2501				if_printf(ifp,
2502			    "SIOCGIFCAPNV driver mistake: nvlist error %d\n",
2503				    error);
2504				break;
2505			}
2506			buf = nvlist_pack(nvcap, &nvbuflen);
2507			if (buf == NULL) {
2508				error = nvlist_error(nvcap);
2509				if (error == 0)
2510					error = EDOOFUS;
2511				break;
2512			}
2513			if (nvbuflen > ifr->ifr_cap_nv.buf_length) {
2514				ifr->ifr_cap_nv.length = nvbuflen;
2515				ifr->ifr_cap_nv.buffer = NULL;
2516				error = EFBIG;
2517				break;
2518			}
2519			ifr->ifr_cap_nv.length = nvbuflen;
2520			error = copyout(buf, ifr->ifr_cap_nv.buffer, nvbuflen);
2521			break;
2522		}
2523		free(buf, M_NVLIST);
2524		nvlist_destroy(nvcap);
2525		break;
2526
2527	case SIOCGIFDATA:
2528	{
2529		struct if_data ifd;
2530
2531		/* Ensure uninitialised padding is not leaked. */
2532		memset(&ifd, 0, sizeof(ifd));
2533
2534		if_data_copy(ifp, &ifd);
2535		error = copyout(&ifd, ifr_data_get_ptr(ifr), sizeof(ifd));
2536		break;
2537	}
2538
2539#ifdef MAC
2540	case SIOCGIFMAC:
2541		error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp);
2542		break;
2543#endif
2544
2545	case SIOCGIFMETRIC:
2546		ifr->ifr_metric = ifp->if_metric;
2547		break;
2548
2549	case SIOCGIFMTU:
2550		ifr->ifr_mtu = ifp->if_mtu;
2551		break;
2552
2553	case SIOCGIFPHYS:
2554		/* XXXGL: did this ever worked? */
2555		ifr->ifr_phys = 0;
2556		break;
2557
2558	case SIOCGIFDESCR:
2559		error = 0;
2560		sx_slock(&ifdescr_sx);
2561		if (ifp->if_description == NULL)
2562			error = ENOMSG;
2563		else {
2564			/* space for terminating nul */
2565			descrlen = strlen(ifp->if_description) + 1;
2566			if (ifr_buffer_get_length(ifr) < descrlen)
2567				ifr_buffer_set_buffer_null(ifr);
2568			else
2569				error = copyout(ifp->if_description,
2570				    ifr_buffer_get_buffer(ifr), descrlen);
2571			ifr_buffer_set_length(ifr, descrlen);
2572		}
2573		sx_sunlock(&ifdescr_sx);
2574		break;
2575
2576	case SIOCSIFDESCR:
2577		error = priv_check(td, PRIV_NET_SETIFDESCR);
2578		if (error)
2579			return (error);
2580
2581		/*
2582		 * Copy only (length-1) bytes to make sure that
2583		 * if_description is always nul terminated.  The
2584		 * length parameter is supposed to count the
2585		 * terminating nul in.
2586		 */
2587		if (ifr_buffer_get_length(ifr) > ifdescr_maxlen)
2588			return (ENAMETOOLONG);
2589		else if (ifr_buffer_get_length(ifr) == 0)
2590			descrbuf = NULL;
2591		else {
2592			descrbuf = if_allocdescr(ifr_buffer_get_length(ifr), M_WAITOK);
2593			error = copyin(ifr_buffer_get_buffer(ifr), descrbuf,
2594			    ifr_buffer_get_length(ifr) - 1);
2595			if (error) {
2596				if_freedescr(descrbuf);
2597				break;
2598			}
2599		}
2600
2601		if_setdescr(ifp, descrbuf);
2602		getmicrotime(&ifp->if_lastchange);
2603		break;
2604
2605	case SIOCGIFFIB:
2606		ifr->ifr_fib = ifp->if_fib;
2607		break;
2608
2609	case SIOCSIFFIB:
2610		error = priv_check(td, PRIV_NET_SETIFFIB);
2611		if (error)
2612			return (error);
2613		if (ifr->ifr_fib >= rt_numfibs)
2614			return (EINVAL);
2615
2616		ifp->if_fib = ifr->ifr_fib;
2617		break;
2618
2619	case SIOCSIFFLAGS:
2620		error = priv_check(td, PRIV_NET_SETIFFLAGS);
2621		if (error)
2622			return (error);
2623		/*
2624		 * Currently, no driver owned flags pass the IFF_CANTCHANGE
2625		 * check, so we don't need special handling here yet.
2626		 */
2627		new_flags = (ifr->ifr_flags & 0xffff) |
2628		    (ifr->ifr_flagshigh << 16);
2629		if (ifp->if_flags & IFF_UP &&
2630		    (new_flags & IFF_UP) == 0) {
2631			if_down(ifp);
2632		} else if (new_flags & IFF_UP &&
2633		    (ifp->if_flags & IFF_UP) == 0) {
2634			do_ifup = 1;
2635		}
2636		/* See if permanently promiscuous mode bit is about to flip */
2637		if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
2638			if (new_flags & IFF_PPROMISC)
2639				ifp->if_flags |= IFF_PROMISC;
2640			else if (ifp->if_pcount == 0)
2641				ifp->if_flags &= ~IFF_PROMISC;
2642			if (log_promisc_mode_change)
2643                                if_printf(ifp, "permanently promiscuous mode %s\n",
2644                                    ((new_flags & IFF_PPROMISC) ?
2645                                     "enabled" : "disabled"));
2646		}
2647		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
2648			(new_flags &~ IFF_CANTCHANGE);
2649		if (ifp->if_ioctl) {
2650			(void) (*ifp->if_ioctl)(ifp, cmd, data);
2651		}
2652		if (do_ifup)
2653			if_up(ifp);
2654		getmicrotime(&ifp->if_lastchange);
2655		break;
2656
2657	case SIOCSIFCAP:
2658		error = priv_check(td, PRIV_NET_SETIFCAP);
2659		if (error != 0)
2660			return (error);
2661		if (ifp->if_ioctl == NULL)
2662			return (EOPNOTSUPP);
2663		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
2664			return (EINVAL);
2665		error = (*ifp->if_ioctl)(ifp, cmd, data);
2666		if (error == 0)
2667			getmicrotime(&ifp->if_lastchange);
2668		break;
2669
2670	case SIOCSIFCAPNV:
2671		error = priv_check(td, PRIV_NET_SETIFCAP);
2672		if (error != 0)
2673			return (error);
2674		if (ifp->if_ioctl == NULL)
2675			return (EOPNOTSUPP);
2676		if ((ifp->if_capabilities & IFCAP_NV) == 0)
2677			return (EINVAL);
2678		if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE)
2679			return (EINVAL);
2680		nvcap = NULL;
2681		buf = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK);
2682		for (;;) {
2683			error = copyin(ifr->ifr_cap_nv.buffer, buf,
2684			    ifr->ifr_cap_nv.length);
2685			if (error != 0)
2686				break;
2687			nvcap = nvlist_unpack(buf, ifr->ifr_cap_nv.length, 0);
2688			if (nvcap == NULL) {
2689				error = EINVAL;
2690				break;
2691			}
2692			drv_ioctl_data.reqcap = if_capnv_to_capint(nvcap,
2693			    &ifp->if_capenable, ifcap_nv_bit_names, false);
2694			if ((drv_ioctl_data.reqcap &
2695			    ~ifp->if_capabilities) != 0) {
2696				error = EINVAL;
2697				break;
2698			}
2699			drv_ioctl_data.reqcap2 = if_capnv_to_capint(nvcap,
2700			    &ifp->if_capenable2, ifcap2_nv_bit_names, false);
2701			if ((drv_ioctl_data.reqcap2 &
2702			    ~ifp->if_capabilities2) != 0) {
2703				error = EINVAL;
2704				break;
2705			}
2706			drv_ioctl_data.nvcap = nvcap;
2707			error = (*ifp->if_ioctl)(ifp, SIOCSIFCAPNV,
2708			    (caddr_t)&drv_ioctl_data);
2709			break;
2710		}
2711		nvlist_destroy(nvcap);
2712		free(buf, M_TEMP);
2713		if (error == 0)
2714			getmicrotime(&ifp->if_lastchange);
2715		break;
2716
2717#ifdef MAC
2718	case SIOCSIFMAC:
2719		error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp);
2720		break;
2721#endif
2722
2723	case SIOCSIFNAME:
2724		error = priv_check(td, PRIV_NET_SETIFNAME);
2725		if (error)
2726			return (error);
2727		error = copyinstr(ifr_data_get_ptr(ifr), new_name, IFNAMSIZ,
2728		    NULL);
2729		if (error != 0)
2730			return (error);
2731		error = if_rename(ifp, new_name);
2732		break;
2733
2734#ifdef VIMAGE
2735	case SIOCSIFVNET:
2736		error = priv_check(td, PRIV_NET_SETIFVNET);
2737		if (error)
2738			return (error);
2739		error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid);
2740		break;
2741#endif
2742
2743	case SIOCSIFMETRIC:
2744		error = priv_check(td, PRIV_NET_SETIFMETRIC);
2745		if (error)
2746			return (error);
2747		ifp->if_metric = ifr->ifr_metric;
2748		getmicrotime(&ifp->if_lastchange);
2749		break;
2750
2751	case SIOCSIFPHYS:
2752		error = priv_check(td, PRIV_NET_SETIFPHYS);
2753		if (error)
2754			return (error);
2755		if (ifp->if_ioctl == NULL)
2756			return (EOPNOTSUPP);
2757		error = (*ifp->if_ioctl)(ifp, cmd, data);
2758		if (error == 0)
2759			getmicrotime(&ifp->if_lastchange);
2760		break;
2761
2762	case SIOCSIFMTU:
2763	{
2764		u_long oldmtu = ifp->if_mtu;
2765
2766		error = priv_check(td, PRIV_NET_SETIFMTU);
2767		if (error)
2768			return (error);
2769		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
2770			return (EINVAL);
2771		if (ifp->if_ioctl == NULL)
2772			return (EOPNOTSUPP);
2773		/* Disallow MTU changes on bridge member interfaces. */
2774		if (ifp->if_bridge)
2775			return (EOPNOTSUPP);
2776		error = (*ifp->if_ioctl)(ifp, cmd, data);
2777		if (error == 0) {
2778			getmicrotime(&ifp->if_lastchange);
2779			rt_ifmsg(ifp, 0);
2780#ifdef INET
2781			DEBUGNET_NOTIFY_MTU(ifp);
2782#endif
2783		}
2784		/*
2785		 * If the link MTU changed, do network layer specific procedure.
2786		 */
2787		if (ifp->if_mtu != oldmtu)
2788			if_notifymtu(ifp);
2789		break;
2790	}
2791
2792	case SIOCADDMULTI:
2793	case SIOCDELMULTI:
2794		if (cmd == SIOCADDMULTI)
2795			error = priv_check(td, PRIV_NET_ADDMULTI);
2796		else
2797			error = priv_check(td, PRIV_NET_DELMULTI);
2798		if (error)
2799			return (error);
2800
2801		/* Don't allow group membership on non-multicast interfaces. */
2802		if ((ifp->if_flags & IFF_MULTICAST) == 0)
2803			return (EOPNOTSUPP);
2804
2805		/* Don't let users screw up protocols' entries. */
2806		if (ifr->ifr_addr.sa_family != AF_LINK)
2807			return (EINVAL);
2808
2809		if (cmd == SIOCADDMULTI) {
2810			struct epoch_tracker et;
2811			struct ifmultiaddr *ifma;
2812
2813			/*
2814			 * Userland is only permitted to join groups once
2815			 * via the if_addmulti() KPI, because it cannot hold
2816			 * struct ifmultiaddr * between calls. It may also
2817			 * lose a race while we check if the membership
2818			 * already exists.
2819			 */
2820			NET_EPOCH_ENTER(et);
2821			ifma = if_findmulti(ifp, &ifr->ifr_addr);
2822			NET_EPOCH_EXIT(et);
2823			if (ifma != NULL)
2824				error = EADDRINUSE;
2825			else
2826				error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
2827		} else {
2828			error = if_delmulti(ifp, &ifr->ifr_addr);
2829		}
2830		if (error == 0)
2831			getmicrotime(&ifp->if_lastchange);
2832		break;
2833
2834	case SIOCSIFPHYADDR:
2835	case SIOCDIFPHYADDR:
2836#ifdef INET6
2837	case SIOCSIFPHYADDR_IN6:
2838#endif
2839	case SIOCSIFMEDIA:
2840	case SIOCSIFGENERIC:
2841		error = priv_check(td, PRIV_NET_HWIOCTL);
2842		if (error)
2843			return (error);
2844		if (ifp->if_ioctl == NULL)
2845			return (EOPNOTSUPP);
2846		error = (*ifp->if_ioctl)(ifp, cmd, data);
2847		if (error == 0)
2848			getmicrotime(&ifp->if_lastchange);
2849		break;
2850
2851	case SIOCGIFSTATUS:
2852	case SIOCGIFPSRCADDR:
2853	case SIOCGIFPDSTADDR:
2854	case SIOCGIFMEDIA:
2855	case SIOCGIFXMEDIA:
2856	case SIOCGIFGENERIC:
2857	case SIOCGIFRSSKEY:
2858	case SIOCGIFRSSHASH:
2859	case SIOCGIFDOWNREASON:
2860		if (ifp->if_ioctl == NULL)
2861			return (EOPNOTSUPP);
2862		error = (*ifp->if_ioctl)(ifp, cmd, data);
2863		break;
2864
2865	case SIOCSIFLLADDR:
2866		error = priv_check(td, PRIV_NET_SETLLADDR);
2867		if (error)
2868			return (error);
2869		error = if_setlladdr(ifp,
2870		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
2871		break;
2872
2873	case SIOCGHWADDR:
2874		error = if_gethwaddr(ifp, ifr);
2875		break;
2876
2877	case SIOCAIFGROUP:
2878		error = priv_check(td, PRIV_NET_ADDIFGROUP);
2879		if (error)
2880			return (error);
2881		error = if_addgroup(ifp,
2882		    ((struct ifgroupreq *)data)->ifgr_group);
2883		if (error != 0)
2884			return (error);
2885		break;
2886
2887	case SIOCGIFGROUP:
2888	{
2889		struct epoch_tracker et;
2890
2891		NET_EPOCH_ENTER(et);
2892		error = if_getgroup((struct ifgroupreq *)data, ifp);
2893		NET_EPOCH_EXIT(et);
2894		break;
2895	}
2896
2897	case SIOCDIFGROUP:
2898		error = priv_check(td, PRIV_NET_DELIFGROUP);
2899		if (error)
2900			return (error);
2901		error = if_delgroup(ifp,
2902		    ((struct ifgroupreq *)data)->ifgr_group);
2903		if (error != 0)
2904			return (error);
2905		break;
2906
2907	default:
2908		error = ENOIOCTL;
2909		break;
2910	}
2911	return (error);
2912}
2913
2914/*
2915 * Interface ioctls.
2916 */
2917int
2918ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
2919{
2920#ifdef COMPAT_FREEBSD32
2921	union {
2922		struct ifconf ifc;
2923		struct ifdrv ifd;
2924		struct ifgroupreq ifgr;
2925		struct ifmediareq ifmr;
2926	} thunk;
2927	u_long saved_cmd;
2928	struct ifconf32 *ifc32;
2929	struct ifdrv32 *ifd32;
2930	struct ifgroupreq32 *ifgr32;
2931	struct ifmediareq32 *ifmr32;
2932#endif
2933	struct ifnet *ifp;
2934	struct ifreq *ifr;
2935	int error;
2936	int oif_flags;
2937#ifdef VIMAGE
2938	bool shutdown;
2939#endif
2940
2941	CURVNET_SET(so->so_vnet);
2942#ifdef VIMAGE
2943	/* Make sure the VNET is stable. */
2944	shutdown = VNET_IS_SHUTTING_DOWN(so->so_vnet);
2945	if (shutdown) {
2946		CURVNET_RESTORE();
2947		return (EBUSY);
2948	}
2949#endif
2950
2951#ifdef COMPAT_FREEBSD32
2952	saved_cmd = cmd;
2953	switch (cmd) {
2954	case SIOCGIFCONF32:
2955		ifc32 = (struct ifconf32 *)data;
2956		thunk.ifc.ifc_len = ifc32->ifc_len;
2957		thunk.ifc.ifc_buf = PTRIN(ifc32->ifc_buf);
2958		data = (caddr_t)&thunk.ifc;
2959		cmd = SIOCGIFCONF;
2960		break;
2961	case SIOCGDRVSPEC32:
2962	case SIOCSDRVSPEC32:
2963		ifd32 = (struct ifdrv32 *)data;
2964		memcpy(thunk.ifd.ifd_name, ifd32->ifd_name,
2965		    sizeof(thunk.ifd.ifd_name));
2966		thunk.ifd.ifd_cmd = ifd32->ifd_cmd;
2967		thunk.ifd.ifd_len = ifd32->ifd_len;
2968		thunk.ifd.ifd_data = PTRIN(ifd32->ifd_data);
2969		data = (caddr_t)&thunk.ifd;
2970		cmd = _IOC_NEWTYPE(cmd, struct ifdrv);
2971		break;
2972	case SIOCAIFGROUP32:
2973	case SIOCGIFGROUP32:
2974	case SIOCDIFGROUP32:
2975	case SIOCGIFGMEMB32:
2976		ifgr32 = (struct ifgroupreq32 *)data;
2977		memcpy(thunk.ifgr.ifgr_name, ifgr32->ifgr_name,
2978		    sizeof(thunk.ifgr.ifgr_name));
2979		thunk.ifgr.ifgr_len = ifgr32->ifgr_len;
2980		switch (cmd) {
2981		case SIOCAIFGROUP32:
2982		case SIOCDIFGROUP32:
2983			memcpy(thunk.ifgr.ifgr_group, ifgr32->ifgr_group,
2984			    sizeof(thunk.ifgr.ifgr_group));
2985			break;
2986		case SIOCGIFGROUP32:
2987		case SIOCGIFGMEMB32:
2988			thunk.ifgr.ifgr_groups = PTRIN(ifgr32->ifgr_groups);
2989			break;
2990		}
2991		data = (caddr_t)&thunk.ifgr;
2992		cmd = _IOC_NEWTYPE(cmd, struct ifgroupreq);
2993		break;
2994	case SIOCGIFMEDIA32:
2995	case SIOCGIFXMEDIA32:
2996		ifmr32 = (struct ifmediareq32 *)data;
2997		memcpy(thunk.ifmr.ifm_name, ifmr32->ifm_name,
2998		    sizeof(thunk.ifmr.ifm_name));
2999		thunk.ifmr.ifm_current = ifmr32->ifm_current;
3000		thunk.ifmr.ifm_mask = ifmr32->ifm_mask;
3001		thunk.ifmr.ifm_status = ifmr32->ifm_status;
3002		thunk.ifmr.ifm_active = ifmr32->ifm_active;
3003		thunk.ifmr.ifm_count = ifmr32->ifm_count;
3004		thunk.ifmr.ifm_ulist = PTRIN(ifmr32->ifm_ulist);
3005		data = (caddr_t)&thunk.ifmr;
3006		cmd = _IOC_NEWTYPE(cmd, struct ifmediareq);
3007		break;
3008	}
3009#endif
3010
3011	switch (cmd) {
3012	case SIOCGIFCONF:
3013		error = ifconf(cmd, data);
3014		goto out_noref;
3015	}
3016
3017	ifr = (struct ifreq *)data;
3018	switch (cmd) {
3019#ifdef VIMAGE
3020	case SIOCSIFRVNET:
3021		error = priv_check(td, PRIV_NET_SETIFVNET);
3022		if (error == 0)
3023			error = if_vmove_reclaim(td, ifr->ifr_name,
3024			    ifr->ifr_jid);
3025		goto out_noref;
3026#endif
3027	case SIOCIFCREATE:
3028	case SIOCIFCREATE2:
3029		error = priv_check(td, PRIV_NET_IFCREATE);
3030		if (error == 0)
3031			error = if_clone_create(ifr->ifr_name,
3032			    sizeof(ifr->ifr_name), cmd == SIOCIFCREATE2 ?
3033			    ifr_data_get_ptr(ifr) : NULL);
3034		goto out_noref;
3035	case SIOCIFDESTROY:
3036		error = priv_check(td, PRIV_NET_IFDESTROY);
3037
3038		if (error == 0) {
3039			sx_xlock(&ifnet_detach_sxlock);
3040			error = if_clone_destroy(ifr->ifr_name);
3041			sx_xunlock(&ifnet_detach_sxlock);
3042		}
3043		goto out_noref;
3044
3045	case SIOCIFGCLONERS:
3046		error = if_clone_list((struct if_clonereq *)data);
3047		goto out_noref;
3048
3049	case SIOCGIFGMEMB:
3050		error = if_getgroupmembers((struct ifgroupreq *)data);
3051		goto out_noref;
3052
3053#if defined(INET) || defined(INET6)
3054	case SIOCSVH:
3055	case SIOCGVH:
3056		if (carp_ioctl_p == NULL)
3057			error = EPROTONOSUPPORT;
3058		else
3059			error = (*carp_ioctl_p)(ifr, cmd, td);
3060		goto out_noref;
3061#endif
3062	}
3063
3064	ifp = ifunit_ref(ifr->ifr_name);
3065	if (ifp == NULL) {
3066		error = ENXIO;
3067		goto out_noref;
3068	}
3069
3070	error = ifhwioctl(cmd, ifp, data, td);
3071	if (error != ENOIOCTL)
3072		goto out_ref;
3073
3074	oif_flags = ifp->if_flags;
3075	if (so->so_proto == NULL) {
3076		error = EOPNOTSUPP;
3077		goto out_ref;
3078	}
3079
3080	/*
3081	 * Pass the request on to the socket control method, and if the
3082	 * latter returns EOPNOTSUPP, directly to the interface.
3083	 *
3084	 * Make an exception for the legacy SIOCSIF* requests.  Drivers
3085	 * trust SIOCSIFADDR et al to come from an already privileged
3086	 * layer, and do not perform any credentials checks or input
3087	 * validation.
3088	 */
3089	error = so->so_proto->pr_control(so, cmd, data, ifp, td);
3090	if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL &&
3091	    cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
3092	    cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
3093		error = (*ifp->if_ioctl)(ifp, cmd, data);
3094
3095	if (!(oif_flags & IFF_UP) && (ifp->if_flags & IFF_UP))
3096		if_up(ifp);
3097out_ref:
3098	if_rele(ifp);
3099out_noref:
3100	CURVNET_RESTORE();
3101#ifdef COMPAT_FREEBSD32
3102	if (error != 0)
3103		return (error);
3104	switch (saved_cmd) {
3105	case SIOCGIFCONF32:
3106		ifc32->ifc_len = thunk.ifc.ifc_len;
3107		break;
3108	case SIOCGDRVSPEC32:
3109		/*
3110		 * SIOCGDRVSPEC is IOWR, but nothing actually touches
3111		 * the struct so just assert that ifd_len (the only
3112		 * field it might make sense to update) hasn't
3113		 * changed.
3114		 */
3115		KASSERT(thunk.ifd.ifd_len == ifd32->ifd_len,
3116		    ("ifd_len was updated %u -> %zu", ifd32->ifd_len,
3117			thunk.ifd.ifd_len));
3118		break;
3119	case SIOCGIFGROUP32:
3120	case SIOCGIFGMEMB32:
3121		ifgr32->ifgr_len = thunk.ifgr.ifgr_len;
3122		break;
3123	case SIOCGIFMEDIA32:
3124	case SIOCGIFXMEDIA32:
3125		ifmr32->ifm_current = thunk.ifmr.ifm_current;
3126		ifmr32->ifm_mask = thunk.ifmr.ifm_mask;
3127		ifmr32->ifm_status = thunk.ifmr.ifm_status;
3128		ifmr32->ifm_active = thunk.ifmr.ifm_active;
3129		ifmr32->ifm_count = thunk.ifmr.ifm_count;
3130		break;
3131	}
3132#endif
3133	return (error);
3134}
3135
3136int
3137if_rename(struct ifnet *ifp, char *new_name)
3138{
3139	struct ifaddr *ifa;
3140	struct sockaddr_dl *sdl;
3141	size_t namelen, onamelen;
3142	char old_name[IFNAMSIZ];
3143	char strbuf[IFNAMSIZ + 8];
3144
3145	if (new_name[0] == '\0')
3146		return (EINVAL);
3147	if (strcmp(new_name, ifp->if_xname) == 0)
3148		return (0);
3149	if (ifunit(new_name) != NULL)
3150		return (EEXIST);
3151
3152	/*
3153	 * XXX: Locking.  Nothing else seems to lock if_flags,
3154	 * and there are numerous other races with the
3155	 * ifunit() checks not being atomic with namespace
3156	 * changes (renames, vmoves, if_attach, etc).
3157	 */
3158	ifp->if_flags |= IFF_RENAMING;
3159
3160	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
3161
3162	if_printf(ifp, "changing name to '%s'\n", new_name);
3163
3164	IF_ADDR_WLOCK(ifp);
3165	strlcpy(old_name, ifp->if_xname, sizeof(old_name));
3166	strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
3167	ifa = ifp->if_addr;
3168	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
3169	namelen = strlen(new_name);
3170	onamelen = sdl->sdl_nlen;
3171	/*
3172	 * Move the address if needed.  This is safe because we
3173	 * allocate space for a name of length IFNAMSIZ when we
3174	 * create this in if_attach().
3175	 */
3176	if (namelen != onamelen) {
3177		bcopy(sdl->sdl_data + onamelen,
3178		    sdl->sdl_data + namelen, sdl->sdl_alen);
3179	}
3180	bcopy(new_name, sdl->sdl_data, namelen);
3181	sdl->sdl_nlen = namelen;
3182	sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
3183	bzero(sdl->sdl_data, onamelen);
3184	while (namelen != 0)
3185		sdl->sdl_data[--namelen] = 0xff;
3186	IF_ADDR_WUNLOCK(ifp);
3187
3188	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
3189
3190	ifp->if_flags &= ~IFF_RENAMING;
3191
3192	snprintf(strbuf, sizeof(strbuf), "name=%s", new_name);
3193	devctl_notify("IFNET", old_name, "RENAME", strbuf);
3194
3195	return (0);
3196}
3197
3198/*
3199 * The code common to handling reference counted flags,
3200 * e.g., in ifpromisc() and if_allmulti().
3201 * The "pflag" argument can specify a permanent mode flag to check,
3202 * such as IFF_PPROMISC for promiscuous mode; should be 0 if none.
3203 *
3204 * Only to be used on stack-owned flags, not driver-owned flags.
3205 */
3206static int
3207if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch)
3208{
3209	struct ifreq ifr;
3210	int error;
3211	int oldflags, oldcount;
3212
3213	/* Sanity checks to catch programming errors */
3214	KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0,
3215	    ("%s: setting driver-owned flag %d", __func__, flag));
3216
3217	if (onswitch)
3218		KASSERT(*refcount >= 0,
3219		    ("%s: increment negative refcount %d for flag %d",
3220		    __func__, *refcount, flag));
3221	else
3222		KASSERT(*refcount > 0,
3223		    ("%s: decrement non-positive refcount %d for flag %d",
3224		    __func__, *refcount, flag));
3225
3226	/* In case this mode is permanent, just touch refcount */
3227	if (ifp->if_flags & pflag) {
3228		*refcount += onswitch ? 1 : -1;
3229		return (0);
3230	}
3231
3232	/* Save ifnet parameters for if_ioctl() may fail */
3233	oldcount = *refcount;
3234	oldflags = ifp->if_flags;
3235
3236	/*
3237	 * See if we aren't the only and touching refcount is enough.
3238	 * Actually toggle interface flag if we are the first or last.
3239	 */
3240	if (onswitch) {
3241		if ((*refcount)++)
3242			return (0);
3243		ifp->if_flags |= flag;
3244	} else {
3245		if (--(*refcount))
3246			return (0);
3247		ifp->if_flags &= ~flag;
3248	}
3249
3250	/* Call down the driver since we've changed interface flags */
3251	if (ifp->if_ioctl == NULL) {
3252		error = EOPNOTSUPP;
3253		goto recover;
3254	}
3255	ifr.ifr_flags = ifp->if_flags & 0xffff;
3256	ifr.ifr_flagshigh = ifp->if_flags >> 16;
3257	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3258	if (error)
3259		goto recover;
3260	/* Notify userland that interface flags have changed */
3261	rt_ifmsg(ifp, flag);
3262	return (0);
3263
3264recover:
3265	/* Recover after driver error */
3266	*refcount = oldcount;
3267	ifp->if_flags = oldflags;
3268	return (error);
3269}
3270
3271/*
3272 * Set/clear promiscuous mode on interface ifp based on the truth value
3273 * of pswitch.  The calls are reference counted so that only the first
3274 * "on" request actually has an effect, as does the final "off" request.
3275 * Results are undefined if the "off" and "on" requests are not matched.
3276 */
3277int
3278ifpromisc(struct ifnet *ifp, int pswitch)
3279{
3280	int error;
3281	int oldflags = ifp->if_flags;
3282
3283	error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
3284			   &ifp->if_pcount, pswitch);
3285	/* If promiscuous mode status has changed, log a message */
3286	if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) &&
3287            log_promisc_mode_change)
3288		if_printf(ifp, "promiscuous mode %s\n",
3289		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
3290	return (error);
3291}
3292
/*
 * Return interface configuration
 * of system.  List may be used
 * in later ioctl's (above) to get
 * other information.
 *
 * 'data' points to a struct ifconf.  On entry ifc_len is the size of the
 * caller's buffer (ifc_req); on success ifc_len is rewritten to the number
 * of bytes produced and that many bytes are copied out to ifc_req.
 *
 * Returns EINVAL if ifc_len is not positive, ENAMETOOLONG if an interface
 * name does not fit in ifr_name, and otherwise the result of copyout().
 * 'cmd' is not used.
 */
/*ARGSUSED*/
static int
ifconf(u_long cmd, caddr_t data)
{
	struct ifconf *ifc = (struct ifconf *)data;
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifreq ifr;
	struct sbuf *sb;
	int error, full = 0, valid_len, max_len;

	/* Limit initial buffer size to maxphys to avoid DoS from userspace. */
	max_len = maxphys - 1;

	/* Prevent hostile input from being able to crash the system */
	if (ifc->ifc_len <= 0)
		return (EINVAL);

again:
	/*
	 * If the caller's buffer is no larger than the current limit, size
	 * the sbuf after the caller's buffer and mark this pass as final
	 * ("full"), so that no retry happens below.
	 */
	if (ifc->ifc_len <= max_len) {
		max_len = ifc->ifc_len;
		full = 1;
	}
	sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
	/*
	 * From here on max_len is reused as the number of bytes the whole
	 * listing needs, while valid_len is the sbuf length after the last
	 * append that left the sbuf without an error.
	 */
	max_len = 0;
	valid_len = 0;

	IFNET_RLOCK();
	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		struct epoch_tracker et;
		int addrs;

		/*
		 * Zero the ifr to make sure we don't disclose the contents
		 * of the stack.
		 */
		memset(&ifr, 0, sizeof(ifr));

		/* A truncated name aborts the whole request. */
		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
		    >= sizeof(ifr.ifr_name)) {
			sbuf_delete(sb);
			IFNET_RUNLOCK();
			return (ENAMETOOLONG);
		}

		/* Number of addresses of this interface that were emitted. */
		addrs = 0;
		NET_EPOCH_ENTER(et);
		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			struct sockaddr *sa = ifa->ifa_addr;

			/* Skip addresses for which prison_if() is non-zero. */
			if (prison_if(curthread->td_ucred, sa) != 0)
				continue;
			addrs++;
			if (sa->sa_len <= sizeof(*sa)) {
				/*
				 * The address fits into struct sockaddr:
				 * emit one fixed-size ifreq, zero-padding
				 * addresses that are shorter.
				 */
				if (sa->sa_len < sizeof(*sa)) {
					memset(&ifr.ifr_ifru.ifru_addr, 0,
					    sizeof(ifr.ifr_ifru.ifru_addr));
					memcpy(&ifr.ifr_ifru.ifru_addr, sa,
					    sa->sa_len);
				} else
					ifr.ifr_ifru.ifru_addr = *sa;
				sbuf_bcat(sb, &ifr, sizeof(ifr));
				max_len += sizeof(ifr);
			} else {
				/*
				 * Longer address: emit the part of the ifreq
				 * that precedes ifr_addr, followed by the
				 * complete sockaddr.
				 */
				sbuf_bcat(sb, &ifr,
				    offsetof(struct ifreq, ifr_addr));
				max_len += offsetof(struct ifreq, ifr_addr);
				sbuf_bcat(sb, sa, sa->sa_len);
				max_len += sa->sa_len;
			}

			/* Only count entries that were stored completely. */
			if (sbuf_error(sb) == 0)
				valid_len = sbuf_len(sb);
		}
		NET_EPOCH_EXIT(et);
		/*
		 * No address was emitted for this interface: emit a single
		 * ifreq that carries the name and a still-zeroed address.
		 */
		if (addrs == 0) {
			sbuf_bcat(sb, &ifr, sizeof(ifr));
			max_len += sizeof(ifr);

			if (sbuf_error(sb) == 0)
				valid_len = sbuf_len(sb);
		}
	}
	IFNET_RUNLOCK();

	/*
	 * If we didn't allocate enough space (uncommon), try again.  If
	 * we have already allocated as much space as we are allowed,
	 * return what we've got.
	 */
	if (valid_len != max_len && !full) {
		sbuf_delete(sb);
		goto again;
	}

	/* Report the number of bytes produced and copy them out. */
	ifc->ifc_len = valid_len;
	sbuf_finish(sb);
	error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
	sbuf_delete(sb);
	return (error);
}
3400
3401/*
3402 * Just like ifpromisc(), but for all-multicast-reception mode.
3403 */
3404int
3405if_allmulti(struct ifnet *ifp, int onswitch)
3406{
3407
3408	return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch));
3409}
3410
3411struct ifmultiaddr *
3412if_findmulti(struct ifnet *ifp, const struct sockaddr *sa)
3413{
3414	struct ifmultiaddr *ifma;
3415
3416	IF_ADDR_LOCK_ASSERT(ifp);
3417
3418	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3419		if (sa->sa_family == AF_LINK) {
3420			if (sa_dl_equal(ifma->ifma_addr, sa))
3421				break;
3422		} else {
3423			if (sa_equal(ifma->ifma_addr, sa))
3424				break;
3425		}
3426	}
3427
3428	return ifma;
3429}
3430
3431/*
3432 * Allocate a new ifmultiaddr and initialize based on passed arguments.  We
3433 * make copies of passed sockaddrs.  The ifmultiaddr will not be added to
3434 * the ifnet multicast address list here, so the caller must do that and
3435 * other setup work (such as notifying the device driver).  The reference
3436 * count is initialized to 1.
3437 */
3438static struct ifmultiaddr *
3439if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa,
3440    int mflags)
3441{
3442	struct ifmultiaddr *ifma;
3443	struct sockaddr *dupsa;
3444
3445	ifma = malloc(sizeof *ifma, M_IFMADDR, mflags |
3446	    M_ZERO);
3447	if (ifma == NULL)
3448		return (NULL);
3449
3450	dupsa = malloc(sa->sa_len, M_IFMADDR, mflags);
3451	if (dupsa == NULL) {
3452		free(ifma, M_IFMADDR);
3453		return (NULL);
3454	}
3455	bcopy(sa, dupsa, sa->sa_len);
3456	ifma->ifma_addr = dupsa;
3457
3458	ifma->ifma_ifp = ifp;
3459	ifma->ifma_refcount = 1;
3460	ifma->ifma_protospec = NULL;
3461
3462	if (llsa == NULL) {
3463		ifma->ifma_lladdr = NULL;
3464		return (ifma);
3465	}
3466
3467	dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags);
3468	if (dupsa == NULL) {
3469		free(ifma->ifma_addr, M_IFMADDR);
3470		free(ifma, M_IFMADDR);
3471		return (NULL);
3472	}
3473	bcopy(llsa, dupsa, llsa->sa_len);
3474	ifma->ifma_lladdr = dupsa;
3475
3476	return (ifma);
3477}
3478
3479/*
3480 * if_freemulti: free ifmultiaddr structure and possibly attached related
3481 * addresses.  The caller is responsible for implementing reference
3482 * counting, notifying the driver, handling routing messages, and releasing
3483 * any dependent link layer state.
3484 */
3485#ifdef MCAST_VERBOSE
3486extern void kdb_backtrace(void);
3487#endif
3488static void
3489if_freemulti_internal(struct ifmultiaddr *ifma)
3490{
3491
3492	KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d",
3493	    ifma->ifma_refcount));
3494
3495	if (ifma->ifma_lladdr != NULL)
3496		free(ifma->ifma_lladdr, M_IFMADDR);
3497#ifdef MCAST_VERBOSE
3498	kdb_backtrace();
3499	printf("%s freeing ifma: %p\n", __func__, ifma);
3500#endif
3501	free(ifma->ifma_addr, M_IFMADDR);
3502	free(ifma, M_IFMADDR);
3503}
3504
3505static void
3506if_destroymulti(epoch_context_t ctx)
3507{
3508	struct ifmultiaddr *ifma;
3509
3510	ifma = __containerof(ctx, struct ifmultiaddr, ifma_epoch_ctx);
3511	if_freemulti_internal(ifma);
3512}
3513
3514void
3515if_freemulti(struct ifmultiaddr *ifma)
3516{
3517	KASSERT(ifma->ifma_refcount == 0, ("if_freemulti_epoch: refcount %d",
3518	    ifma->ifma_refcount));
3519
3520	NET_EPOCH_CALL(if_destroymulti, &ifma->ifma_epoch_ctx);
3521}
3522
/*
 * Register an additional multicast address with a network interface.
 *
 * - If the address is already present, bump the reference count on the
 *   address and return.
 * - If the address is not link-layer, look up a link layer address.
 * - Allocate address structures for one or both addresses, and attach to the
 *   multicast address list on the interface.  If automatically adding a link
 *   layer address, the protocol address will own a reference to the link
 *   layer address, to be freed when it is freed.
 * - Notify the network device driver of an addition to the multicast address
 *   list.
 *
 * 'sa' points to caller-owned memory with the desired multicast address.
 *
 * 'retifma' will be used to return a pointer to the resulting multicast
 * address reference, if desired.  It is only written on success.
 *
 * Returns 0 on success, the error reported by the interface's
 * if_resolvemulti method, or ENOMEM if an allocation fails.
 */
int
if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
    struct ifmultiaddr **retifma)
{
	struct ifmultiaddr *ifma, *ll_ifma;
	struct sockaddr *llsa;
	struct sockaddr_dl sdl;
	int error;

#ifdef INET
	IN_MULTI_LIST_UNLOCK_ASSERT();
#endif
#ifdef INET6
	IN6_MULTI_LIST_UNLOCK_ASSERT();
#endif
	/*
	 * If the address is already present, return a new reference to it;
	 * otherwise, allocate storage and set up a new address.
	 */
	IF_ADDR_WLOCK(ifp);
	ifma = if_findmulti(ifp, sa);
	if (ifma != NULL) {
		ifma->ifma_refcount++;
		if (retifma != NULL)
			*retifma = ifma;
		IF_ADDR_WUNLOCK(ifp);
		return (0);
	}

	/*
	 * The address isn't already present; resolve the protocol address
	 * into a link layer address, and then look that up, bump its
	 * refcount or allocate an ifma for that also.
	 * Most link layer resolving functions return address data which
	 * fits inside the default sockaddr_dl structure.  However, the
	 * callback can allocate another sockaddr structure; in that case we
	 * need to free it later (detected below by llsa no longer pointing
	 * at the on-stack sdl).
	 */
	llsa = NULL;
	ll_ifma = NULL;
	if (ifp->if_resolvemulti != NULL) {
		/* Provide called function with buffer size information */
		sdl.sdl_len = sizeof(sdl);
		llsa = (struct sockaddr *)&sdl;
		error = ifp->if_resolvemulti(ifp, &llsa, sa);
		if (error)
			goto unlock_out;
	}

	/*
	 * Allocate the new address.  Don't hook it up yet, as we may also
	 * need to allocate a link layer multicast address.
	 * M_NOWAIT is used because the address write lock is held.
	 */
	ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
	if (ifma == NULL) {
		error = ENOMEM;
		goto free_llsa_out;
	}

	/*
	 * If a link layer address is found, we'll need to see if it's
	 * already present in the address list, or allocate it as well.
	 * When this block finishes, the link layer address will be on the
	 * list.
	 */
	if (llsa != NULL) {
		ll_ifma = if_findmulti(ifp, llsa);
		if (ll_ifma == NULL) {
			ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
			if (ll_ifma == NULL) {
				/*
				 * Drop the initial reference on the not yet
				 * enqueued ifma so that it can be freed.
				 */
				--ifma->ifma_refcount;
				if_freemulti(ifma);
				error = ENOMEM;
				goto free_llsa_out;
			}
			ll_ifma->ifma_flags |= IFMA_F_ENQUEUED;
			CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
			    ifma_link);
		} else
			ll_ifma->ifma_refcount++;
		ifma->ifma_llifma = ll_ifma;
	}

	/*
	 * We now have a new multicast address, ifma, and possibly a new or
	 * referenced link layer address.  Add the primary address to the
	 * ifnet address list.
	 */
	ifma->ifma_flags |= IFMA_F_ENQUEUED;
	CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);

	if (retifma != NULL)
		*retifma = ifma;

	/*
	 * Must generate the message while holding the lock so that 'ifma'
	 * pointer is still valid.
	 */
	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
	IF_ADDR_WUNLOCK(ifp);

	/*
	 * We are certain we have added something, so call down to the
	 * interface to let them know about it.  If this thread may not
	 * sleep, the ioctl is deferred to the if_addmultitask task instead.
	 */
	if (ifp->if_ioctl != NULL) {
		if (THREAD_CAN_SLEEP())
			(void )(*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
		else
			taskqueue_enqueue(taskqueue_swi, &ifp->if_addmultitask);
	}

	/* Free the link layer address only if the resolver replaced sdl. */
	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
		link_free_sdl(llsa);

	return (0);

	/* Error exit: release a resolver-provided sockaddr, then unlock. */
free_llsa_out:
	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
		link_free_sdl(llsa);

	/* Error exit: only the address write lock has to be dropped. */
unlock_out:
	IF_ADDR_WUNLOCK(ifp);
	return (error);
}
3666
3667static void
3668if_siocaddmulti(void *arg, int pending)
3669{
3670	struct ifnet *ifp;
3671
3672	ifp = arg;
3673#ifdef DIAGNOSTIC
3674	if (pending > 1)
3675		if_printf(ifp, "%d SIOCADDMULTI coalesced\n", pending);
3676#endif
3677	CURVNET_SET(ifp->if_vnet);
3678	(void )(*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
3679	CURVNET_RESTORE();
3680}
3681
3682/*
3683 * Delete a multicast group membership by network-layer group address.
3684 *
3685 * Returns ENOENT if the entry could not be found. If ifp no longer
3686 * exists, results are undefined. This entry point should only be used
3687 * from subsystems which do appropriate locking to hold ifp for the
3688 * duration of the call.
3689 * Network-layer protocol domains must use if_delmulti_ifma().
3690 */
3691int
3692if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
3693{
3694	struct ifmultiaddr *ifma;
3695	int lastref;
3696
3697	KASSERT(ifp, ("%s: NULL ifp", __func__));
3698
3699	IF_ADDR_WLOCK(ifp);
3700	lastref = 0;
3701	ifma = if_findmulti(ifp, sa);
3702	if (ifma != NULL)
3703		lastref = if_delmulti_locked(ifp, ifma, 0);
3704	IF_ADDR_WUNLOCK(ifp);
3705
3706	if (ifma == NULL)
3707		return (ENOENT);
3708
3709	if (lastref && ifp->if_ioctl != NULL) {
3710		(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
3711	}
3712
3713	return (0);
3714}
3715
3716/*
3717 * Delete all multicast group membership for an interface.
3718 * Should be used to quickly flush all multicast filters.
3719 */
3720void
3721if_delallmulti(struct ifnet *ifp)
3722{
3723	struct ifmultiaddr *ifma;
3724	struct ifmultiaddr *next;
3725
3726	IF_ADDR_WLOCK(ifp);
3727	CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
3728		if_delmulti_locked(ifp, ifma, 0);
3729	IF_ADDR_WUNLOCK(ifp);
3730}
3731
/* Convenience wrapper: if_delmulti_ifma_flags() with a flags value of 0. */
void
if_delmulti_ifma(struct ifmultiaddr *ifma)
{
	const int noflags = 0;

	if_delmulti_ifma_flags(ifma, noflags);
}
3737
/*
 * Delete a multicast group membership by group membership pointer.
 * Network-layer protocol domains must use this routine.
 *
 * 'flags' is handed to if_delmulti_locked() as its 'detaching' argument.
 *
 * It is safe to call this routine if the ifp disappeared.
 */
void
if_delmulti_ifma_flags(struct ifmultiaddr *ifma, int flags)
{
	struct ifnet *ifp;
	int lastref;
	MCDPRINTF("%s freeing ifma: %p\n", __func__, ifma);
#ifdef INET
	IN_MULTI_LIST_UNLOCK_ASSERT();
#endif
	ifp = ifma->ifma_ifp;
#ifdef DIAGNOSTIC
	/*
	 * Diagnostic builds additionally verify that the ifnet is still on
	 * the global interface list and treat it as gone (NULL) otherwise.
	 */
	if (ifp == NULL) {
		printf("%s: ifma_ifp seems to be detached\n", __func__);
	} else {
		struct epoch_tracker et;
		struct ifnet *oifp;

		NET_EPOCH_ENTER(et);
		CK_STAILQ_FOREACH(oifp, &V_ifnet, if_link)
			if (ifp == oifp)
				break;
		NET_EPOCH_EXIT(et);
		/* oifp is NULL here if the loop ran off the end of the list. */
		if (ifp != oifp)
			ifp = NULL;
	}
#endif
	/*
	 * If and only if the ifnet instance exists: Acquire the address lock.
	 */
	if (ifp != NULL)
		IF_ADDR_WLOCK(ifp);

	/* Non-zero result: the last reference to ifma was released. */
	lastref = if_delmulti_locked(ifp, ifma, flags);

	if (ifp != NULL) {
		/*
		 * If and only if the ifnet instance exists:
		 *  Release the address lock.
		 *  If the group was left: update the hardware hash filter.
		 */
		IF_ADDR_WUNLOCK(ifp);
		if (lastref && ifp->if_ioctl != NULL) {
			(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
		}
	}
}
3790
/*
 * Perform deletion of network-layer and/or link-layer multicast address.
 *
 * 'ifp' may be NULL; the interface actually operated on is re-read from
 * ifma->ifma_ifp.  When both are non-NULL they must agree and the address
 * write lock must be held (asserted below).  A non-zero 'detaching' means
 * the ifnet is going away.
 *
 * Return 0 if the reference count was decremented.
 * Return 1 if the final reference was released, indicating that the
 * hardware hash filter should be reprogrammed.
 */
static int
if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
{
	struct ifmultiaddr *ll_ifma;

	if (ifp != NULL && ifma->ifma_ifp != NULL) {
		KASSERT(ifma->ifma_ifp == ifp,
		    ("%s: inconsistent ifp %p", __func__, ifp));
		IF_ADDR_WLOCK_ASSERT(ifp);
	}

	/* From here on, work with the ifnet recorded in the ifma itself. */
	ifp = ifma->ifma_ifp;
	MCDPRINTF("%s freeing %p from %s \n", __func__, ifma, ifp ? ifp->if_xname : "");

	/*
	 * If the ifnet is detaching, null out references to ifnet,
	 * so that upper protocol layers will notice, and not attempt
	 * to obtain locks for an ifnet which no longer exists. The
	 * routing socket announcement must happen before the ifnet
	 * instance is detached from the system.
	 */
	if (detaching) {
#ifdef DIAGNOSTIC
		printf("%s: detaching ifnet instance %p\n", __func__, ifp);
#endif
		/*
		 * ifp may already be nulled out if we are being reentered
		 * to delete the ll_ifma.
		 */
		if (ifp != NULL) {
			rt_newmaddrmsg(RTM_DELMADDR, ifma);
			ifma->ifma_ifp = NULL;
		}
	}

	/* Other references remain: nothing is unlinked or freed. */
	if (--ifma->ifma_refcount > 0)
		return 0;

	/*
	 * Last reference.  Unlink the entry from the interface list, but
	 * only when not detaching.
	 * NOTE(review): presumably the detach path empties the list itself;
	 * confirm against the caller.
	 */
	if (ifp != NULL && detaching == 0 && (ifma->ifma_flags & IFMA_F_ENQUEUED)) {
		CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
		ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
	}
	/*
	 * If this ifma is a network-layer ifma, a link-layer ifma may
	 * have been associated with it. Release it first if so.
	 */
	ll_ifma = ifma->ifma_llifma;
	if (ll_ifma != NULL) {
		KASSERT(ifma->ifma_lladdr != NULL,
		    ("%s: llifma w/o lladdr", __func__));
		if (detaching)
			ll_ifma->ifma_ifp = NULL;	/* XXX */
		/* Drop our reference; unlink and free it on the last one. */
		if (--ll_ifma->ifma_refcount == 0) {
			if (ifp != NULL) {
				if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) {
					CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr,
						ifma_link);
					ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
				}
			}
			if_freemulti(ll_ifma);
		}
	}
#ifdef INVARIANTS
	/* Sanity check: ifma must no longer be on the interface list. */
	if (ifp) {
		struct ifmultiaddr *ifmatmp;

		CK_STAILQ_FOREACH(ifmatmp, &ifp->if_multiaddrs, ifma_link)
			MPASS(ifma != ifmatmp);
	}
#endif
	if_freemulti(ifma);
	/*
	 * The last reference to this instance of struct ifmultiaddr
	 * was released; the hardware should be notified of this change.
	 */
	return 1;
}
3876
3877/*
3878 * Set the link layer address on an interface.
3879 *
3880 * At this time we only support certain types of interfaces,
3881 * and we don't allow the length of the address to change.
3882 *
3883 * Set noinline to be dtrace-friendly
3884 */
3885__noinline int
3886if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
3887{
3888	struct sockaddr_dl *sdl;
3889	struct ifaddr *ifa;
3890	struct ifreq ifr;
3891
3892	ifa = ifp->if_addr;
3893	if (ifa == NULL)
3894		return (EINVAL);
3895
3896	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
3897	if (sdl == NULL)
3898		return (EINVAL);
3899
3900	if (len != sdl->sdl_alen)	/* don't allow length to change */
3901		return (EINVAL);
3902
3903	switch (ifp->if_type) {
3904	case IFT_ETHER:
3905	case IFT_XETHER:
3906	case IFT_L2VLAN:
3907	case IFT_BRIDGE:
3908	case IFT_IEEE8023ADLAG:
3909		bcopy(lladdr, LLADDR(sdl), len);
3910		break;
3911	default:
3912		return (ENODEV);
3913	}
3914
3915	/*
3916	 * If the interface is already up, we need
3917	 * to re-init it in order to reprogram its
3918	 * address filter.
3919	 */
3920	if ((ifp->if_flags & IFF_UP) != 0) {
3921		if (ifp->if_ioctl) {
3922			ifp->if_flags &= ~IFF_UP;
3923			ifr.ifr_flags = ifp->if_flags & 0xffff;
3924			ifr.ifr_flagshigh = ifp->if_flags >> 16;
3925			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3926			ifp->if_flags |= IFF_UP;
3927			ifr.ifr_flags = ifp->if_flags & 0xffff;
3928			ifr.ifr_flagshigh = ifp->if_flags >> 16;
3929			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
3930		}
3931	}
3932	EVENTHANDLER_INVOKE(iflladdr_event, ifp);
3933
3934	return (0);
3935}
3936
3937/*
3938 * Compat function for handling basic encapsulation requests.
3939 * Not converted stacks (FDDI, IB, ..) supports traditional
3940 * output model: ARP (and other similar L2 protocols) are handled
3941 * inside output routine, arpresolve/nd6_resolve() returns MAC
3942 * address instead of full prepend.
3943 *
3944 * This function creates calculated header==MAC for IPv4/IPv6 and
3945 * returns EAFNOSUPPORT (which is then handled in ARP code) for other
3946 * address families.
3947 */
3948static int
3949if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req)
3950{
3951	if (req->rtype != IFENCAP_LL)
3952		return (EOPNOTSUPP);
3953
3954	if (req->bufsize < req->lladdr_len)
3955		return (ENOMEM);
3956
3957	switch (req->family) {
3958	case AF_INET:
3959	case AF_INET6:
3960		break;
3961	default:
3962		return (EAFNOSUPPORT);
3963	}
3964
3965	/* Copy lladdr to storage as is */
3966	memmove(req->buf, req->lladdr, req->lladdr_len);
3967	req->bufsize = req->lladdr_len;
3968	req->lladdr_off = 0;
3969
3970	return (0);
3971}
3972
3973/*
3974 * Tunnel interfaces can nest, also they may cause infinite recursion
3975 * calls when misconfigured. We'll prevent this by detecting loops.
3976 * High nesting level may cause stack exhaustion. We'll prevent this
3977 * by introducing upper limit.
3978 *
3979 * Return 0, if tunnel nesting count is equal or less than limit.
3980 */
3981int
3982if_tunnel_check_nesting(struct ifnet *ifp, struct mbuf *m, uint32_t cookie,
3983    int limit)
3984{
3985	struct m_tag *mtag;
3986	int count;
3987
3988	count = 1;
3989	mtag = NULL;
3990	while ((mtag = m_tag_locate(m, cookie, 0, mtag)) != NULL) {
3991		if (*(struct ifnet **)(mtag + 1) == ifp) {
3992			log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp));
3993			return (EIO);
3994		}
3995		count++;
3996	}
3997	if (count > limit) {
3998		log(LOG_NOTICE,
3999		    "%s: if_output recursively called too many times(%d)\n",
4000		    if_name(ifp), count);
4001		return (EIO);
4002	}
4003	mtag = m_tag_alloc(cookie, 0, sizeof(struct ifnet *), M_NOWAIT);
4004	if (mtag == NULL)
4005		return (ENOMEM);
4006	*(struct ifnet **)(mtag + 1) = ifp;
4007	m_tag_prepend(m, mtag);
4008	return (0);
4009}
4010
4011/*
4012 * Get the link layer address that was read from the hardware at attach.
4013 *
4014 * This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type
4015 * their component interfaces as IFT_IEEE8023ADLAG.
4016 */
4017int
4018if_gethwaddr(struct ifnet *ifp, struct ifreq *ifr)
4019{
4020	if (ifp->if_hw_addr == NULL)
4021		return (ENODEV);
4022
4023	switch (ifp->if_type) {
4024	case IFT_ETHER:
4025	case IFT_IEEE8023ADLAG:
4026		bcopy(ifp->if_hw_addr, ifr->ifr_addr.sa_data, ifp->if_addrlen);
4027		return (0);
4028	default:
4029		return (ENODEV);
4030	}
4031}
4032
4033/*
4034 * The name argument must be a pointer to storage which will last as
4035 * long as the interface does.  For physical devices, the result of
4036 * device_get_name(dev) is a good choice and for pseudo-devices a
4037 * static string works well.
4038 */
4039void
4040if_initname(struct ifnet *ifp, const char *name, int unit)
4041{
4042	ifp->if_dname = name;
4043	ifp->if_dunit = unit;
4044	if (unit != IF_DUNIT_NONE)
4045		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
4046	else
4047		strlcpy(ifp->if_xname, name, IFNAMSIZ);
4048}
4049
4050static int
4051if_vlog(struct ifnet *ifp, int pri, const char *fmt, va_list ap)
4052{
4053	char if_fmt[256];
4054
4055	snprintf(if_fmt, sizeof(if_fmt), "%s: %s", ifp->if_xname, fmt);
4056	vlog(pri, if_fmt, ap);
4057	return (0);
4058}
4059
4060
4061int
4062if_printf(struct ifnet *ifp, const char *fmt, ...)
4063{
4064	va_list ap;
4065
4066	va_start(ap, fmt);
4067	if_vlog(ifp, LOG_INFO, fmt, ap);
4068	va_end(ap);
4069	return (0);
4070}
4071
4072int
4073if_log(struct ifnet *ifp, int pri, const char *fmt, ...)
4074{
4075	va_list ap;
4076
4077	va_start(ap, fmt);
4078	if_vlog(ifp, pri, fmt, ap);
4079	va_end(ap);
4080	return (0);
4081}
4082
4083void
4084if_start(struct ifnet *ifp)
4085{
4086
4087	(*(ifp)->if_start)(ifp);
4088}
4089
/*
 * Backwards compatibility transmit routine, presumably installed for
 * drivers that do not provide their own if_transmit method (confirm at
 * the place where it is assigned).  Hands the mbuf over with IFQ_HANDOFF()
 * and returns the error code set by that macro.
 */
static int
if_transmit_default(struct ifnet *ifp, struct mbuf *m)
{
	int rc;

	IFQ_HANDOFF(ifp, m, rc);
	return (rc);
}
4102
4103static void
4104if_input_default(struct ifnet *ifp __unused, struct mbuf *m)
4105{
4106	m_freem(m);
4107}
4108
4109int
4110if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
4111{
4112	int active = 0;
4113
4114	IF_LOCK(ifq);
4115	if (_IF_QFULL(ifq)) {
4116		IF_UNLOCK(ifq);
4117		if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
4118		m_freem(m);
4119		return (0);
4120	}
4121	if (ifp != NULL) {
4122		if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust);
4123		if (m->m_flags & (M_BCAST|M_MCAST))
4124			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
4125		active = ifp->if_drv_flags & IFF_DRV_OACTIVE;
4126	}
4127	_IF_ENQUEUE(ifq, m);
4128	IF_UNLOCK(ifq);
4129	if (ifp != NULL && !active)
4130		(*(ifp)->if_start)(ifp);
4131	return (1);
4132}
4133
4134void
4135if_register_com_alloc(u_char type,
4136    if_com_alloc_t *a, if_com_free_t *f)
4137{
4138
4139	KASSERT(if_com_alloc[type] == NULL,
4140	    ("if_register_com_alloc: %d already registered", type));
4141	KASSERT(if_com_free[type] == NULL,
4142	    ("if_register_com_alloc: %d free already registered", type));
4143
4144	if_com_alloc[type] = a;
4145	if_com_free[type] = f;
4146}
4147
4148void
4149if_deregister_com_alloc(u_char type)
4150{
4151
4152	KASSERT(if_com_alloc[type] != NULL,
4153	    ("if_deregister_com_alloc: %d not registered", type));
4154	KASSERT(if_com_free[type] != NULL,
4155	    ("if_deregister_com_alloc: %d free not registered", type));
4156
4157	/*
4158	 * Ensure all pending EPOCH(9) callbacks have been executed. This
4159	 * fixes issues about late invocation of if_destroy(), which leads
4160	 * to memory leak from if_com_alloc[type] allocated if_l2com.
4161	 */
4162	NET_EPOCH_DRAIN_CALLBACKS();
4163
4164	if_com_alloc[type] = NULL;
4165	if_com_free[type] = NULL;
4166}
4167
4168/* API for driver access to network stack owned ifnet.*/
4169uint64_t
4170if_setbaudrate(struct ifnet *ifp, uint64_t baudrate)
4171{
4172	uint64_t oldbrate;
4173
4174	oldbrate = ifp->if_baudrate;
4175	ifp->if_baudrate = baudrate;
4176	return (oldbrate);
4177}
4178
4179uint64_t
4180if_getbaudrate(const if_t ifp)
4181{
4182	return (ifp->if_baudrate);
4183}
4184
4185int
4186if_setcapabilities(if_t ifp, int capabilities)
4187{
4188	ifp->if_capabilities = capabilities;
4189	return (0);
4190}
4191
4192int
4193if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit)
4194{
4195	ifp->if_capabilities &= ~clearbit;
4196	ifp->if_capabilities |= setbit;
4197	return (0);
4198}
4199
4200int
4201if_getcapabilities(const if_t ifp)
4202{
4203	return (ifp->if_capabilities);
4204}
4205
4206int
4207if_setcapenable(if_t ifp, int capabilities)
4208{
4209	ifp->if_capenable = capabilities;
4210	return (0);
4211}
4212
4213int
4214if_setcapenablebit(if_t ifp, int setcap, int clearcap)
4215{
4216	ifp->if_capenable &= ~clearcap;
4217	ifp->if_capenable |= setcap;
4218	return (0);
4219}
4220
4221int
4222if_setcapabilities2(if_t ifp, int capabilities)
4223{
4224	ifp->if_capabilities2 = capabilities;
4225	return (0);
4226}
4227
4228int
4229if_setcapabilities2bit(if_t ifp, int setbit, int clearbit)
4230{
4231	ifp->if_capabilities2 &= ~clearbit;
4232	ifp->if_capabilities2 |= setbit;
4233	return (0);
4234}
4235
4236int
4237if_getcapabilities2(const if_t ifp)
4238{
4239	return (ifp->if_capabilities2);
4240}
4241
4242int
4243if_setcapenable2(if_t ifp, int capabilities2)
4244{
4245	ifp->if_capenable2 = capabilities2;
4246	return (0);
4247}
4248
4249int
4250if_setcapenable2bit(if_t ifp, int setcap, int clearcap)
4251{
4252	ifp->if_capenable2 &= ~clearcap;
4253	ifp->if_capenable2 |= setcap;
4254	return (0);
4255}
4256
4257const char *
4258if_getdname(const if_t ifp)
4259{
4260	return (ifp->if_dname);
4261}
4262
4263void
4264if_setdname(if_t ifp, const char *dname)
4265{
4266	ifp->if_dname = dname;
4267}
4268
4269const char *
4270if_name(if_t ifp)
4271{
4272	return (ifp->if_xname);
4273}
4274
4275int
4276if_setname(if_t ifp, const char *name)
4277{
4278	if (strlen(name) > sizeof(ifp->if_xname) - 1)
4279		return (ENAMETOOLONG);
4280	strcpy(ifp->if_xname, name);
4281
4282	return (0);
4283}
4284
4285int
4286if_togglecapenable(if_t ifp, int togglecap)
4287{
4288	ifp->if_capenable ^= togglecap;
4289	return (0);
4290}
4291
4292int
4293if_getcapenable(const if_t ifp)
4294{
4295	return (ifp->if_capenable);
4296}
4297
4298int
4299if_togglecapenable2(if_t ifp, int togglecap)
4300{
4301	ifp->if_capenable2 ^= togglecap;
4302	return (0);
4303}
4304
4305int
4306if_getcapenable2(const if_t ifp)
4307{
4308	return (ifp->if_capenable2);
4309}
4310
4311int
4312if_getdunit(const if_t ifp)
4313{
4314	return (ifp->if_dunit);
4315}
4316
4317int
4318if_getindex(const if_t ifp)
4319{
4320	return (ifp->if_index);
4321}
4322
4323int
4324if_getidxgen(const if_t ifp)
4325{
4326	return (ifp->if_idxgen);
4327}
4328
4329const char *
4330if_getdescr(if_t ifp)
4331{
4332	return (ifp->if_description);
4333}
4334
4335void
4336if_setdescr(if_t ifp, char *descrbuf)
4337{
4338	sx_xlock(&ifdescr_sx);
4339	char *odescrbuf = ifp->if_description;
4340	ifp->if_description = descrbuf;
4341	sx_xunlock(&ifdescr_sx);
4342
4343	if_freedescr(odescrbuf);
4344}
4345
4346char *
4347if_allocdescr(size_t sz, int malloc_flag)
4348{
4349	malloc_flag &= (M_WAITOK | M_NOWAIT);
4350	return (malloc(sz, M_IFDESCR, M_ZERO | malloc_flag));
4351}
4352
4353void
4354if_freedescr(char *descrbuf)
4355{
4356	free(descrbuf, M_IFDESCR);
4357}
4358
4359int
4360if_getalloctype(const if_t ifp)
4361{
4362	return (ifp->if_alloctype);
4363}
4364
4365void
4366if_setlastchange(if_t ifp)
4367{
4368	getmicrotime(&ifp->if_lastchange);
4369}
4370
4371/*
4372 * This is largely undesirable because it ties ifnet to a device, but does
4373 * provide flexiblity for an embedded product vendor. Should be used with
4374 * the understanding that it violates the interface boundaries, and should be
4375 * a last resort only.
4376 */
4377int
4378if_setdev(if_t ifp, void *dev)
4379{
4380	return (0);
4381}
4382
4383int
4384if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags)
4385{
4386	ifp->if_drv_flags &= ~clear_flags;
4387	ifp->if_drv_flags |= set_flags;
4388
4389	return (0);
4390}
4391
4392int
4393if_getdrvflags(const if_t ifp)
4394{
4395	return (ifp->if_drv_flags);
4396}
4397
4398int
4399if_setdrvflags(if_t ifp, int flags)
4400{
4401	ifp->if_drv_flags = flags;
4402	return (0);
4403}
4404
4405int
4406if_setflags(if_t ifp, int flags)
4407{
4408	ifp->if_flags = flags;
4409	return (0);
4410}
4411
4412int
4413if_setflagbits(if_t ifp, int set, int clear)
4414{
4415	ifp->if_flags &= ~clear;
4416	ifp->if_flags |= set;
4417	return (0);
4418}
4419
4420int
4421if_getflags(const if_t ifp)
4422{
4423	return (ifp->if_flags);
4424}
4425
4426int
4427if_clearhwassist(if_t ifp)
4428{
4429	ifp->if_hwassist = 0;
4430	return (0);
4431}
4432
4433int
4434if_sethwassistbits(if_t ifp, int toset, int toclear)
4435{
4436	ifp->if_hwassist &= ~toclear;
4437	ifp->if_hwassist |= toset;
4438
4439	return (0);
4440}
4441
4442int
4443if_sethwassist(if_t ifp, int hwassist_bit)
4444{
4445	ifp->if_hwassist = hwassist_bit;
4446	return (0);
4447}
4448
4449int
4450if_gethwassist(const if_t ifp)
4451{
4452	return (ifp->if_hwassist);
4453}
4454
4455int
4456if_togglehwassist(if_t ifp, int toggle_bits)
4457{
4458	ifp->if_hwassist ^= toggle_bits;
4459	return (0);
4460}
4461
4462int
4463if_setmtu(if_t ifp, int mtu)
4464{
4465	ifp->if_mtu = mtu;
4466	return (0);
4467}
4468
4469void
4470if_notifymtu(if_t ifp)
4471{
4472#ifdef INET6
4473	nd6_setmtu(ifp);
4474#endif
4475	rt_updatemtu(ifp);
4476}
4477
4478int
4479if_getmtu(const if_t ifp)
4480{
4481	return (ifp->if_mtu);
4482}
4483
4484int
4485if_getmtu_family(const if_t ifp, int family)
4486{
4487	struct domain *dp;
4488
4489	SLIST_FOREACH(dp, &domains, dom_next) {
4490		if (dp->dom_family == family && dp->dom_ifmtu != NULL)
4491			return (dp->dom_ifmtu(ifp));
4492	}
4493
4494	return (ifp->if_mtu);
4495}
4496
4497/*
4498 * Methods for drivers to access interface unicast and multicast
4499 * link level addresses.  Driver shall not know 'struct ifaddr' neither
4500 * 'struct ifmultiaddr'.
4501 */
4502u_int
4503if_lladdr_count(if_t ifp)
4504{
4505	struct epoch_tracker et;
4506	struct ifaddr *ifa;
4507	u_int count;
4508
4509	count = 0;
4510	NET_EPOCH_ENTER(et);
4511	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
4512		if (ifa->ifa_addr->sa_family == AF_LINK)
4513			count++;
4514	NET_EPOCH_EXIT(et);
4515
4516	return (count);
4517}
4518
4519int
4520if_foreach(if_foreach_cb_t cb, void *cb_arg)
4521{
4522	if_t ifp;
4523	int error;
4524
4525	NET_EPOCH_ASSERT();
4526	MPASS(cb);
4527
4528	error = 0;
4529	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
4530		error = cb(ifp, cb_arg);
4531		if (error != 0)
4532			break;
4533	}
4534
4535	return (error);
4536}
4537
4538/*
4539 * Iterates over the list of interfaces, permitting callback function @cb to sleep.
4540 * Stops iteration if @cb returns non-zero error code.
4541 * Returns the last error code from @cb.
4542 * @match_cb: optional match callback limiting the iteration to only matched interfaces
4543 * @match_arg: argument to pass to @match_cb
4544 * @cb: iteration callback
4545 * @cb_arg: argument to pass to @cb
4546 */
4547int
4548if_foreach_sleep(if_foreach_match_t match_cb, void *match_arg, if_foreach_cb_t cb,
4549    void *cb_arg)
4550{
4551	int match_count = 0, array_size = 16; /* 128 bytes for malloc */
4552	struct ifnet **match_array = NULL;
4553	int error = 0;
4554
4555	MPASS(cb);
4556
4557	while (true) {
4558		struct ifnet **new_array;
4559		int new_size = array_size;
4560		struct epoch_tracker et;
4561		struct ifnet *ifp;
4562
4563		while (new_size < match_count)
4564			new_size *= 2;
4565		new_array = malloc(new_size * sizeof(void *), M_TEMP, M_WAITOK);
4566		if (match_array != NULL)
4567			memcpy(new_array, match_array, array_size * sizeof(void *));
4568		free(match_array, M_TEMP);
4569		match_array = new_array;
4570		array_size = new_size;
4571
4572		match_count = 0;
4573		NET_EPOCH_ENTER(et);
4574		CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
4575			if (match_cb != NULL && !match_cb(ifp, match_arg))
4576				continue;
4577			if (match_count < array_size) {
4578				if (if_try_ref(ifp))
4579					match_array[match_count++] = ifp;
4580			} else
4581				match_count++;
4582		}
4583		NET_EPOCH_EXIT(et);
4584
4585		if (match_count > array_size) {
4586			for (int i = 0; i < array_size; i++)
4587				if_rele(match_array[i]);
4588			continue;
4589		} else {
4590			for (int i = 0; i < match_count; i++) {
4591				if (error == 0)
4592					error = cb(match_array[i], cb_arg);
4593				if_rele(match_array[i]);
4594			}
4595			free(match_array, M_TEMP);
4596			break;
4597		}
4598	}
4599
4600	return (error);
4601}
4602
4603
4604/*
4605 * Uses just 1 pointer of the 4 available in the public struct.
4606 */
4607if_t
4608if_iter_start(struct if_iter *iter)
4609{
4610	if_t ifp;
4611
4612	NET_EPOCH_ASSERT();
4613
4614	bzero(iter, sizeof(*iter));
4615	ifp = CK_STAILQ_FIRST(&V_ifnet);
4616	if (ifp != NULL)
4617		iter->context[0] = CK_STAILQ_NEXT(ifp, if_link);
4618	else
4619		iter->context[0] = NULL;
4620	return (ifp);
4621}
4622
4623if_t
4624if_iter_next(struct if_iter *iter)
4625{
4626	if_t cur_ifp = iter->context[0];
4627
4628	if (cur_ifp != NULL)
4629		iter->context[0] = CK_STAILQ_NEXT(cur_ifp, if_link);
4630	return (cur_ifp);
4631}
4632
/*
 * Ends an interface iteration.  Currently a no-op.
 */
void
if_iter_finish(struct if_iter *iter)
{

	/* The iterator holds nothing that has to be released for now. */
}
4638
4639u_int
4640if_foreach_lladdr(if_t ifp, iflladdr_cb_t cb, void *cb_arg)
4641{
4642	struct epoch_tracker et;
4643	struct ifaddr *ifa;
4644	u_int count;
4645
4646	MPASS(cb);
4647
4648	count = 0;
4649	NET_EPOCH_ENTER(et);
4650	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
4651		if (ifa->ifa_addr->sa_family != AF_LINK)
4652			continue;
4653		count += (*cb)(cb_arg, (struct sockaddr_dl *)ifa->ifa_addr,
4654		    count);
4655	}
4656	NET_EPOCH_EXIT(et);
4657
4658	return (count);
4659}
4660
4661u_int
4662if_llmaddr_count(if_t ifp)
4663{
4664	struct epoch_tracker et;
4665	struct ifmultiaddr *ifma;
4666	int count;
4667
4668	count = 0;
4669	NET_EPOCH_ENTER(et);
4670	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
4671		if (ifma->ifma_addr->sa_family == AF_LINK)
4672			count++;
4673	NET_EPOCH_EXIT(et);
4674
4675	return (count);
4676}
4677
4678bool
4679if_maddr_empty(if_t ifp)
4680{
4681
4682	return (CK_STAILQ_EMPTY(&ifp->if_multiaddrs));
4683}
4684
4685u_int
4686if_foreach_llmaddr(if_t ifp, iflladdr_cb_t cb, void *cb_arg)
4687{
4688	struct epoch_tracker et;
4689	struct ifmultiaddr *ifma;
4690	u_int count;
4691
4692	MPASS(cb);
4693
4694	count = 0;
4695	NET_EPOCH_ENTER(et);
4696	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
4697		if (ifma->ifma_addr->sa_family != AF_LINK)
4698			continue;
4699		count += (*cb)(cb_arg, (struct sockaddr_dl *)ifma->ifma_addr,
4700		    count);
4701	}
4702	NET_EPOCH_EXIT(et);
4703
4704	return (count);
4705}
4706
4707u_int
4708if_foreach_addr_type(if_t ifp, int type, if_addr_cb_t cb, void *cb_arg)
4709{
4710	struct epoch_tracker et;
4711	struct ifaddr *ifa;
4712	u_int count;
4713
4714	MPASS(cb);
4715
4716	count = 0;
4717	NET_EPOCH_ENTER(et);
4718	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
4719		if (ifa->ifa_addr->sa_family != type)
4720			continue;
4721		count += (*cb)(cb_arg, ifa, count);
4722	}
4723	NET_EPOCH_EXIT(et);
4724
4725	return (count);
4726}
4727
4728struct ifaddr *
4729ifa_iter_start(if_t ifp, struct ifa_iter *iter)
4730{
4731	struct ifaddr *ifa;
4732
4733	NET_EPOCH_ASSERT();
4734
4735	bzero(iter, sizeof(*iter));
4736	ifa = CK_STAILQ_FIRST(&ifp->if_addrhead);
4737	if (ifa != NULL)
4738		iter->context[0] = CK_STAILQ_NEXT(ifa, ifa_link);
4739	else
4740		iter->context[0] = NULL;
4741	return (ifa);
4742}
4743
4744struct ifaddr *
4745ifa_iter_next(struct ifa_iter *iter)
4746{
4747	struct ifaddr *ifa = iter->context[0];
4748
4749	if (ifa != NULL)
4750		iter->context[0] = CK_STAILQ_NEXT(ifa, ifa_link);
4751	return (ifa);
4752}
4753
/*
 * Ends an address iteration.  Currently a no-op.
 */
void
ifa_iter_finish(struct ifa_iter *iter)
{

	/* The iterator holds nothing that has to be released for now. */
}
4759
4760int
4761if_setsoftc(if_t ifp, void *softc)
4762{
4763	ifp->if_softc = softc;
4764	return (0);
4765}
4766
4767void *
4768if_getsoftc(const if_t ifp)
4769{
4770	return (ifp->if_softc);
4771}
4772
4773void
4774if_setrcvif(struct mbuf *m, if_t ifp)
4775{
4776
4777	MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
4778	m->m_pkthdr.rcvif = (struct ifnet *)ifp;
4779}
4780
4781void
4782if_setvtag(struct mbuf *m, uint16_t tag)
4783{
4784	m->m_pkthdr.ether_vtag = tag;
4785}
4786
4787uint16_t
4788if_getvtag(struct mbuf *m)
4789{
4790	return (m->m_pkthdr.ether_vtag);
4791}
4792
4793int
4794if_sendq_empty(if_t ifp)
4795{
4796	return (IFQ_DRV_IS_EMPTY(&ifp->if_snd));
4797}
4798
4799struct ifaddr *
4800if_getifaddr(const if_t ifp)
4801{
4802	return (ifp->if_addr);
4803}
4804
4805int
4806if_getamcount(const if_t ifp)
4807{
4808	return (ifp->if_amcount);
4809}
4810
4811int
4812if_setsendqready(if_t ifp)
4813{
4814	IFQ_SET_READY(&ifp->if_snd);
4815	return (0);
4816}
4817
4818int
4819if_setsendqlen(if_t ifp, int tx_desc_count)
4820{
4821	IFQ_SET_MAXLEN(&ifp->if_snd, tx_desc_count);
4822	ifp->if_snd.ifq_drv_maxlen = tx_desc_count;
4823	return (0);
4824}
4825
4826void
4827if_setnetmapadapter(if_t ifp, struct netmap_adapter *na)
4828{
4829	ifp->if_netmap = na;
4830}
4831
4832struct netmap_adapter *
4833if_getnetmapadapter(if_t ifp)
4834{
4835	return (ifp->if_netmap);
4836}
4837
4838int
4839if_vlantrunkinuse(if_t ifp)
4840{
4841	return (ifp->if_vlantrunk != NULL);
4842}
4843
4844void
4845if_init(if_t ifp, void *ctx)
4846{
4847	(*ifp->if_init)(ctx);
4848}
4849
4850void
4851if_input(if_t ifp, struct mbuf* sendmp)
4852{
4853	(*ifp->if_input)(ifp, sendmp);
4854}
4855
4856int
4857if_transmit(if_t ifp, struct mbuf *m)
4858{
4859	return ((*ifp->if_transmit)(ifp, m));
4860}
4861
4862int
4863if_resolvemulti(if_t ifp, struct sockaddr **srcs, struct sockaddr *dst)
4864{
4865	if (ifp->if_resolvemulti == NULL)
4866		return (EOPNOTSUPP);
4867
4868	return (ifp->if_resolvemulti(ifp, srcs, dst));
4869}
4870
4871int
4872if_ioctl(if_t ifp, u_long cmd, void *data)
4873{
4874	if (ifp->if_ioctl == NULL)
4875		return (EOPNOTSUPP);
4876
4877	return (ifp->if_ioctl(ifp, cmd, data));
4878}
4879
4880struct mbuf *
4881if_dequeue(if_t ifp)
4882{
4883	struct mbuf *m;
4884
4885	IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
4886	return (m);
4887}
4888
4889int
4890if_sendq_prepend(if_t ifp, struct mbuf *m)
4891{
4892	IFQ_DRV_PREPEND(&ifp->if_snd, m);
4893	return (0);
4894}
4895
4896int
4897if_setifheaderlen(if_t ifp, int len)
4898{
4899	ifp->if_hdrlen = len;
4900	return (0);
4901}
4902
4903caddr_t
4904if_getlladdr(const if_t ifp)
4905{
4906	return (IF_LLADDR(ifp));
4907}
4908
4909void *
4910if_gethandle(u_char type)
4911{
4912	return (if_alloc(type));
4913}
4914
4915void
4916if_vlancap(if_t ifp)
4917{
4918	VLAN_CAPABILITIES(ifp);
4919}
4920
4921int
4922if_sethwtsomax(if_t ifp, u_int if_hw_tsomax)
4923{
4924	ifp->if_hw_tsomax = if_hw_tsomax;
4925        return (0);
4926}
4927
4928int
4929if_sethwtsomaxsegcount(if_t ifp, u_int if_hw_tsomaxsegcount)
4930{
4931	ifp->if_hw_tsomaxsegcount = if_hw_tsomaxsegcount;
4932        return (0);
4933}
4934
4935int
4936if_sethwtsomaxsegsize(if_t ifp, u_int if_hw_tsomaxsegsize)
4937{
4938	ifp->if_hw_tsomaxsegsize = if_hw_tsomaxsegsize;
4939        return (0);
4940}
4941
4942u_int
4943if_gethwtsomax(const if_t ifp)
4944{
4945	return (ifp->if_hw_tsomax);
4946}
4947
4948u_int
4949if_gethwtsomaxsegcount(const if_t ifp)
4950{
4951	return (ifp->if_hw_tsomaxsegcount);
4952}
4953
4954u_int
4955if_gethwtsomaxsegsize(const if_t ifp)
4956{
4957	return (ifp->if_hw_tsomaxsegsize);
4958}
4959
4960void
4961if_setinitfn(if_t ifp, if_init_fn_t init_fn)
4962{
4963	ifp->if_init = init_fn;
4964}
4965
4966void
4967if_setinputfn(if_t ifp, if_input_fn_t input_fn)
4968{
4969	ifp->if_input = input_fn;
4970}
4971
4972if_input_fn_t
4973if_getinputfn(if_t ifp)
4974{
4975	return (ifp->if_input);
4976}
4977
4978void
4979if_setioctlfn(if_t ifp, if_ioctl_fn_t ioctl_fn)
4980{
4981	ifp->if_ioctl = ioctl_fn;
4982}
4983
4984void
4985if_setoutputfn(if_t ifp, if_output_fn_t output_fn)
4986{
4987	ifp->if_output = output_fn;
4988}
4989
4990void
4991if_setstartfn(if_t ifp, if_start_fn_t start_fn)
4992{
4993	ifp->if_start = start_fn;
4994}
4995
4996if_start_fn_t
4997if_getstartfn(if_t ifp)
4998{
4999	return (ifp->if_start);
5000}
5001
5002void
5003if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn)
5004{
5005	ifp->if_transmit = start_fn;
5006}
5007
5008if_transmit_fn_t
5009if_gettransmitfn(if_t ifp)
5010{
5011	return (ifp->if_transmit);
5012}
5013
5014void
5015if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn)
5016{
5017	ifp->if_qflush = flush_fn;
5018}
5019
5020void
5021if_setsndtagallocfn(if_t ifp, if_snd_tag_alloc_t alloc_fn)
5022{
5023	ifp->if_snd_tag_alloc = alloc_fn;
5024}
5025
5026int
5027if_snd_tag_alloc(if_t ifp, union if_snd_tag_alloc_params *params,
5028    struct m_snd_tag **mstp)
5029{
5030	if (ifp->if_snd_tag_alloc == NULL)
5031		return (EOPNOTSUPP);
5032	return (ifp->if_snd_tag_alloc(ifp, params, mstp));
5033}
5034
5035void
5036if_setgetcounterfn(if_t ifp, if_get_counter_t fn)
5037{
5038	ifp->if_get_counter = fn;
5039}
5040
5041void
5042if_setreassignfn(if_t ifp, if_reassign_fn_t fn)
5043{
5044	ifp->if_reassign = fn;
5045}
5046
5047void
5048if_setratelimitqueryfn(if_t ifp, if_ratelimit_query_t fn)
5049{
5050	ifp->if_ratelimit_query = fn;
5051}
5052
5053void
5054if_setdebugnet_methods(if_t ifp, struct debugnet_methods *m)
5055{
5056	ifp->if_debugnet_methods = m;
5057}
5058
5059struct label *
5060if_getmaclabel(if_t ifp)
5061{
5062	return (ifp->if_label);
5063}
5064
5065void
5066if_setmaclabel(if_t ifp, struct label *label)
5067{
5068	ifp->if_label = label;
5069}
5070
5071int
5072if_gettype(if_t ifp)
5073{
5074	return (ifp->if_type);
5075}
5076
5077void *
5078if_getllsoftc(if_t ifp)
5079{
5080	return (ifp->if_llsoftc);
5081}
5082
5083void
5084if_setllsoftc(if_t ifp, void *llsoftc)
5085{
5086	ifp->if_llsoftc = llsoftc;
5087};
5088
5089int
5090if_getlinkstate(if_t ifp)
5091{
5092	return (ifp->if_link_state);
5093}
5094
5095const uint8_t *
5096if_getbroadcastaddr(if_t ifp)
5097{
5098	return (ifp->if_broadcastaddr);
5099}
5100
5101void
5102if_setbroadcastaddr(if_t ifp, const uint8_t *addr)
5103{
5104	ifp->if_broadcastaddr = addr;
5105}
5106
5107int
5108if_getnumadomain(if_t ifp)
5109{
5110	return (ifp->if_numa_domain);
5111}
5112
5113uint64_t
5114if_getcounter(if_t ifp, ift_counter counter)
5115{
5116	return (ifp->if_get_counter(ifp, counter));
5117}
5118
5119bool
5120if_altq_is_enabled(if_t ifp)
5121{
5122	return (ALTQ_IS_ENABLED(&ifp->if_snd));
5123}
5124
5125struct vnet *
5126if_getvnet(if_t ifp)
5127{
5128	return (ifp->if_vnet);
5129}
5130
5131void *
5132if_getafdata(if_t ifp, int af)
5133{
5134	return (ifp->if_afdata[af]);
5135}
5136
5137u_int
5138if_getfib(if_t ifp)
5139{
5140	return (ifp->if_fib);
5141}
5142
5143uint8_t
5144if_getaddrlen(if_t ifp)
5145{
5146	return (ifp->if_addrlen);
5147}
5148
5149struct bpf_if *
5150if_getbpf(if_t ifp)
5151{
5152	return (ifp->if_bpf);
5153}
5154
5155struct ifvlantrunk *
5156if_getvlantrunk(if_t ifp)
5157{
5158	return (ifp->if_vlantrunk);
5159}
5160
5161uint8_t
5162if_getpcp(if_t ifp)
5163{
5164	return (ifp->if_pcp);
5165}
5166
5167void *
5168if_getl2com(if_t ifp)
5169{
5170	return (ifp->if_l2com);
5171}
5172
5173#ifdef DDB
5174static void
5175if_show_ifnet(struct ifnet *ifp)
5176{
5177	if (ifp == NULL)
5178		return;
5179	db_printf("%s:\n", ifp->if_xname);
5180#define	IF_DB_PRINTF(f, e)	db_printf("   %s = " f "\n", #e, ifp->e);
5181	IF_DB_PRINTF("%s", if_dname);
5182	IF_DB_PRINTF("%d", if_dunit);
5183	IF_DB_PRINTF("%s", if_description);
5184	IF_DB_PRINTF("%u", if_index);
5185	IF_DB_PRINTF("%d", if_idxgen);
5186	IF_DB_PRINTF("%u", if_refcount);
5187	IF_DB_PRINTF("%p", if_softc);
5188	IF_DB_PRINTF("%p", if_l2com);
5189	IF_DB_PRINTF("%p", if_llsoftc);
5190	IF_DB_PRINTF("%d", if_amcount);
5191	IF_DB_PRINTF("%p", if_addr);
5192	IF_DB_PRINTF("%p", if_broadcastaddr);
5193	IF_DB_PRINTF("%p", if_afdata);
5194	IF_DB_PRINTF("%d", if_afdata_initialized);
5195	IF_DB_PRINTF("%u", if_fib);
5196	IF_DB_PRINTF("%p", if_vnet);
5197	IF_DB_PRINTF("%p", if_home_vnet);
5198	IF_DB_PRINTF("%p", if_vlantrunk);
5199	IF_DB_PRINTF("%p", if_bpf);
5200	IF_DB_PRINTF("%u", if_pcount);
5201	IF_DB_PRINTF("%p", if_bridge);
5202	IF_DB_PRINTF("%p", if_lagg);
5203	IF_DB_PRINTF("%p", if_pf_kif);
5204	IF_DB_PRINTF("%p", if_carp);
5205	IF_DB_PRINTF("%p", if_label);
5206	IF_DB_PRINTF("%p", if_netmap);
5207	IF_DB_PRINTF("0x%08x", if_flags);
5208	IF_DB_PRINTF("0x%08x", if_drv_flags);
5209	IF_DB_PRINTF("0x%08x", if_capabilities);
5210	IF_DB_PRINTF("0x%08x", if_capenable);
5211	IF_DB_PRINTF("%p", if_snd.ifq_head);
5212	IF_DB_PRINTF("%p", if_snd.ifq_tail);
5213	IF_DB_PRINTF("%d", if_snd.ifq_len);
5214	IF_DB_PRINTF("%d", if_snd.ifq_maxlen);
5215	IF_DB_PRINTF("%p", if_snd.ifq_drv_head);
5216	IF_DB_PRINTF("%p", if_snd.ifq_drv_tail);
5217	IF_DB_PRINTF("%d", if_snd.ifq_drv_len);
5218	IF_DB_PRINTF("%d", if_snd.ifq_drv_maxlen);
5219	IF_DB_PRINTF("%d", if_snd.altq_type);
5220	IF_DB_PRINTF("%x", if_snd.altq_flags);
5221#undef IF_DB_PRINTF
5222}
5223
5224DB_SHOW_COMMAND(ifnet, db_show_ifnet)
5225{
5226	if (!have_addr) {
5227		db_printf("usage: show ifnet <struct ifnet *>\n");
5228		return;
5229	}
5230
5231	if_show_ifnet((struct ifnet *)addr);
5232}
5233
5234DB_SHOW_ALL_COMMAND(ifnets, db_show_all_ifnets)
5235{
5236	struct ifnet *ifp;
5237	u_short idx;
5238
5239	for (idx = 1; idx <= if_index; idx++) {
5240		ifp = ifindex_table[idx].ife_ifnet;
5241		if (ifp == NULL)
5242			continue;
5243		db_printf( "%20s ifp=%p\n", ifp->if_xname, ifp);
5244		if (db_pager_quit)
5245			break;
5246	}
5247}
5248#endif	/* DDB */
5249