1/*	$NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $	*/
2
3/*-
4 * SPDX-License-Identifier: BSD-4-Clause
5 *
6 * Copyright 2001 Wasabi Systems, Inc.
7 * All rights reserved.
8 *
9 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 *    must display the following acknowledgement:
21 *	This product includes software developed for the NetBSD Project by
22 *	Wasabi Systems, Inc.
23 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
24 *    or promote products derived from this software without specific prior
25 *    written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40/*
41 * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
42 * All rights reserved.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 *    notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 *    notice, this list of conditions and the following disclaimer in the
51 *    documentation and/or other materials provided with the distribution.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
54 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
55 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
56 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
57 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
58 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
59 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
61 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
62 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63 * POSSIBILITY OF SUCH DAMAGE.
64 *
65 * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
66 */
67
68/*
69 * Network interface bridge support.
70 *
71 * TODO:
72 *
73 *	- Currently only supports Ethernet-like interfaces (Ethernet,
74 *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
75 *	  to bridge other types of interfaces (maybe consider
76 *	  heterogeneous bridges).
77 */
78
79#include <sys/cdefs.h>
80#include "opt_inet.h"
81#include "opt_inet6.h"
82
83#include <sys/param.h>
84#include <sys/eventhandler.h>
85#include <sys/mbuf.h>
86#include <sys/malloc.h>
87#include <sys/protosw.h>
88#include <sys/systm.h>
89#include <sys/jail.h>
90#include <sys/time.h>
91#include <sys/socket.h> /* for net/if.h */
92#include <sys/sockio.h>
93#include <sys/ctype.h>  /* string functions */
94#include <sys/kernel.h>
95#include <sys/random.h>
96#include <sys/syslog.h>
97#include <sys/sysctl.h>
98#include <vm/uma.h>
99#include <sys/module.h>
100#include <sys/priv.h>
101#include <sys/proc.h>
102#include <sys/lock.h>
103#include <sys/mutex.h>
104
105#include <net/bpf.h>
106#include <net/if.h>
107#include <net/if_clone.h>
108#include <net/if_dl.h>
109#include <net/if_types.h>
110#include <net/if_var.h>
111#include <net/if_private.h>
112#include <net/pfil.h>
113#include <net/vnet.h>
114
115#include <netinet/in.h>
116#include <netinet/in_systm.h>
117#include <netinet/in_var.h>
118#include <netinet/ip.h>
119#include <netinet/ip_var.h>
120#ifdef INET6
121#include <netinet/ip6.h>
122#include <netinet6/ip6_var.h>
123#include <netinet6/in6_ifattach.h>
124#endif
125#if defined(INET) || defined(INET6)
126#include <netinet/ip_carp.h>
127#endif
128#include <machine/in_cksum.h>
129#include <netinet/if_ether.h>
130#include <net/bridgestp.h>
131#include <net/if_bridgevar.h>
132#include <net/if_llc.h>
133#include <net/if_vlan_var.h>
134
135#include <net/route.h>
136
137/*
138 * At various points in the code we need to know if we're hooked into the INET
139 * and/or INET6 pfil.  Define some macros to do that based on which IP versions
140 * are enabled in the kernel.  This avoids littering the rest of the code with
141 * #ifdef INET6 to avoid referencing V_inet6_pfil_head.
142 */
143#ifdef INET6
144#define		PFIL_HOOKED_IN_INET6	PFIL_HOOKED_IN(V_inet6_pfil_head)
145#define		PFIL_HOOKED_OUT_INET6	PFIL_HOOKED_OUT(V_inet6_pfil_head)
146#else
147#define		PFIL_HOOKED_IN_INET6	false
148#define		PFIL_HOOKED_OUT_INET6	false
149#endif
150
151#ifdef INET
152#define		PFIL_HOOKED_IN_INET	PFIL_HOOKED_IN(V_inet_pfil_head)
153#define		PFIL_HOOKED_OUT_INET	PFIL_HOOKED_OUT(V_inet_pfil_head)
154#else
155#define		PFIL_HOOKED_IN_INET	false
156#define		PFIL_HOOKED_OUT_INET	false
157#endif
158
159#define		PFIL_HOOKED_IN_46	(PFIL_HOOKED_IN_INET6 || PFIL_HOOKED_IN_INET)
160#define		PFIL_HOOKED_OUT_46	(PFIL_HOOKED_OUT_INET6 || PFIL_HOOKED_OUT_INET)
161
162/*
163 * Size of the route hash table.  Must be a power of two.
164 */
165#ifndef BRIDGE_RTHASH_SIZE
166#define	BRIDGE_RTHASH_SIZE		1024
167#endif
168
169#define	BRIDGE_RTHASH_MASK		(BRIDGE_RTHASH_SIZE - 1)
170
171/*
172 * Default maximum number of addresses to cache.
173 */
174#ifndef BRIDGE_RTABLE_MAX
175#define	BRIDGE_RTABLE_MAX		2000
176#endif
177
178/*
179 * Timeout (in seconds) for entries learned dynamically.
180 */
181#ifndef BRIDGE_RTABLE_TIMEOUT
182#define	BRIDGE_RTABLE_TIMEOUT		(20 * 60)	/* same as ARP */
183#endif
184
185/*
186 * Number of seconds between walks of the route list.
187 */
188#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
189#define	BRIDGE_RTABLE_PRUNE_PERIOD	(5 * 60)
190#endif
191
192/*
193 * List of capabilities to possibly mask on the member interface.
194 */
195#define	BRIDGE_IFCAPS_MASK		(IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM|\
196					 IFCAP_TXCSUM_IPV6)
197
198/*
199 * List of capabilities to strip
200 */
201#define	BRIDGE_IFCAPS_STRIP		IFCAP_LRO
202
203/*
204 * Bridge locking
205 *
206 * The bridge relies heavily on the epoch(9) system to protect its data
207 * structures. This means we can safely use CK_LISTs while in NET_EPOCH, but we
208 * must ensure there is only one writer at a time.
209 *
210 * That is: for read accesses we only need to be in NET_EPOCH, but for write
211 * accesses we must hold:
212 *
213 *  - BRIDGE_RT_LOCK, for any change to bridge_rtnodes
214 *  - BRIDGE_LOCK, for any other change
215 *
216 * The BRIDGE_LOCK is a sleepable lock, because it is held across ioctl()
217 * calls to bridge member interfaces and these ioctl()s can sleep.
218 * The BRIDGE_RT_LOCK is a non-sleepable mutex, because it is sometimes
219 * required while we're in NET_EPOCH and then we're not allowed to sleep.
220 */
221#define BRIDGE_LOCK_INIT(_sc)		do {			\
222	sx_init(&(_sc)->sc_sx, "if_bridge");			\
223	mtx_init(&(_sc)->sc_rt_mtx, "if_bridge rt", NULL, MTX_DEF);	\
224} while (0)
225#define BRIDGE_LOCK_DESTROY(_sc)	do {	\
226	sx_destroy(&(_sc)->sc_sx);		\
227	mtx_destroy(&(_sc)->sc_rt_mtx);		\
228} while (0)
229#define BRIDGE_LOCK(_sc)		sx_xlock(&(_sc)->sc_sx)
230#define BRIDGE_UNLOCK(_sc)		sx_xunlock(&(_sc)->sc_sx)
231#define BRIDGE_LOCK_ASSERT(_sc)		sx_assert(&(_sc)->sc_sx, SX_XLOCKED)
232#define BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(_sc)	\
233	    MPASS(in_epoch(net_epoch_preempt) || sx_xlocked(&(_sc)->sc_sx))
234#define BRIDGE_UNLOCK_ASSERT(_sc)	sx_assert(&(_sc)->sc_sx, SX_UNLOCKED)
235#define BRIDGE_RT_LOCK(_sc)		mtx_lock(&(_sc)->sc_rt_mtx)
236#define BRIDGE_RT_UNLOCK(_sc)		mtx_unlock(&(_sc)->sc_rt_mtx)
237#define BRIDGE_RT_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->sc_rt_mtx, MA_OWNED)
238#define BRIDGE_RT_LOCK_OR_NET_EPOCH_ASSERT(_sc)	\
239	    MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(_sc)->sc_rt_mtx))
240
241/*
242 * Bridge interface list entry.
243 */
244struct bridge_iflist {
245	CK_LIST_ENTRY(bridge_iflist) bif_next;
246	struct ifnet		*bif_ifp;	/* member if */
247	struct bstp_port	bif_stp;	/* STP state */
248	uint32_t		bif_flags;	/* member if flags */
249	int			bif_savedcaps;	/* saved capabilities */
250	uint32_t		bif_addrmax;	/* max # of addresses */
251	uint32_t		bif_addrcnt;	/* cur. # of addresses */
252	uint32_t		bif_addrexceeded;/* # of address violations */
253	struct epoch_context	bif_epoch_ctx;
254};
255
256/*
257 * Bridge route node.
258 */
259struct bridge_rtnode {
260	CK_LIST_ENTRY(bridge_rtnode) brt_hash;	/* hash table linkage */
261	CK_LIST_ENTRY(bridge_rtnode) brt_list;	/* list linkage */
262	struct bridge_iflist	*brt_dst;	/* destination if */
263	unsigned long		brt_expire;	/* expiration time */
264	uint8_t			brt_flags;	/* address flags */
265	uint8_t			brt_addr[ETHER_ADDR_LEN];
266	uint16_t		brt_vlan;	/* vlan id */
267	struct	vnet		*brt_vnet;
268	struct	epoch_context	brt_epoch_ctx;
269};
270#define	brt_ifp			brt_dst->bif_ifp
271
272/*
273 * Software state for each bridge.
274 */
275struct bridge_softc {
276	struct ifnet		*sc_ifp;	/* make this an interface */
277	LIST_ENTRY(bridge_softc) sc_list;
278	struct sx		sc_sx;
279	struct mtx		sc_rt_mtx;
280	uint32_t		sc_brtmax;	/* max # of addresses */
281	uint32_t		sc_brtcnt;	/* cur. # of addresses */
282	uint32_t		sc_brttimeout;	/* rt timeout in seconds */
283	struct callout		sc_brcallout;	/* bridge callout */
284	CK_LIST_HEAD(, bridge_iflist) sc_iflist;	/* member interface list */
285	CK_LIST_HEAD(, bridge_rtnode) *sc_rthash;	/* our forwarding table */
286	CK_LIST_HEAD(, bridge_rtnode) sc_rtlist;	/* list version of above */
287	uint32_t		sc_rthash_key;	/* key for hash */
288	CK_LIST_HEAD(, bridge_iflist) sc_spanlist;	/* span ports list */
289	struct bstp_state	sc_stp;		/* STP state */
290	uint32_t		sc_brtexceeded;	/* # of cache drops */
291	struct ifnet		*sc_ifaddr;	/* member mac copied from */
292	struct ether_addr	sc_defaddr;	/* Default MAC address */
293	if_input_fn_t		sc_if_input;	/* Saved copy of if_input */
294	struct epoch_context	sc_epoch_ctx;
295};
296
297VNET_DEFINE_STATIC(struct sx, bridge_list_sx);
298#define	V_bridge_list_sx	VNET(bridge_list_sx)
299static eventhandler_tag bridge_detach_cookie;
300
301int	bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
302
303VNET_DEFINE_STATIC(uma_zone_t, bridge_rtnode_zone);
304#define	V_bridge_rtnode_zone	VNET(bridge_rtnode_zone)
305
306static int	bridge_clone_create(struct if_clone *, char *, size_t,
307		    struct ifc_data *, struct ifnet **);
308static int	bridge_clone_destroy(struct if_clone *, struct ifnet *, uint32_t);
309
310static int	bridge_ioctl(struct ifnet *, u_long, caddr_t);
311static void	bridge_mutecaps(struct bridge_softc *);
312static void	bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
313		    int);
314static void	bridge_ifdetach(void *arg __unused, struct ifnet *);
315static void	bridge_init(void *);
316static void	bridge_dummynet(struct mbuf *, struct ifnet *);
317static void	bridge_stop(struct ifnet *, int);
318static int	bridge_transmit(struct ifnet *, struct mbuf *);
319#ifdef ALTQ
320static void	bridge_altq_start(if_t);
321static int	bridge_altq_transmit(if_t, struct mbuf *);
322#endif
323static void	bridge_qflush(struct ifnet *);
324static struct mbuf *bridge_input(struct ifnet *, struct mbuf *);
325static void	bridge_inject(struct ifnet *, struct mbuf *);
326static int	bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *,
327		    struct rtentry *);
328static int	bridge_enqueue(struct bridge_softc *, struct ifnet *,
329		    struct mbuf *);
330static void	bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
331
332static void	bridge_forward(struct bridge_softc *, struct bridge_iflist *,
333		    struct mbuf *m);
334
335static void	bridge_timer(void *);
336
337static void	bridge_broadcast(struct bridge_softc *, struct ifnet *,
338		    struct mbuf *, int);
339static void	bridge_span(struct bridge_softc *, struct mbuf *);
340
341static int	bridge_rtupdate(struct bridge_softc *, const uint8_t *,
342		    uint16_t, struct bridge_iflist *, int, uint8_t);
343static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *,
344		    uint16_t);
345static void	bridge_rttrim(struct bridge_softc *);
346static void	bridge_rtage(struct bridge_softc *);
347static void	bridge_rtflush(struct bridge_softc *, int);
348static int	bridge_rtdaddr(struct bridge_softc *, const uint8_t *,
349		    uint16_t);
350
351static void	bridge_rtable_init(struct bridge_softc *);
352static void	bridge_rtable_fini(struct bridge_softc *);
353
354static int	bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
355static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
356		    const uint8_t *, uint16_t);
357static int	bridge_rtnode_insert(struct bridge_softc *,
358		    struct bridge_rtnode *);
359static void	bridge_rtnode_destroy(struct bridge_softc *,
360		    struct bridge_rtnode *);
361static void	bridge_rtable_expire(struct ifnet *, int);
362static void	bridge_state_change(struct ifnet *, int);
363
364static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
365		    const char *name);
366static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
367		    struct ifnet *ifp);
368static void	bridge_delete_member(struct bridge_softc *,
369		    struct bridge_iflist *, int);
370static void	bridge_delete_span(struct bridge_softc *,
371		    struct bridge_iflist *);
372
373static int	bridge_ioctl_add(struct bridge_softc *, void *);
374static int	bridge_ioctl_del(struct bridge_softc *, void *);
375static int	bridge_ioctl_gifflags(struct bridge_softc *, void *);
376static int	bridge_ioctl_sifflags(struct bridge_softc *, void *);
377static int	bridge_ioctl_scache(struct bridge_softc *, void *);
378static int	bridge_ioctl_gcache(struct bridge_softc *, void *);
379static int	bridge_ioctl_gifs(struct bridge_softc *, void *);
380static int	bridge_ioctl_rts(struct bridge_softc *, void *);
381static int	bridge_ioctl_saddr(struct bridge_softc *, void *);
382static int	bridge_ioctl_sto(struct bridge_softc *, void *);
383static int	bridge_ioctl_gto(struct bridge_softc *, void *);
384static int	bridge_ioctl_daddr(struct bridge_softc *, void *);
385static int	bridge_ioctl_flush(struct bridge_softc *, void *);
386static int	bridge_ioctl_gpri(struct bridge_softc *, void *);
387static int	bridge_ioctl_spri(struct bridge_softc *, void *);
388static int	bridge_ioctl_ght(struct bridge_softc *, void *);
389static int	bridge_ioctl_sht(struct bridge_softc *, void *);
390static int	bridge_ioctl_gfd(struct bridge_softc *, void *);
391static int	bridge_ioctl_sfd(struct bridge_softc *, void *);
392static int	bridge_ioctl_gma(struct bridge_softc *, void *);
393static int	bridge_ioctl_sma(struct bridge_softc *, void *);
394static int	bridge_ioctl_sifprio(struct bridge_softc *, void *);
395static int	bridge_ioctl_sifcost(struct bridge_softc *, void *);
396static int	bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *);
397static int	bridge_ioctl_addspan(struct bridge_softc *, void *);
398static int	bridge_ioctl_delspan(struct bridge_softc *, void *);
399static int	bridge_ioctl_gbparam(struct bridge_softc *, void *);
400static int	bridge_ioctl_grte(struct bridge_softc *, void *);
401static int	bridge_ioctl_gifsstp(struct bridge_softc *, void *);
402static int	bridge_ioctl_sproto(struct bridge_softc *, void *);
403static int	bridge_ioctl_stxhc(struct bridge_softc *, void *);
404static int	bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *,
405		    int);
406#ifdef INET
407static int	bridge_ip_checkbasic(struct mbuf **mp);
408static int	bridge_fragment(struct ifnet *, struct mbuf **mp,
409		    struct ether_header *, int, struct llc *);
410#endif /* INET */
411#ifdef INET6
412static int	bridge_ip6_checkbasic(struct mbuf **mp);
413#endif /* INET6 */
414static void	bridge_linkstate(struct ifnet *ifp);
415static void	bridge_linkcheck(struct bridge_softc *sc);
416
417/*
418 * Use the "null" value from IEEE 802.1Q-2014 Table 9-2
419 * to indicate untagged frames.
420 */
/*
 * Evaluates to the VLAN id carried in the mbuf's ether_vtag when M_VLANTAG
 * is set, and to DOT1Q_VID_NULL otherwise.  Both the argument and the whole
 * expansion are parenthesized so the macro can be used with any pointer
 * expression and inside a larger expression; the argument is still
 * evaluated more than once, so it must be free of side effects.
 */
#define	VLANTAGOF(_m)	\
    (((_m)->m_flags & M_VLANTAG) ?				\
	EVL_VLANOFTAG((_m)->m_pkthdr.ether_vtag) : DOT1Q_VID_NULL)
423
424static struct bstp_cb_ops bridge_ops = {
425	.bcb_state = bridge_state_change,
426	.bcb_rtage = bridge_rtable_expire
427};
428
429SYSCTL_DECL(_net_link);
430static SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
431    "Bridge");
432
433/* only pass IP[46] packets when pfil is enabled */
434VNET_DEFINE_STATIC(int, pfil_onlyip) = 1;
435#define	V_pfil_onlyip	VNET(pfil_onlyip)
436SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip,
437    CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_onlyip), 0,
438    "Only pass IP packets when pfil is enabled");
439
440/* run pfil hooks on the bridge interface */
441VNET_DEFINE_STATIC(int, pfil_bridge) = 0;
442#define	V_pfil_bridge	VNET(pfil_bridge)
443SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge,
444    CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_bridge), 0,
445    "Packet filter on the bridge interface");
446
447/* layer2 filter with ipfw */
448VNET_DEFINE_STATIC(int, pfil_ipfw);
449#define	V_pfil_ipfw	VNET(pfil_ipfw)
450
451/* layer2 ARP filter with ipfw */
452VNET_DEFINE_STATIC(int, pfil_ipfw_arp);
453#define	V_pfil_ipfw_arp	VNET(pfil_ipfw_arp)
454SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp,
455    CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_ipfw_arp), 0,
456    "Filter ARP packets through IPFW layer2");
457
458/* run pfil hooks on the member interface */
459VNET_DEFINE_STATIC(int, pfil_member) = 0;
460#define	V_pfil_member	VNET(pfil_member)
461SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member,
462    CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_member), 0,
463    "Packet filter on the member interface");
464
465/* run pfil hooks on the physical interface for locally destined packets */
466VNET_DEFINE_STATIC(int, pfil_local_phys);
467#define	V_pfil_local_phys	VNET(pfil_local_phys)
468SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys,
469    CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_local_phys), 0,
470    "Packet filter on the physical interface for locally destined packets");
471
472/* log STP state changes */
473VNET_DEFINE_STATIC(int, log_stp);
474#define	V_log_stp	VNET(log_stp)
475SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp,
476    CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(log_stp), 0,
477    "Log STP state changes");
478
479/* share MAC with first bridge member */
480VNET_DEFINE_STATIC(int, bridge_inherit_mac);
481#define	V_bridge_inherit_mac	VNET(bridge_inherit_mac)
482SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
483    CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(bridge_inherit_mac), 0,
484    "Inherit MAC address from the first bridge member");
485
486VNET_DEFINE_STATIC(int, allow_llz_overlap) = 0;
487#define	V_allow_llz_overlap	VNET(allow_llz_overlap)
488SYSCTL_INT(_net_link_bridge, OID_AUTO, allow_llz_overlap,
489    CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(allow_llz_overlap), 0,
490    "Allow overlap of link-local scope "
491    "zones of a bridge interface and the member interfaces");
492
493/* log MAC address port flapping */
494VNET_DEFINE_STATIC(bool, log_mac_flap) = true;
495#define	V_log_mac_flap	VNET(log_mac_flap)
496SYSCTL_BOOL(_net_link_bridge, OID_AUTO, log_mac_flap,
497    CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(log_mac_flap), true,
498    "Log MAC address port flapping");
499
500VNET_DEFINE_STATIC(int, log_interval) = 5;
501VNET_DEFINE_STATIC(int, log_count) = 0;
502VNET_DEFINE_STATIC(struct timeval, log_last) = { 0 };
503
504#define	V_log_interval	VNET(log_interval)
505#define	V_log_count	VNET(log_count)
506#define	V_log_last	VNET(log_last)
507
/*
 * One entry of the driver-specific ioctl dispatch table.
 */
struct bridge_control {
	/* Handler; receives the softc and the argument buffer. */
	int	(*bc_func)(struct bridge_softc *, void *);
	/* Exact argument size; bridge_ioctl() rejects any other ifd_len. */
	int	bc_argsize;
	/* Combination of the BC_F_* flags. */
	int	bc_flags;
};
513
514#define	BC_F_COPYIN		0x01	/* copy arguments in */
515#define	BC_F_COPYOUT		0x02	/* copy arguments out */
516#define	BC_F_SUSER		0x04	/* do super-user check */
517
518static const struct bridge_control bridge_control_table[] = {
519	{ bridge_ioctl_add,		sizeof(struct ifbreq),
520	  BC_F_COPYIN|BC_F_SUSER },
521	{ bridge_ioctl_del,		sizeof(struct ifbreq),
522	  BC_F_COPYIN|BC_F_SUSER },
523
524	{ bridge_ioctl_gifflags,	sizeof(struct ifbreq),
525	  BC_F_COPYIN|BC_F_COPYOUT },
526	{ bridge_ioctl_sifflags,	sizeof(struct ifbreq),
527	  BC_F_COPYIN|BC_F_SUSER },
528
529	{ bridge_ioctl_scache,		sizeof(struct ifbrparam),
530	  BC_F_COPYIN|BC_F_SUSER },
531	{ bridge_ioctl_gcache,		sizeof(struct ifbrparam),
532	  BC_F_COPYOUT },
533
534	{ bridge_ioctl_gifs,		sizeof(struct ifbifconf),
535	  BC_F_COPYIN|BC_F_COPYOUT },
536	{ bridge_ioctl_rts,		sizeof(struct ifbaconf),
537	  BC_F_COPYIN|BC_F_COPYOUT },
538
539	{ bridge_ioctl_saddr,		sizeof(struct ifbareq),
540	  BC_F_COPYIN|BC_F_SUSER },
541
542	{ bridge_ioctl_sto,		sizeof(struct ifbrparam),
543	  BC_F_COPYIN|BC_F_SUSER },
544	{ bridge_ioctl_gto,		sizeof(struct ifbrparam),
545	  BC_F_COPYOUT },
546
547	{ bridge_ioctl_daddr,		sizeof(struct ifbareq),
548	  BC_F_COPYIN|BC_F_SUSER },
549
550	{ bridge_ioctl_flush,		sizeof(struct ifbreq),
551	  BC_F_COPYIN|BC_F_SUSER },
552
553	{ bridge_ioctl_gpri,		sizeof(struct ifbrparam),
554	  BC_F_COPYOUT },
555	{ bridge_ioctl_spri,		sizeof(struct ifbrparam),
556	  BC_F_COPYIN|BC_F_SUSER },
557
558	{ bridge_ioctl_ght,		sizeof(struct ifbrparam),
559	  BC_F_COPYOUT },
560	{ bridge_ioctl_sht,		sizeof(struct ifbrparam),
561	  BC_F_COPYIN|BC_F_SUSER },
562
563	{ bridge_ioctl_gfd,		sizeof(struct ifbrparam),
564	  BC_F_COPYOUT },
565	{ bridge_ioctl_sfd,		sizeof(struct ifbrparam),
566	  BC_F_COPYIN|BC_F_SUSER },
567
568	{ bridge_ioctl_gma,		sizeof(struct ifbrparam),
569	  BC_F_COPYOUT },
570	{ bridge_ioctl_sma,		sizeof(struct ifbrparam),
571	  BC_F_COPYIN|BC_F_SUSER },
572
573	{ bridge_ioctl_sifprio,		sizeof(struct ifbreq),
574	  BC_F_COPYIN|BC_F_SUSER },
575
576	{ bridge_ioctl_sifcost,		sizeof(struct ifbreq),
577	  BC_F_COPYIN|BC_F_SUSER },
578
579	{ bridge_ioctl_addspan,		sizeof(struct ifbreq),
580	  BC_F_COPYIN|BC_F_SUSER },
581	{ bridge_ioctl_delspan,		sizeof(struct ifbreq),
582	  BC_F_COPYIN|BC_F_SUSER },
583
584	{ bridge_ioctl_gbparam,		sizeof(struct ifbropreq),
585	  BC_F_COPYOUT },
586
587	{ bridge_ioctl_grte,		sizeof(struct ifbrparam),
588	  BC_F_COPYOUT },
589
590	{ bridge_ioctl_gifsstp,		sizeof(struct ifbpstpconf),
591	  BC_F_COPYIN|BC_F_COPYOUT },
592
593	{ bridge_ioctl_sproto,		sizeof(struct ifbrparam),
594	  BC_F_COPYIN|BC_F_SUSER },
595
596	{ bridge_ioctl_stxhc,		sizeof(struct ifbrparam),
597	  BC_F_COPYIN|BC_F_SUSER },
598
599	{ bridge_ioctl_sifmaxaddr,	sizeof(struct ifbreq),
600	  BC_F_COPYIN|BC_F_SUSER },
601
602};
603static const int bridge_control_table_size = nitems(bridge_control_table);
604
605VNET_DEFINE_STATIC(LIST_HEAD(, bridge_softc), bridge_list);
606#define	V_bridge_list	VNET(bridge_list)
607#define	BRIDGE_LIST_LOCK_INIT(x)	sx_init(&V_bridge_list_sx,	\
608					    "if_bridge list")
609#define	BRIDGE_LIST_LOCK_DESTROY(x)	sx_destroy(&V_bridge_list_sx)
610#define	BRIDGE_LIST_LOCK(x)		sx_xlock(&V_bridge_list_sx)
611#define	BRIDGE_LIST_UNLOCK(x)		sx_xunlock(&V_bridge_list_sx)
612
613VNET_DEFINE_STATIC(struct if_clone *, bridge_cloner);
614#define	V_bridge_cloner	VNET(bridge_cloner)
615
616static const char bridge_name[] = "bridge";
617
618static void
619vnet_bridge_init(const void *unused __unused)
620{
621
622	V_bridge_rtnode_zone = uma_zcreate("bridge_rtnode",
623	    sizeof(struct bridge_rtnode), NULL, NULL, NULL, NULL,
624	    UMA_ALIGN_PTR, 0);
625	BRIDGE_LIST_LOCK_INIT();
626	LIST_INIT(&V_bridge_list);
627
628	struct if_clone_addreq req = {
629		.create_f = bridge_clone_create,
630		.destroy_f = bridge_clone_destroy,
631		.flags = IFC_F_AUTOUNIT,
632	};
633	V_bridge_cloner = ifc_attach_cloner(bridge_name, &req);
634}
635VNET_SYSINIT(vnet_bridge_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
636    vnet_bridge_init, NULL);
637
638static void
639vnet_bridge_uninit(const void *unused __unused)
640{
641
642	ifc_detach_cloner(V_bridge_cloner);
643	V_bridge_cloner = NULL;
644	BRIDGE_LIST_LOCK_DESTROY();
645
646	/* Callbacks may use the UMA zone. */
647	NET_EPOCH_DRAIN_CALLBACKS();
648
649	uma_zdestroy(V_bridge_rtnode_zone);
650}
651VNET_SYSUNINIT(vnet_bridge_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY,
652    vnet_bridge_uninit, NULL);
653
654static int
655bridge_modevent(module_t mod, int type, void *data)
656{
657
658	switch (type) {
659	case MOD_LOAD:
660		bridge_dn_p = bridge_dummynet;
661		bridge_detach_cookie = EVENTHANDLER_REGISTER(
662		    ifnet_departure_event, bridge_ifdetach, NULL,
663		    EVENTHANDLER_PRI_ANY);
664		break;
665	case MOD_UNLOAD:
666		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
667		    bridge_detach_cookie);
668		bridge_dn_p = NULL;
669		break;
670	default:
671		return (EOPNOTSUPP);
672	}
673	return (0);
674}
675
676static moduledata_t bridge_mod = {
677	"if_bridge",
678	bridge_modevent,
679	0
680};
681
682DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
683MODULE_VERSION(if_bridge, 1);
684MODULE_DEPEND(if_bridge, bridgestp, 1, 1, 1);
685
686/*
687 * handler for net.link.bridge.ipfw
688 */
689static int
690sysctl_pfil_ipfw(SYSCTL_HANDLER_ARGS)
691{
692	int enable = V_pfil_ipfw;
693	int error;
694
695	error = sysctl_handle_int(oidp, &enable, 0, req);
696	enable &= 1;
697
698	if (enable != V_pfil_ipfw) {
699		V_pfil_ipfw = enable;
700
701		/*
702		 * Disable pfil so that ipfw doesnt run twice, if the user
703		 * really wants both then they can re-enable pfil_bridge and/or
704		 * pfil_member. Also allow non-ip packets as ipfw can filter by
705		 * layer2 type.
706		 */
707		if (V_pfil_ipfw) {
708			V_pfil_onlyip = 0;
709			V_pfil_bridge = 0;
710			V_pfil_member = 0;
711		}
712	}
713
714	return (error);
715}
716SYSCTL_PROC(_net_link_bridge, OID_AUTO, ipfw,
717    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_VNET | CTLFLAG_NEEDGIANT,
718    &VNET_NAME(pfil_ipfw), 0, &sysctl_pfil_ipfw, "I",
719    "Layer2 filter with IPFW");
720
721#ifdef VIMAGE
722static void
723bridge_reassign(struct ifnet *ifp, struct vnet *newvnet, char *arg)
724{
725	struct bridge_softc *sc = ifp->if_softc;
726	struct bridge_iflist *bif;
727
728	BRIDGE_LOCK(sc);
729
730	while ((bif = CK_LIST_FIRST(&sc->sc_iflist)) != NULL)
731		bridge_delete_member(sc, bif, 0);
732
733	while ((bif = CK_LIST_FIRST(&sc->sc_spanlist)) != NULL) {
734		bridge_delete_span(sc, bif);
735	}
736
737	BRIDGE_UNLOCK(sc);
738
739	ether_reassign(ifp, newvnet, arg);
740}
741#endif
742
743/*
744 * bridge_clone_create:
745 *
746 *	Create a new bridge instance.
747 */
748static int
749bridge_clone_create(struct if_clone *ifc, char *name, size_t len,
750    struct ifc_data *ifd, struct ifnet **ifpp)
751{
752	struct bridge_softc *sc;
753	struct ifnet *ifp;
754
755	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
756	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
757	if (ifp == NULL) {
758		free(sc, M_DEVBUF);
759		return (ENOSPC);
760	}
761
762	BRIDGE_LOCK_INIT(sc);
763	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
764	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
765
766	/* Initialize our routing table. */
767	bridge_rtable_init(sc);
768
769	callout_init_mtx(&sc->sc_brcallout, &sc->sc_rt_mtx, 0);
770
771	CK_LIST_INIT(&sc->sc_iflist);
772	CK_LIST_INIT(&sc->sc_spanlist);
773
774	ifp->if_softc = sc;
775	if_initname(ifp, bridge_name, ifd->unit);
776	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
777	ifp->if_ioctl = bridge_ioctl;
778#ifdef ALTQ
779	ifp->if_start = bridge_altq_start;
780	ifp->if_transmit = bridge_altq_transmit;
781	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
782	ifp->if_snd.ifq_drv_maxlen = 0;
783	IFQ_SET_READY(&ifp->if_snd);
784#else
785	ifp->if_transmit = bridge_transmit;
786#endif
787	ifp->if_qflush = bridge_qflush;
788	ifp->if_init = bridge_init;
789	ifp->if_type = IFT_BRIDGE;
790
791	ether_gen_addr(ifp, &sc->sc_defaddr);
792
793	bstp_attach(&sc->sc_stp, &bridge_ops);
794	ether_ifattach(ifp, sc->sc_defaddr.octet);
795	/* Now undo some of the damage... */
796	ifp->if_baudrate = 0;
797	ifp->if_type = IFT_BRIDGE;
798#ifdef VIMAGE
799	ifp->if_reassign = bridge_reassign;
800#endif
801	sc->sc_if_input = ifp->if_input;	/* ether_input */
802	ifp->if_input = bridge_inject;
803
804	/*
805	 * Allow BRIDGE_INPUT() to pass in packets originating from the bridge
806	 * itself via bridge_inject().  This is required for netmap but
807	 * otherwise has no effect.
808	 */
809	ifp->if_bridge_input = bridge_input;
810
811	BRIDGE_LIST_LOCK();
812	LIST_INSERT_HEAD(&V_bridge_list, sc, sc_list);
813	BRIDGE_LIST_UNLOCK();
814	*ifpp = ifp;
815
816	return (0);
817}
818
819static void
820bridge_clone_destroy_cb(struct epoch_context *ctx)
821{
822	struct bridge_softc *sc;
823
824	sc = __containerof(ctx, struct bridge_softc, sc_epoch_ctx);
825
826	BRIDGE_LOCK_DESTROY(sc);
827	free(sc, M_DEVBUF);
828}
829
830/*
831 * bridge_clone_destroy:
832 *
833 *	Destroy a bridge instance.
834 */
835static int
836bridge_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags)
837{
838	struct bridge_softc *sc = ifp->if_softc;
839	struct bridge_iflist *bif;
840	struct epoch_tracker et;
841
842	BRIDGE_LOCK(sc);
843
844	bridge_stop(ifp, 1);
845	ifp->if_flags &= ~IFF_UP;
846
847	while ((bif = CK_LIST_FIRST(&sc->sc_iflist)) != NULL)
848		bridge_delete_member(sc, bif, 0);
849
850	while ((bif = CK_LIST_FIRST(&sc->sc_spanlist)) != NULL) {
851		bridge_delete_span(sc, bif);
852	}
853
854	/* Tear down the routing table. */
855	bridge_rtable_fini(sc);
856
857	BRIDGE_UNLOCK(sc);
858
859	NET_EPOCH_ENTER(et);
860
861	callout_drain(&sc->sc_brcallout);
862
863	BRIDGE_LIST_LOCK();
864	LIST_REMOVE(sc, sc_list);
865	BRIDGE_LIST_UNLOCK();
866
867	bstp_detach(&sc->sc_stp);
868#ifdef ALTQ
869	IFQ_PURGE(&ifp->if_snd);
870#endif
871	NET_EPOCH_EXIT(et);
872
873	ether_ifdetach(ifp);
874	if_free(ifp);
875
876	NET_EPOCH_CALL(bridge_clone_destroy_cb, &sc->sc_epoch_ctx);
877
878	return (0);
879}
880
/*
 * bridge_ioctl:
 *
 *	Handle a control request from the operator.
 *
 *	The request is dispatched on "cmd" with the bridge lock held; the
 *	lock is dropped temporarily around if_init and ether_ioctl().
 *	Driver-specific requests (SIOCGDRVSPEC/SIOCSDRVSPEC) are looked up
 *	in bridge_control_table using ifd_cmd as the index.  Commands not
 *	handled here are passed to ether_ioctl().
 *
 *	Returns 0 on success or an errno value.
 */
static int
bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct bridge_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct bridge_iflist *bif;
	struct thread *td = curthread;
	/* Scratch space large enough for any bridge control argument. */
	union {
		struct ifbreq ifbreq;
		struct ifbifconf ifbifconf;
		struct ifbareq ifbareq;
		struct ifbaconf ifbaconf;
		struct ifbrparam ifbrparam;
		struct ifbropreq ifbropreq;
	} args;
	struct ifdrv *ifd = (struct ifdrv *) data;
	const struct bridge_control *bc;
	int error = 0, oldmtu;

	BRIDGE_LOCK(sc);

	switch (cmd) {
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* Accepted, but there is nothing to do. */
		break;

	case SIOCGDRVSPEC:
	case SIOCSDRVSPEC:
		/* ifd_cmd is the index into the control table. */
		if (ifd->ifd_cmd >= bridge_control_table_size) {
			error = EINVAL;
			break;
		}
		bc = &bridge_control_table[ifd->ifd_cmd];

		/*
		 * A "get" request must name a control that copies data out,
		 * and a "set" request must name one that does not.
		 */
		if (cmd == SIOCGDRVSPEC &&
		    (bc->bc_flags & BC_F_COPYOUT) == 0) {
			error = EINVAL;
			break;
		}
		else if (cmd == SIOCSDRVSPEC &&
		    (bc->bc_flags & BC_F_COPYOUT) != 0) {
			error = EINVAL;
			break;
		}

		/* Controls flagged BC_F_SUSER need PRIV_NET_BRIDGE. */
		if (bc->bc_flags & BC_F_SUSER) {
			error = priv_check(td, PRIV_NET_BRIDGE);
			if (error)
				break;
		}

		/*
		 * The caller's buffer length must match the control's
		 * argument size exactly and must fit in the scratch union.
		 */
		if (ifd->ifd_len != bc->bc_argsize ||
		    ifd->ifd_len > sizeof(args)) {
			error = EINVAL;
			break;
		}

		/* Zero first so controls without copyin see clean input. */
		bzero(&args, sizeof(args));
		if (bc->bc_flags & BC_F_COPYIN) {
			error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
			if (error)
				break;
		}

		/* Remember the MTU so a change by the handler is noticed. */
		oldmtu = ifp->if_mtu;
		error = (*bc->bc_func)(sc, &args);
		if (error)
			break;

		/*
		 * Bridge MTU may change during addition of the first port.
		 * If it did, do network layer specific procedure.
		 */
		if (ifp->if_mtu != oldmtu)
			if_notifymtu(ifp);

		if (bc->bc_flags & BC_F_COPYOUT)
			error = copyout(&args, ifd->ifd_data, ifd->ifd_len);

		break;

	case SIOCSIFFLAGS:
		if (!(ifp->if_flags & IFF_UP) &&
		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			/*
			 * If interface is marked down and it is running,
			 * then stop and disable it.
			 */
			bridge_stop(ifp, 1);
		} else if ((ifp->if_flags & IFF_UP) &&
		    !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			/*
			 * If interface is marked up and it is stopped, then
			 * start it.  The lock is released around the if_init
			 * call and re-taken afterwards.
			 */
			BRIDGE_UNLOCK(sc);
			(*ifp->if_init)(sc);
			BRIDGE_LOCK(sc);
		}
		break;

	case SIOCSIFMTU:
		/* Saved so the members can be rolled back on failure. */
		oldmtu = sc->sc_ifp->if_mtu;

		if (ifr->ifr_mtu < IF_MINMTU) {
			error = EINVAL;
			break;
		}
		/* With no members, only the bridge's own MTU changes. */
		if (CK_LIST_EMPTY(&sc->sc_iflist)) {
			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
			break;
		}
		/* Push the new MTU to every member; stop at first failure. */
		CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
			error = (*bif->bif_ifp->if_ioctl)(bif->bif_ifp,
			    SIOCSIFMTU, (caddr_t)ifr);
			if (error != 0) {
				log(LOG_NOTICE, "%s: invalid MTU: %u for"
				    " member %s\n", sc->sc_ifp->if_xname,
				    ifr->ifr_mtu,
				    bif->bif_ifp->if_xname);
				/* Any member failure is reported as EINVAL. */
				error = EINVAL;
				break;
			}
		}
		if (error) {
			/*
			 * Restore the previous MTU on all member interfaces.
			 * Note that this overwrites ifr_mtu in the caller's
			 * request, and errors from the restore are ignored.
			 */
			ifr->ifr_mtu = oldmtu;
			CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
				(*bif->bif_ifp->if_ioctl)(bif->bif_ifp,
				    SIOCSIFMTU, (caddr_t)ifr);
			}
		} else {
			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
		}
		break;
	default:
		/*
		 * drop the lock as ether_ioctl() will call bridge_start() and
		 * cause the lock to be recursed.
		 */
		BRIDGE_UNLOCK(sc);
		error = ether_ioctl(ifp, cmd, data);
		BRIDGE_LOCK(sc);
		break;
	}

	BRIDGE_UNLOCK(sc);

	return (error);
}
1036
1037/*
1038 * bridge_mutecaps:
1039 *
1040 *	Clear or restore unwanted capabilities on the member interface
1041 */
1042static void
1043bridge_mutecaps(struct bridge_softc *sc)
1044{
1045	struct bridge_iflist *bif;
1046	int enabled, mask;
1047
1048	BRIDGE_LOCK_ASSERT(sc);
1049
1050	/* Initial bitmask of capabilities to test */
1051	mask = BRIDGE_IFCAPS_MASK;
1052
1053	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
1054		/* Every member must support it or its disabled */
1055		mask &= bif->bif_savedcaps;
1056	}
1057
1058	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
1059		enabled = bif->bif_ifp->if_capenable;
1060		enabled &= ~BRIDGE_IFCAPS_STRIP;
1061		/* strip off mask bits and enable them again if allowed */
1062		enabled &= ~BRIDGE_IFCAPS_MASK;
1063		enabled |= mask;
1064		bridge_set_ifcap(sc, bif, enabled);
1065	}
1066}
1067
1068static void
1069bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
1070{
1071	struct ifnet *ifp = bif->bif_ifp;
1072	struct ifreq ifr;
1073	int error, mask, stuck;
1074
1075	bzero(&ifr, sizeof(ifr));
1076	ifr.ifr_reqcap = set;
1077
1078	if (ifp->if_capenable != set) {
1079		error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
1080		if (error)
1081			if_printf(sc->sc_ifp,
1082			    "error setting capabilities on %s: %d\n",
1083			    ifp->if_xname, error);
1084		mask = BRIDGE_IFCAPS_MASK | BRIDGE_IFCAPS_STRIP;
1085		stuck = ifp->if_capenable & mask & ~set;
1086		if (stuck != 0)
1087			if_printf(sc->sc_ifp,
1088			    "can't disable some capabilities on %s: 0x%x\n",
1089			    ifp->if_xname, stuck);
1090	}
1091}
1092
1093/*
1094 * bridge_lookup_member:
1095 *
1096 *	Lookup a bridge member interface.
1097 */
1098static struct bridge_iflist *
1099bridge_lookup_member(struct bridge_softc *sc, const char *name)
1100{
1101	struct bridge_iflist *bif;
1102	struct ifnet *ifp;
1103
1104	BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc);
1105
1106	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
1107		ifp = bif->bif_ifp;
1108		if (strcmp(ifp->if_xname, name) == 0)
1109			return (bif);
1110	}
1111
1112	return (NULL);
1113}
1114
1115/*
1116 * bridge_lookup_member_if:
1117 *
1118 *	Lookup a bridge member interface by ifnet*.
1119 */
1120static struct bridge_iflist *
1121bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
1122{
1123	struct bridge_iflist *bif;
1124
1125	BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc);
1126
1127	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
1128		if (bif->bif_ifp == member_ifp)
1129			return (bif);
1130	}
1131
1132	return (NULL);
1133}
1134
1135static void
1136bridge_delete_member_cb(struct epoch_context *ctx)
1137{
1138	struct bridge_iflist *bif;
1139
1140	bif = __containerof(ctx, struct bridge_iflist, bif_epoch_ctx);
1141
1142	free(bif, M_DEVBUF);
1143}
1144
/*
 * bridge_delete_member:
 *
 *	Delete the specified member interface.
 *
 *	Must be called with the bridge lock held.  "gone" is non-zero when
 *	the member interface itself is detaching; in that case the
 *	promiscuous-mode and capability restore steps are skipped.  The
 *	entry is unlinked here and freed later through an epoch callback.
 */
static void
bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
    int gone)
{
	struct ifnet *ifs = bif->bif_ifp;
	struct ifnet *fif = NULL;
	struct bridge_iflist *bifl;

	BRIDGE_LOCK_ASSERT(sc);

	if (bif->bif_flags & IFBIF_STP)
		bstp_disable(&bif->bif_stp);

	/* Detach the interface from the bridge and unlink its entry. */
	ifs->if_bridge = NULL;
	CK_LIST_REMOVE(bif, bif_next);

	/*
	 * If removing the interface that gave the bridge its mac address, set
	 * the mac address of the bridge to the address of the next member, or
	 * to its default address if no members are left.
	 */
	if (V_bridge_inherit_mac && sc->sc_ifaddr == ifs) {
		if (CK_LIST_EMPTY(&sc->sc_iflist)) {
			bcopy(&sc->sc_defaddr,
			    IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
			sc->sc_ifaddr = NULL;
		} else {
			bifl = CK_LIST_FIRST(&sc->sc_iflist);
			fif = bifl->bif_ifp;
			bcopy(IF_LLADDR(fif),
			    IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
			sc->sc_ifaddr = fif;
		}
		/* Announce the bridge's link-layer address change. */
		EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
	}

	bridge_linkcheck(sc);
	bridge_mutecaps(sc);	/* recalculate now this interface is removed */
	/* Flush every routing table entry that points at this interface. */
	BRIDGE_RT_LOCK(sc);
	bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
	BRIDGE_RT_UNLOCK(sc);
	KASSERT(bif->bif_addrcnt == 0,
	    ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));

	/* Remove the bridge hooks from the interface. */
	ifs->if_bridge_output = NULL;
	ifs->if_bridge_input = NULL;
	ifs->if_bridge_linkstate = NULL;
	if (!gone) {
		switch (ifs->if_type) {
		case IFT_ETHER:
		case IFT_L2VLAN:
			/*
			 * Take the interface out of promiscuous mode, but only
			 * if it was promiscuous in the first place. It might
			 * not be if we're in the bridge_ioctl_add() error path.
			 */
			if (ifs->if_flags & IFF_PROMISC)
				(void) ifpromisc(ifs, 0);
			break;

		case IFT_GIF:
			break;

		default:
#ifdef DIAGNOSTIC
			panic("bridge_delete_member: impossible");
#endif
			break;
		}
		/* re-enable any interface capabilities */
		bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
	}
	bstp_destroy(&bif->bif_stp);	/* prepare to free */

	/* Free the entry through the epoch callback, not directly here. */
	NET_EPOCH_CALL(bridge_delete_member_cb, &bif->bif_epoch_ctx);
}
1226
1227/*
1228 * bridge_delete_span:
1229 *
1230 *	Delete the specified span interface.
1231 */
1232static void
1233bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
1234{
1235	BRIDGE_LOCK_ASSERT(sc);
1236
1237	KASSERT(bif->bif_ifp->if_bridge == NULL,
1238	    ("%s: not a span interface", __func__));
1239
1240	CK_LIST_REMOVE(bif, bif_next);
1241
1242	NET_EPOCH_CALL(bridge_delete_member_cb, &bif->bif_epoch_ctx);
1243}
1244
/*
 * bridge_ioctl_add:
 *
 *	Add the interface named in the ifbreq as a bridge member.
 *
 *	Returns ENOENT if the interface does not exist, EINVAL if it has no
 *	ioctl handler, is of an unsupported type, or rejects the bridge
 *	MTU, EBUSY if it is a span port of this bridge or belongs to
 *	another bridge, EEXIST if it is already a member of this bridge,
 *	ENOMEM on allocation failure, or the error from ifpromisc().
 */
static int
bridge_ioctl_add(struct bridge_softc *sc, void *arg)
{
	struct ifbreq *req = arg;
	struct bridge_iflist *bif = NULL;
	struct ifnet *ifs;
	int error = 0;

	ifs = ifunit(req->ifbr_ifsname);
	if (ifs == NULL)
		return (ENOENT);
	if (ifs->if_ioctl == NULL)	/* must be supported */
		return (EINVAL);

	/* If it's in the span list, it can't be a member. */
	CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
		if (ifs == bif->bif_ifp)
			return (EBUSY);

	/* Already a member of this bridge. */
	if (ifs->if_bridge == sc)
		return (EEXIST);

	/* Member of some other bridge. */
	if (ifs->if_bridge != NULL)
		return (EBUSY);

	switch (ifs->if_type) {
	case IFT_ETHER:
	case IFT_L2VLAN:
	case IFT_GIF:
		/* permitted interface types */
		break;
	default:
		return (EINVAL);
	}

#ifdef INET6
	/*
	 * Two valid inet6 addresses with link-local scope must not be
	 * on the parent interface and the member interfaces at the
	 * same time.  This restriction is needed to prevent violation
	 * of link-local scope zone.  Attempts to add a member
	 * interface which has inet6 addresses when the parent has
	 * inet6 triggers removal of all inet6 addresses on the member
	 * interface.
	 */

	/* Check if the parent interface has a link-local scope addr. */
	if (V_allow_llz_overlap == 0 &&
	    in6ifa_llaonifp(sc->sc_ifp) != NULL) {
		/*
		 * If any, remove all inet6 addresses from the member
		 * interfaces.
		 */
		CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
 			if (in6ifa_llaonifp(bif->bif_ifp)) {
				in6_ifdetach(bif->bif_ifp);
				if_printf(sc->sc_ifp,
				    "IPv6 addresses on %s have been removed "
				    "before adding it as a member to prevent "
				    "IPv6 address scope violation.\n",
				    bif->bif_ifp->if_xname);
			}
		}
		/* Same treatment for the interface being added. */
		if (in6ifa_llaonifp(ifs)) {
			in6_ifdetach(ifs);
			if_printf(sc->sc_ifp,
			    "IPv6 addresses on %s have been removed "
			    "before adding it as a member to prevent "
			    "IPv6 address scope violation.\n",
			    ifs->if_xname);
		}
	}
#endif
	/* Allow the first Ethernet member to define the MTU */
	if (CK_LIST_EMPTY(&sc->sc_iflist))
		sc->sc_ifp->if_mtu = ifs->if_mtu;
	else if (sc->sc_ifp->if_mtu != ifs->if_mtu) {
		/* Later members are asked to adopt the bridge's MTU. */
		struct ifreq ifr;

		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s",
		    ifs->if_xname);
		ifr.ifr_mtu = sc->sc_ifp->if_mtu;

		error = (*ifs->if_ioctl)(ifs,
		    SIOCSIFMTU, (caddr_t)&ifr);
		if (error != 0) {
			log(LOG_NOTICE, "%s: invalid MTU: %u for"
			    " new member %s\n", sc->sc_ifp->if_xname,
			    ifr.ifr_mtu,
			    ifs->if_xname);
			return (EINVAL);
		}
	}

	bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO);
	if (bif == NULL)
		return (ENOMEM);

	bif->bif_ifp = ifs;
	bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
	/* Remembered so the capabilities can be restored on removal. */
	bif->bif_savedcaps = ifs->if_capenable;

	/*
	 * Assign the interface's MAC address to the bridge if it's the first
	 * member and the MAC address of the bridge has not been changed from
	 * the default randomly generated one.
	 */
	if (V_bridge_inherit_mac && CK_LIST_EMPTY(&sc->sc_iflist) &&
	    !memcmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr.octet, ETHER_ADDR_LEN)) {
		bcopy(IF_LLADDR(ifs), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
		sc->sc_ifaddr = ifs;
		EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
	}

	/* Install the bridge hooks on the interface. */
	ifs->if_bridge = sc;
	ifs->if_bridge_output = bridge_output;
	ifs->if_bridge_input = bridge_input;
	ifs->if_bridge_linkstate = bridge_linkstate;
	bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
	/*
	 * XXX: XLOCK HERE!?!
	 *
	 * NOTE: insert_***HEAD*** should be safe for the traversals.
	 */
	CK_LIST_INSERT_HEAD(&sc->sc_iflist, bif, bif_next);

	/* Set interface capabilities to the intersection set of all members */
	bridge_mutecaps(sc);
	bridge_linkcheck(sc);

	/* Place the interface into promiscuous mode */
	switch (ifs->if_type) {
		case IFT_ETHER:
		case IFT_L2VLAN:
			error = ifpromisc(ifs, 1);
			break;
	}

	/* Undo the addition if promiscuous mode could not be enabled. */
	if (error)
		bridge_delete_member(sc, bif, 0);
	return (error);
}
1387
1388static int
1389bridge_ioctl_del(struct bridge_softc *sc, void *arg)
1390{
1391	struct ifbreq *req = arg;
1392	struct bridge_iflist *bif;
1393
1394	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1395	if (bif == NULL)
1396		return (ENOENT);
1397
1398	bridge_delete_member(sc, bif, 0);
1399
1400	return (0);
1401}
1402
1403static int
1404bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
1405{
1406	struct ifbreq *req = arg;
1407	struct bridge_iflist *bif;
1408	struct bstp_port *bp;
1409
1410	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1411	if (bif == NULL)
1412		return (ENOENT);
1413
1414	bp = &bif->bif_stp;
1415	req->ifbr_ifsflags = bif->bif_flags;
1416	req->ifbr_state = bp->bp_state;
1417	req->ifbr_priority = bp->bp_priority;
1418	req->ifbr_path_cost = bp->bp_path_cost;
1419	req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
1420	req->ifbr_proto = bp->bp_protover;
1421	req->ifbr_role = bp->bp_role;
1422	req->ifbr_stpflags = bp->bp_flags;
1423	req->ifbr_addrcnt = bif->bif_addrcnt;
1424	req->ifbr_addrmax = bif->bif_addrmax;
1425	req->ifbr_addrexceeded = bif->bif_addrexceeded;
1426
1427	/* Copy STP state options as flags */
1428	if (bp->bp_operedge)
1429		req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
1430	if (bp->bp_flags & BSTP_PORT_AUTOEDGE)
1431		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
1432	if (bp->bp_ptp_link)
1433		req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
1434	if (bp->bp_flags & BSTP_PORT_AUTOPTP)
1435		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
1436	if (bp->bp_flags & BSTP_PORT_ADMEDGE)
1437		req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
1438	if (bp->bp_flags & BSTP_PORT_ADMCOST)
1439		req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
1440	return (0);
1441}
1442
1443static int
1444bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
1445{
1446	struct epoch_tracker et;
1447	struct ifbreq *req = arg;
1448	struct bridge_iflist *bif;
1449	struct bstp_port *bp;
1450	int error;
1451
1452	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1453	if (bif == NULL)
1454		return (ENOENT);
1455	bp = &bif->bif_stp;
1456
1457	if (req->ifbr_ifsflags & IFBIF_SPAN)
1458		/* SPAN is readonly */
1459		return (EINVAL);
1460
1461	NET_EPOCH_ENTER(et);
1462
1463	if (req->ifbr_ifsflags & IFBIF_STP) {
1464		if ((bif->bif_flags & IFBIF_STP) == 0) {
1465			error = bstp_enable(&bif->bif_stp);
1466			if (error) {
1467				NET_EPOCH_EXIT(et);
1468				return (error);
1469			}
1470		}
1471	} else {
1472		if ((bif->bif_flags & IFBIF_STP) != 0)
1473			bstp_disable(&bif->bif_stp);
1474	}
1475
1476	/* Pass on STP flags */
1477	bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
1478	bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
1479	bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
1480	bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
1481
1482	/* Save the bits relating to the bridge */
1483	bif->bif_flags = req->ifbr_ifsflags & IFBIFMASK;
1484
1485	NET_EPOCH_EXIT(et);
1486
1487	return (0);
1488}
1489
1490static int
1491bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
1492{
1493	struct ifbrparam *param = arg;
1494
1495	sc->sc_brtmax = param->ifbrp_csize;
1496	bridge_rttrim(sc);
1497
1498	return (0);
1499}
1500
1501static int
1502bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
1503{
1504	struct ifbrparam *param = arg;
1505
1506	param->ifbrp_csize = sc->sc_brtmax;
1507
1508	return (0);
1509}
1510
1511static int
1512bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
1513{
1514	struct ifbifconf *bifc = arg;
1515	struct bridge_iflist *bif;
1516	struct ifbreq breq;
1517	char *buf, *outbuf;
1518	int count, buflen, len, error = 0;
1519
1520	count = 0;
1521	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next)
1522		count++;
1523	CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
1524		count++;
1525
1526	buflen = sizeof(breq) * count;
1527	if (bifc->ifbic_len == 0) {
1528		bifc->ifbic_len = buflen;
1529		return (0);
1530	}
1531	outbuf = malloc(buflen, M_TEMP, M_NOWAIT | M_ZERO);
1532	if (outbuf == NULL)
1533		return (ENOMEM);
1534
1535	count = 0;
1536	buf = outbuf;
1537	len = min(bifc->ifbic_len, buflen);
1538	bzero(&breq, sizeof(breq));
1539	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
1540		if (len < sizeof(breq))
1541			break;
1542
1543		strlcpy(breq.ifbr_ifsname, bif->bif_ifp->if_xname,
1544		    sizeof(breq.ifbr_ifsname));
1545		/* Fill in the ifbreq structure */
1546		error = bridge_ioctl_gifflags(sc, &breq);
1547		if (error)
1548			break;
1549		memcpy(buf, &breq, sizeof(breq));
1550		count++;
1551		buf += sizeof(breq);
1552		len -= sizeof(breq);
1553	}
1554	CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) {
1555		if (len < sizeof(breq))
1556			break;
1557
1558		strlcpy(breq.ifbr_ifsname, bif->bif_ifp->if_xname,
1559		    sizeof(breq.ifbr_ifsname));
1560		breq.ifbr_ifsflags = bif->bif_flags;
1561		breq.ifbr_portno = bif->bif_ifp->if_index & 0xfff;
1562		memcpy(buf, &breq, sizeof(breq));
1563		count++;
1564		buf += sizeof(breq);
1565		len -= sizeof(breq);
1566	}
1567
1568	bifc->ifbic_len = sizeof(breq) * count;
1569	error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len);
1570	free(outbuf, M_TEMP);
1571	return (error);
1572}
1573
1574static int
1575bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
1576{
1577	struct ifbaconf *bac = arg;
1578	struct bridge_rtnode *brt;
1579	struct ifbareq bareq;
1580	char *buf, *outbuf;
1581	int count, buflen, len, error = 0;
1582
1583	if (bac->ifbac_len == 0)
1584		return (0);
1585
1586	count = 0;
1587	CK_LIST_FOREACH(brt, &sc->sc_rtlist, brt_list)
1588		count++;
1589	buflen = sizeof(bareq) * count;
1590
1591	outbuf = malloc(buflen, M_TEMP, M_NOWAIT | M_ZERO);
1592	if (outbuf == NULL)
1593		return (ENOMEM);
1594
1595	count = 0;
1596	buf = outbuf;
1597	len = min(bac->ifbac_len, buflen);
1598	bzero(&bareq, sizeof(bareq));
1599	CK_LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
1600		if (len < sizeof(bareq))
1601			goto out;
1602		strlcpy(bareq.ifba_ifsname, brt->brt_ifp->if_xname,
1603		    sizeof(bareq.ifba_ifsname));
1604		memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
1605		bareq.ifba_vlan = brt->brt_vlan;
1606		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
1607				time_uptime < brt->brt_expire)
1608			bareq.ifba_expire = brt->brt_expire - time_uptime;
1609		else
1610			bareq.ifba_expire = 0;
1611		bareq.ifba_flags = brt->brt_flags;
1612
1613		memcpy(buf, &bareq, sizeof(bareq));
1614		count++;
1615		buf += sizeof(bareq);
1616		len -= sizeof(bareq);
1617	}
1618out:
1619	bac->ifbac_len = sizeof(bareq) * count;
1620	error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);
1621	free(outbuf, M_TEMP);
1622	return (error);
1623}
1624
1625static int
1626bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
1627{
1628	struct ifbareq *req = arg;
1629	struct bridge_iflist *bif;
1630	struct epoch_tracker et;
1631	int error;
1632
1633	NET_EPOCH_ENTER(et);
1634	bif = bridge_lookup_member(sc, req->ifba_ifsname);
1635	if (bif == NULL) {
1636		NET_EPOCH_EXIT(et);
1637		return (ENOENT);
1638	}
1639
1640	/* bridge_rtupdate() may acquire the lock. */
1641	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
1642	    req->ifba_flags);
1643	NET_EPOCH_EXIT(et);
1644
1645	return (error);
1646}
1647
1648static int
1649bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
1650{
1651	struct ifbrparam *param = arg;
1652
1653	sc->sc_brttimeout = param->ifbrp_ctime;
1654	return (0);
1655}
1656
1657static int
1658bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
1659{
1660	struct ifbrparam *param = arg;
1661
1662	param->ifbrp_ctime = sc->sc_brttimeout;
1663	return (0);
1664}
1665
1666static int
1667bridge_ioctl_daddr(struct bridge_softc *sc, void *arg)
1668{
1669	struct ifbareq *req = arg;
1670	int vlan = req->ifba_vlan;
1671
1672	/* Userspace uses '0' to mean 'any vlan' */
1673	if (vlan == 0)
1674		vlan = DOT1Q_VID_RSVD_IMPL;
1675
1676	return (bridge_rtdaddr(sc, req->ifba_dst, vlan));
1677}
1678
1679static int
1680bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
1681{
1682	struct ifbreq *req = arg;
1683
1684	BRIDGE_RT_LOCK(sc);
1685	bridge_rtflush(sc, req->ifbr_ifsflags);
1686	BRIDGE_RT_UNLOCK(sc);
1687
1688	return (0);
1689}
1690
1691static int
1692bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
1693{
1694	struct ifbrparam *param = arg;
1695	struct bstp_state *bs = &sc->sc_stp;
1696
1697	param->ifbrp_prio = bs->bs_bridge_priority;
1698	return (0);
1699}
1700
1701static int
1702bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
1703{
1704	struct ifbrparam *param = arg;
1705
1706	return (bstp_set_priority(&sc->sc_stp, param->ifbrp_prio));
1707}
1708
1709static int
1710bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
1711{
1712	struct ifbrparam *param = arg;
1713	struct bstp_state *bs = &sc->sc_stp;
1714
1715	param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
1716	return (0);
1717}
1718
1719static int
1720bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
1721{
1722	struct ifbrparam *param = arg;
1723
1724	return (bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime));
1725}
1726
1727static int
1728bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
1729{
1730	struct ifbrparam *param = arg;
1731	struct bstp_state *bs = &sc->sc_stp;
1732
1733	param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
1734	return (0);
1735}
1736
1737static int
1738bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
1739{
1740	struct ifbrparam *param = arg;
1741
1742	return (bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay));
1743}
1744
1745static int
1746bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
1747{
1748	struct ifbrparam *param = arg;
1749	struct bstp_state *bs = &sc->sc_stp;
1750
1751	param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
1752	return (0);
1753}
1754
1755static int
1756bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
1757{
1758	struct ifbrparam *param = arg;
1759
1760	return (bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage));
1761}
1762
1763static int
1764bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
1765{
1766	struct ifbreq *req = arg;
1767	struct bridge_iflist *bif;
1768
1769	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1770	if (bif == NULL)
1771		return (ENOENT);
1772
1773	return (bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority));
1774}
1775
1776static int
1777bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
1778{
1779	struct ifbreq *req = arg;
1780	struct bridge_iflist *bif;
1781
1782	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1783	if (bif == NULL)
1784		return (ENOENT);
1785
1786	return (bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost));
1787}
1788
1789static int
1790bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg)
1791{
1792	struct ifbreq *req = arg;
1793	struct bridge_iflist *bif;
1794
1795	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1796	if (bif == NULL)
1797		return (ENOENT);
1798
1799	bif->bif_addrmax = req->ifbr_addrmax;
1800	return (0);
1801}
1802
1803static int
1804bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
1805{
1806	struct ifbreq *req = arg;
1807	struct bridge_iflist *bif = NULL;
1808	struct ifnet *ifs;
1809
1810	ifs = ifunit(req->ifbr_ifsname);
1811	if (ifs == NULL)
1812		return (ENOENT);
1813
1814	CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
1815		if (ifs == bif->bif_ifp)
1816			return (EBUSY);
1817
1818	if (ifs->if_bridge != NULL)
1819		return (EBUSY);
1820
1821	switch (ifs->if_type) {
1822		case IFT_ETHER:
1823		case IFT_GIF:
1824		case IFT_L2VLAN:
1825			break;
1826		default:
1827			return (EINVAL);
1828	}
1829
1830	bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO);
1831	if (bif == NULL)
1832		return (ENOMEM);
1833
1834	bif->bif_ifp = ifs;
1835	bif->bif_flags = IFBIF_SPAN;
1836
1837	CK_LIST_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
1838
1839	return (0);
1840}
1841
1842static int
1843bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
1844{
1845	struct ifbreq *req = arg;
1846	struct bridge_iflist *bif;
1847	struct ifnet *ifs;
1848
1849	ifs = ifunit(req->ifbr_ifsname);
1850	if (ifs == NULL)
1851		return (ENOENT);
1852
1853	CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
1854		if (ifs == bif->bif_ifp)
1855			break;
1856
1857	if (bif == NULL)
1858		return (ENOENT);
1859
1860	bridge_delete_span(sc, bif);
1861
1862	return (0);
1863}
1864
1865static int
1866bridge_ioctl_gbparam(struct bridge_softc *sc, void *arg)
1867{
1868	struct ifbropreq *req = arg;
1869	struct bstp_state *bs = &sc->sc_stp;
1870	struct bstp_port *root_port;
1871
1872	req->ifbop_maxage = bs->bs_bridge_max_age >> 8;
1873	req->ifbop_hellotime = bs->bs_bridge_htime >> 8;
1874	req->ifbop_fwddelay = bs->bs_bridge_fdelay >> 8;
1875
1876	root_port = bs->bs_root_port;
1877	if (root_port == NULL)
1878		req->ifbop_root_port = 0;
1879	else
1880		req->ifbop_root_port = root_port->bp_ifp->if_index;
1881
1882	req->ifbop_holdcount = bs->bs_txholdcount;
1883	req->ifbop_priority = bs->bs_bridge_priority;
1884	req->ifbop_protocol = bs->bs_protover;
1885	req->ifbop_root_path_cost = bs->bs_root_pv.pv_cost;
1886	req->ifbop_bridgeid = bs->bs_bridge_pv.pv_dbridge_id;
1887	req->ifbop_designated_root = bs->bs_root_pv.pv_root_id;
1888	req->ifbop_designated_bridge = bs->bs_root_pv.pv_dbridge_id;
1889	req->ifbop_last_tc_time.tv_sec = bs->bs_last_tc_time.tv_sec;
1890	req->ifbop_last_tc_time.tv_usec = bs->bs_last_tc_time.tv_usec;
1891
1892	return (0);
1893}
1894
1895static int
1896bridge_ioctl_grte(struct bridge_softc *sc, void *arg)
1897{
1898	struct ifbrparam *param = arg;
1899
1900	param->ifbrp_cexceeded = sc->sc_brtexceeded;
1901	return (0);
1902}
1903
1904static int
1905bridge_ioctl_gifsstp(struct bridge_softc *sc, void *arg)
1906{
1907	struct ifbpstpconf *bifstp = arg;
1908	struct bridge_iflist *bif;
1909	struct bstp_port *bp;
1910	struct ifbpstpreq bpreq;
1911	char *buf, *outbuf;
1912	int count, buflen, len, error = 0;
1913
1914	count = 0;
1915	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
1916		if ((bif->bif_flags & IFBIF_STP) != 0)
1917			count++;
1918	}
1919
1920	buflen = sizeof(bpreq) * count;
1921	if (bifstp->ifbpstp_len == 0) {
1922		bifstp->ifbpstp_len = buflen;
1923		return (0);
1924	}
1925
1926	outbuf = malloc(buflen, M_TEMP, M_NOWAIT | M_ZERO);
1927	if (outbuf == NULL)
1928		return (ENOMEM);
1929
1930	count = 0;
1931	buf = outbuf;
1932	len = min(bifstp->ifbpstp_len, buflen);
1933	bzero(&bpreq, sizeof(bpreq));
1934	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
1935		if (len < sizeof(bpreq))
1936			break;
1937
1938		if ((bif->bif_flags & IFBIF_STP) == 0)
1939			continue;
1940
1941		bp = &bif->bif_stp;
1942		bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff;
1943		bpreq.ifbp_fwd_trans = bp->bp_forward_transitions;
1944		bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost;
1945		bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id;
1946		bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id;
1947		bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id;
1948
1949		memcpy(buf, &bpreq, sizeof(bpreq));
1950		count++;
1951		buf += sizeof(bpreq);
1952		len -= sizeof(bpreq);
1953	}
1954
1955	bifstp->ifbpstp_len = sizeof(bpreq) * count;
1956	error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len);
1957	free(outbuf, M_TEMP);
1958	return (error);
1959}
1960
1961static int
1962bridge_ioctl_sproto(struct bridge_softc *sc, void *arg)
1963{
1964	struct ifbrparam *param = arg;
1965
1966	return (bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto));
1967}
1968
1969static int
1970bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg)
1971{
1972	struct ifbrparam *param = arg;
1973
1974	return (bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc));
1975}
1976
1977/*
1978 * bridge_ifdetach:
1979 *
1980 *	Detach an interface from a bridge.  Called when a member
1981 *	interface is detaching.
1982 */
1983static void
1984bridge_ifdetach(void *arg __unused, struct ifnet *ifp)
1985{
1986	struct bridge_softc *sc = ifp->if_bridge;
1987	struct bridge_iflist *bif;
1988
1989	if (ifp->if_flags & IFF_RENAMING)
1990		return;
1991	if (V_bridge_cloner == NULL) {
1992		/*
1993		 * This detach handler can be called after
1994		 * vnet_bridge_uninit().  Just return in that case.
1995		 */
1996		return;
1997	}
1998	/* Check if the interface is a bridge member */
1999	if (sc != NULL) {
2000		BRIDGE_LOCK(sc);
2001
2002		bif = bridge_lookup_member_if(sc, ifp);
2003		if (bif != NULL)
2004			bridge_delete_member(sc, bif, 1);
2005
2006		BRIDGE_UNLOCK(sc);
2007		return;
2008	}
2009
2010	/* Check if the interface is a span port */
2011	BRIDGE_LIST_LOCK();
2012	LIST_FOREACH(sc, &V_bridge_list, sc_list) {
2013		BRIDGE_LOCK(sc);
2014		CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
2015			if (ifp == bif->bif_ifp) {
2016				bridge_delete_span(sc, bif);
2017				break;
2018			}
2019
2020		BRIDGE_UNLOCK(sc);
2021	}
2022	BRIDGE_LIST_UNLOCK();
2023}
2024
2025/*
2026 * bridge_init:
2027 *
2028 *	Initialize a bridge interface.
2029 */
2030static void
2031bridge_init(void *xsc)
2032{
2033	struct bridge_softc *sc = (struct bridge_softc *)xsc;
2034	struct ifnet *ifp = sc->sc_ifp;
2035
2036	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2037		return;
2038
2039	BRIDGE_LOCK(sc);
2040	callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
2041	    bridge_timer, sc);
2042
2043	ifp->if_drv_flags |= IFF_DRV_RUNNING;
2044	bstp_init(&sc->sc_stp);		/* Initialize Spanning Tree */
2045
2046	BRIDGE_UNLOCK(sc);
2047}
2048
2049/*
2050 * bridge_stop:
2051 *
2052 *	Stop the bridge interface.
2053 */
2054static void
2055bridge_stop(struct ifnet *ifp, int disable)
2056{
2057	struct bridge_softc *sc = ifp->if_softc;
2058
2059	BRIDGE_LOCK_ASSERT(sc);
2060
2061	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2062		return;
2063
2064	BRIDGE_RT_LOCK(sc);
2065	callout_stop(&sc->sc_brcallout);
2066
2067	bstp_stop(&sc->sc_stp);
2068
2069	bridge_rtflush(sc, IFBF_FLUSHDYN);
2070	BRIDGE_RT_UNLOCK(sc);
2071
2072	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2073}
2074
2075/*
2076 * bridge_enqueue:
2077 *
2078 *	Enqueue a packet on a bridge member interface.
2079 *
2080 */
2081static int
2082bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m)
2083{
2084	int len, err = 0;
2085	short mflags;
2086	struct mbuf *m0;
2087
2088	/* We may be sending a fragment so traverse the mbuf */
2089	for (; m; m = m0) {
2090		m0 = m->m_nextpkt;
2091		m->m_nextpkt = NULL;
2092		len = m->m_pkthdr.len;
2093		mflags = m->m_flags;
2094
2095		/*
2096		 * If underlying interface can not do VLAN tag insertion itself
2097		 * then attach a packet tag that holds it.
2098		 */
2099		if ((m->m_flags & M_VLANTAG) &&
2100		    (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
2101			m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
2102			if (m == NULL) {
2103				if_printf(dst_ifp,
2104				    "unable to prepend VLAN header\n");
2105				if_inc_counter(dst_ifp, IFCOUNTER_OERRORS, 1);
2106				continue;
2107			}
2108			m->m_flags &= ~M_VLANTAG;
2109		}
2110
2111		M_ASSERTPKTHDR(m); /* We shouldn't transmit mbuf without pkthdr */
2112		if ((err = dst_ifp->if_transmit(dst_ifp, m))) {
2113			int n;
2114
2115			for (m = m0, n = 1; m != NULL; m = m0, n++) {
2116				m0 = m->m_nextpkt;
2117				m_freem(m);
2118			}
2119			if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, n);
2120			break;
2121		}
2122
2123		if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1);
2124		if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, len);
2125		if (mflags & M_MCAST)
2126			if_inc_counter(sc->sc_ifp, IFCOUNTER_OMCASTS, 1);
2127	}
2128
2129	return (err);
2130}
2131
2132/*
2133 * bridge_dummynet:
2134 *
2135 * 	Receive a queued packet from dummynet and pass it on to the output
2136 * 	interface.
2137 *
2138 *	The mbuf has the Ethernet header already attached.
2139 */
2140static void
2141bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
2142{
2143	struct bridge_softc *sc;
2144
2145	sc = ifp->if_bridge;
2146
2147	/*
2148	 * The packet didnt originate from a member interface. This should only
2149	 * ever happen if a member interface is removed while packets are
2150	 * queued for it.
2151	 */
2152	if (sc == NULL) {
2153		m_freem(m);
2154		return;
2155	}
2156
2157	if (PFIL_HOOKED_OUT_46) {
2158		if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0)
2159			return;
2160		if (m == NULL)
2161			return;
2162	}
2163
2164	bridge_enqueue(sc, ifp, m);
2165}
2166
2167/*
2168 * bridge_output:
2169 *
2170 *	Send output from a bridge member interface.  This
2171 *	performs the bridging function for locally originated
2172 *	packets.
2173 *
2174 *	The mbuf has the Ethernet header already attached.  We must
2175 *	enqueue or free the mbuf before returning.
2176 */
2177static int
2178bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
2179    struct rtentry *rt)
2180{
2181	struct ether_header *eh;
2182	struct ifnet *bifp, *dst_if;
2183	struct bridge_softc *sc;
2184	uint16_t vlan;
2185
2186	NET_EPOCH_ASSERT();
2187
2188	if (m->m_len < ETHER_HDR_LEN) {
2189		m = m_pullup(m, ETHER_HDR_LEN);
2190		if (m == NULL)
2191			return (0);
2192	}
2193
2194	eh = mtod(m, struct ether_header *);
2195	sc = ifp->if_bridge;
2196	vlan = VLANTAGOF(m);
2197
2198	bifp = sc->sc_ifp;
2199
2200	/*
2201	 * If bridge is down, but the original output interface is up,
2202	 * go ahead and send out that interface.  Otherwise, the packet
2203	 * is dropped below.
2204	 */
2205	if ((bifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2206		dst_if = ifp;
2207		goto sendunicast;
2208	}
2209
2210	/*
2211	 * If the packet is a multicast, or we don't know a better way to
2212	 * get there, send to all interfaces.
2213	 */
2214	if (ETHER_IS_MULTICAST(eh->ether_dhost))
2215		dst_if = NULL;
2216	else
2217		dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan);
2218	/* Tap any traffic not passing back out the originating interface */
2219	if (dst_if != ifp)
2220		ETHER_BPF_MTAP(bifp, m);
2221	if (dst_if == NULL) {
2222		struct bridge_iflist *bif;
2223		struct mbuf *mc;
2224		int used = 0;
2225
2226		bridge_span(sc, m);
2227
2228		CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
2229			dst_if = bif->bif_ifp;
2230
2231			if (dst_if->if_type == IFT_GIF)
2232				continue;
2233			if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
2234				continue;
2235
2236			/*
2237			 * If this is not the original output interface,
2238			 * and the interface is participating in spanning
2239			 * tree, make sure the port is in a state that
2240			 * allows forwarding.
2241			 */
2242			if (dst_if != ifp && (bif->bif_flags & IFBIF_STP) &&
2243			    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
2244				continue;
2245
2246			if (CK_LIST_NEXT(bif, bif_next) == NULL) {
2247				used = 1;
2248				mc = m;
2249			} else {
2250				mc = m_dup(m, M_NOWAIT);
2251				if (mc == NULL) {
2252					if_inc_counter(bifp, IFCOUNTER_OERRORS, 1);
2253					continue;
2254				}
2255			}
2256
2257			bridge_enqueue(sc, dst_if, mc);
2258		}
2259		if (used == 0)
2260			m_freem(m);
2261		return (0);
2262	}
2263
2264sendunicast:
2265	/*
2266	 * XXX Spanning tree consideration here?
2267	 */
2268
2269	bridge_span(sc, m);
2270	if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2271		m_freem(m);
2272		return (0);
2273	}
2274
2275	bridge_enqueue(sc, dst_if, m);
2276	return (0);
2277}
2278
2279/*
2280 * bridge_transmit:
2281 *
2282 *	Do output on a bridge.
2283 *
2284 */
2285static int
2286bridge_transmit(struct ifnet *ifp, struct mbuf *m)
2287{
2288	struct bridge_softc *sc;
2289	struct ether_header *eh;
2290	struct ifnet *dst_if;
2291	int error = 0;
2292
2293	sc = ifp->if_softc;
2294
2295	ETHER_BPF_MTAP(ifp, m);
2296
2297	eh = mtod(m, struct ether_header *);
2298
2299	if (((m->m_flags & (M_BCAST|M_MCAST)) == 0) &&
2300	    (dst_if = bridge_rtlookup(sc, eh->ether_dhost, DOT1Q_VID_NULL)) !=
2301	    NULL) {
2302		error = bridge_enqueue(sc, dst_if, m);
2303	} else
2304		bridge_broadcast(sc, ifp, m, 0);
2305
2306	return (error);
2307}
2308
2309#ifdef ALTQ
2310static void
2311bridge_altq_start(if_t ifp)
2312{
2313	struct ifaltq *ifq = &ifp->if_snd;
2314	struct mbuf *m;
2315
2316	IFQ_LOCK(ifq);
2317	IFQ_DEQUEUE_NOLOCK(ifq, m);
2318	while (m != NULL) {
2319		bridge_transmit(ifp, m);
2320		IFQ_DEQUEUE_NOLOCK(ifq, m);
2321	}
2322	IFQ_UNLOCK(ifq);
2323}
2324
2325static int
2326bridge_altq_transmit(if_t ifp, struct mbuf *m)
2327{
2328	int err;
2329
2330	if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
2331		IFQ_ENQUEUE(&ifp->if_snd, m, err);
2332		if (err == 0)
2333			bridge_altq_start(ifp);
2334	} else
2335		err = bridge_transmit(ifp, m);
2336
2337	return (err);
2338}
2339#endif	/* ALTQ */
2340
/*
 * bridge_qflush:
 *
 *	The ifp->if_qflush entry point for if_bridge(4).  It is a no-op:
 *	the body is empty and the interface argument is unused.
 */
static void
bridge_qflush(struct ifnet *ifp __unused)
{
}
2348
/*
 * bridge_forward:
 *
 *	The forwarding function of the bridge.
 *
 *	Takes a frame `m' that arrived on bridge member `sbif' and either
 *	drops it, floods it through bridge_broadcast(), or enqueues it on
 *	the one member interface that the address table maps its
 *	destination to.  Input counters are charged to the bridge
 *	interface before any drop decision is made.  The caller does not
 *	get the mbuf back on any path.
 *
 *	Must be called from within a network epoch section.
 *
 *	NOTE(review): this header used to say "Releases the lock on
 *	return", but the body below neither acquires nor releases a lock;
 *	confirm against the callers before relying on that statement.
 */
static void
bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
    struct mbuf *m)
{
	struct bridge_iflist *dbif;
	struct ifnet *src_if, *dst_if, *ifp;
	struct ether_header *eh;
	uint16_t vlan;
	uint8_t *dst;
	int error;

	NET_EPOCH_ASSERT();

	src_if = m->m_pkthdr.rcvif;
	ifp = sc->sc_ifp;	/* the bridge interface itself */

	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
	vlan = VLANTAGOF(m);

	/* A source port that STP holds in discarding accepts nothing. */
	if ((sbif->bif_flags & IFBIF_STP) &&
	    sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
		goto drop;

	eh = mtod(m, struct ether_header *);
	dst = eh->ether_dhost;

	/* If the interface is learning, record the address. */
	if (sbif->bif_flags & IFBIF_LEARNING) {
		error = bridge_rtupdate(sc, eh->ether_shost, vlan,
		    sbif, 0, IFBAF_DYNAMIC);
		/*
		 * If the interface has addresses limits then deny any source
		 * that is not in the cache.
		 */
		if (error && sbif->bif_addrmax)
			goto drop;
	}

	/*
	 * A port in the STP learning state has had its source address
	 * recorded above but does not forward.
	 */
	if ((sbif->bif_flags & IFBIF_STP) != 0 &&
	    sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING)
		goto drop;

#ifdef DEV_NETMAP
	/*
	 * Hand the packet to netmap only if it wasn't injected by netmap
	 * itself.
	 */
	if ((m->m_flags & M_BRIDGE_INJECT) == 0 &&
	    (if_getcapenable(ifp) & IFCAP_NETMAP) != 0) {
		ifp->if_input(ifp, m);
		return;
	}
	m->m_flags &= ~M_BRIDGE_INJECT;
#endif

	/*
	 * At this point, the port either doesn't participate
	 * in spanning tree or it is in the forwarding state.
	 */

	/*
	 * If the packet is unicast, destined for someone on
	 * "this" side of the bridge, drop it.
	 */
	if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
		dst_if = bridge_rtlookup(sc, dst, vlan);
		if (src_if == dst_if)
			goto drop;
	} else {
		/*
		 * Check if its a reserved multicast address, any address
		 * listed in 802.1D section 7.12.6 may not be forwarded by the
		 * bridge.
		 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
		 */
		if (dst[0] == 0x01 && dst[1] == 0x80 &&
		    dst[2] == 0xc2 && dst[3] == 0x00 &&
		    dst[4] == 0x00 && dst[5] <= 0x0f)
			goto drop;

		/* ...forward it to all interfaces. */
		if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
		dst_if = NULL;
	}

	/*
	 * If we have a destination interface which is a member of our bridge,
	 * OR this is a unicast packet, push it through the bpf(4) machinery.
	 * For broadcast or multicast packets, don't bother because it will
	 * be reinjected into ether_input. We do this before we pass the packets
	 * through the pfil(9) framework, as it is possible that pfil(9) will
	 * drop the packet, or possibly modify it, making it difficult to debug
	 * firewall issues on the bridge.
	 */
	if (dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0)
		ETHER_BPF_MTAP(ifp, m);

	/*
	 * run the packet filter
	 *
	 * Nothing more is done with the mbuf here when bridge_pfil()
	 * reports an error or leaves `m' NULL (presumably the filter has
	 * disposed of it in those cases -- confirm in bridge_pfil()).
	 */
	if (PFIL_HOOKED_IN_46) {
		if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0)
			return;
		if (m == NULL)
			return;
	}

	/*
	 * No single destination: either broadcast/multicast, or a unicast
	 * address the table does not know.  Flood, with filtering enabled.
	 */
	if (dst_if == NULL) {
		bridge_broadcast(sc, src_if, m, 1);
		return;
	}

	/*
	 * At this point, we're dealing with a unicast frame
	 * going to a different interface.
	 */
	if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
		goto drop;

	dbif = bridge_lookup_member_if(sc, dst_if);
	if (dbif == NULL)
		/* Not a member of the bridge (anymore?) */
		goto drop;

	/* Private segments can not talk to each other */
	if (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE)
		goto drop;

	/* Do not send out of a port that STP holds in discarding. */
	if ((dbif->bif_flags & IFBIF_STP) &&
	    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
		goto drop;

	/* Outbound filter pass; same early-return convention as above. */
	if (PFIL_HOOKED_OUT_46) {
		if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0)
			return;
		if (m == NULL)
			return;
	}

	bridge_enqueue(sc, dst_if, m);
	return;

drop:
	m_freem(m);
}
2500
/*
 * bridge_input:
 *
 *	Receive input from a member interface.  Queue the packet for
 *	bridging if it is not for us.
 *
 *	Returns the mbuf the caller should carry on processing locally, or
 *	NULL when the packet was consumed here (forwarded, handed to
 *	another input routine, or freed).
 *
 *	Must be called from within a network epoch section.
 */
static struct mbuf *
bridge_input(struct ifnet *ifp, struct mbuf *m)
{
	struct bridge_softc *sc;
	struct bridge_iflist *bif, *bif2;
	struct ifnet *bifp;
	struct ether_header *eh;
	struct mbuf *mc, *mc2;
	uint16_t vlan;
	int error;

	NET_EPOCH_ASSERT();

	eh = mtod(m, struct ether_header *);
	vlan = VLANTAGOF(m);

	sc = ifp->if_bridge;
	if (sc == NULL) {
		/*
		 * This packet originated from the bridge itself, so it must
		 * have been transmitted by netmap.  Derive the "source"
		 * interface from the source address and drop the packet if the
		 * source address isn't known.
		 */
		KASSERT((m->m_flags & M_BRIDGE_INJECT) != 0,
		    ("%s: ifnet %p missing a bridge softc", __func__, ifp));
		sc = if_getsoftc(ifp);
		ifp = bridge_rtlookup(sc, eh->ether_shost, vlan);
		if (ifp == NULL) {
			if_inc_counter(sc->sc_ifp, IFCOUNTER_IERRORS, 1);
			m_freem(m);
			return (NULL);
		}
		m->m_pkthdr.rcvif = ifp;
	}
	bifp = sc->sc_ifp;
	/* Bridge not running: leave the packet to the caller untouched. */
	if ((bifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return (m);

	/*
	 * Implement support for bridge monitoring. If this flag has been
	 * set on this interface, discard the packet once we push it through
	 * the bpf(4) machinery, but before we do, increment the byte and
	 * packet counters associated with this interface.
	 */
	if ((bifp->if_flags & IFF_MONITOR) != 0) {
		m->m_pkthdr.rcvif  = bifp;
		ETHER_BPF_MTAP(bifp, m);
		if_inc_counter(bifp, IFCOUNTER_IPACKETS, 1);
		if_inc_counter(bifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
		m_freem(m);
		return (NULL);
	}
	/* The receiving interface must (still) be a member of this bridge. */
	bif = bridge_lookup_member_if(sc, ifp);
	if (bif == NULL) {
		return (m);
	}

	/* Mirror the frame to any span ports before deciding its fate. */
	bridge_span(sc, m);

	if (m->m_flags & (M_BCAST|M_MCAST)) {
		/* Tap off 802.1D packets; they do not get forwarded. */
		if (memcmp(eh->ether_dhost, bstp_etheraddr,
		    ETHER_ADDR_LEN) == 0) {
			bstp_input(&bif->bif_stp, ifp, m); /* consumes mbuf */
			return (NULL);
		}

		/* Port held in discarding by STP: no bridging, local only. */
		if ((bif->bif_flags & IFBIF_STP) &&
		    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
			return (m);
		}

		/*
		 * Make a deep copy of the packet and enqueue the copy
		 * for bridge processing; return the original packet for
		 * local processing.
		 */
		mc = m_dup(m, M_NOWAIT);
		if (mc == NULL) {
			return (m);
		}

		/* Perform the bridge forwarding function with the copy. */
		bridge_forward(sc, bif, mc);

#ifdef DEV_NETMAP
		/*
		 * If netmap is enabled and has not already seen this packet,
		 * then it will be consumed by bridge_forward().
		 */
		if ((if_getcapenable(bifp) & IFCAP_NETMAP) != 0 &&
		    (m->m_flags & M_BRIDGE_INJECT) == 0) {
			m_freem(m);
			return (NULL);
		}
#endif

		/*
		 * Reinject the mbuf as arriving on the bridge so we have a
		 * chance at claiming multicast packets. We can not loop back
		 * here from ether_input as a bridge is never a member of a
		 * bridge.
		 */
		KASSERT(bifp->if_bridge == NULL,
		    ("loop created in bridge_input"));
		mc2 = m_dup(m, M_NOWAIT);
		if (mc2 != NULL) {
			/* Keep the layer3 header aligned */
			int i = min(mc2->m_pkthdr.len, max_protohdr);
			mc2 = m_copyup(mc2, i, ETHER_ALIGN);
		}
		/* Re-checked because m_copyup() may have left mc2 NULL. */
		if (mc2 != NULL) {
			mc2->m_pkthdr.rcvif = bifp;
			mc2->m_flags &= ~M_BRIDGE_INJECT;
			sc->sc_if_input(bifp, mc2);
		}

		/* Return the original packet for local processing. */
		return (m);
	}

	/* Unicast from a port held in discarding: no bridging, local only. */
	if ((bif->bif_flags & IFBIF_STP) &&
	    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
		return (m);
	}

	/*
	 * Helper macros, local to this function and #undef'd below.
	 *
	 * CARP_CHECK_WE_ARE_DST/SRC test whether the interface has CARP
	 * state and carp_forus_p() accepts the frame's destination/source
	 * address; they are constant false without INET and INET6.
	 */
#if defined(INET) || defined(INET6)
#define	CARP_CHECK_WE_ARE_DST(iface) \
	((iface)->if_carp && (*carp_forus_p)((iface), eh->ether_dhost))
#define	CARP_CHECK_WE_ARE_SRC(iface) \
	((iface)->if_carp && (*carp_forus_p)((iface), eh->ether_shost))
#else
#define	CARP_CHECK_WE_ARE_DST(iface)	false
#define	CARP_CHECK_WE_ARE_SRC(iface)	false
#endif

	/*
	 * GRAB_FOR_NETMAP passes the packet to the interface's if_input
	 * and returns NULL from bridge_input() when netmap is enabled on
	 * that interface and the packet was not injected by netmap; it
	 * expands to nothing without DEV_NETMAP.
	 */
#ifdef DEV_NETMAP
#define	GRAB_FOR_NETMAP(ifp, m) do {					\
	if ((if_getcapenable(ifp) & IFCAP_NETMAP) != 0 &&		\
	    ((m)->m_flags & M_BRIDGE_INJECT) == 0) {			\
		(ifp)->if_input(ifp, m);				\
		return (NULL);						\
	}								\
} while (0)
#else
#define	GRAB_FOR_NETMAP(ifp, m)
#endif

	/*
	 * GRAB_OUR_PACKETS(iface) decides whether a unicast frame belongs
	 * to `iface'.  It contains `return' statements that leave
	 * bridge_input() and a `continue' that skips gif(4) interfaces, so
	 * it must be expanded inside a loop (the one-shot do/while wrappers
	 * below exist for that reason).
	 *
	 *  - Destination is iface's link-level address (or CARP accepts
	 *    it): learn the source if the receiving port is learning,
	 *    retarget rcvif to iface and return the mbuf for local
	 *    processing, or NULL if address limits, netmap or the packet
	 *    filter took it.
	 *  - Source is iface's link-level address (or CARP claims it): we
	 *    are seeing our own transmission; free it and return NULL.
	 */
#define GRAB_OUR_PACKETS(iface)						\
	if ((iface)->if_type == IFT_GIF)				\
		continue;						\
	/* It is destined for us. */					\
	if (memcmp(IF_LLADDR(iface), eh->ether_dhost, ETHER_ADDR_LEN) == 0 || \
	    CARP_CHECK_WE_ARE_DST(iface)) {				\
		if (bif->bif_flags & IFBIF_LEARNING) {			\
			error = bridge_rtupdate(sc, eh->ether_shost,	\
			    vlan, bif, 0, IFBAF_DYNAMIC);		\
			if (error && bif->bif_addrmax) {		\
				m_freem(m);				\
				return (NULL);				\
			}						\
		}							\
		m->m_pkthdr.rcvif = iface;				\
		if ((iface) == ifp) {					\
			/* Skip bridge processing... src == dest */	\
			return (m);					\
		}							\
		/* It's passing over or to the bridge, locally. */	\
		ETHER_BPF_MTAP(bifp, m);				\
		if_inc_counter(bifp, IFCOUNTER_IPACKETS, 1);		\
		if_inc_counter(bifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);\
		/* Hand the packet over to netmap if necessary. */	\
		GRAB_FOR_NETMAP(bifp, m);				\
		/* Filter on the physical interface. */			\
		if (V_pfil_local_phys && PFIL_HOOKED_IN_46) {		\
			if (bridge_pfil(&m, NULL, ifp,			\
			    PFIL_IN) != 0 || m == NULL) {		\
				return (NULL);				\
			}						\
		}							\
		if ((iface) != bifp)					\
			ETHER_BPF_MTAP(iface, m);			\
		return (m);						\
	}								\
									\
	/* We just received a packet that we sent out. */		\
	if (memcmp(IF_LLADDR(iface), eh->ether_shost, ETHER_ADDR_LEN) == 0 || \
	    CARP_CHECK_WE_ARE_SRC(iface)) {				\
		m_freem(m);						\
		return (NULL);						\
	}

	/*
	 * Unicast.  Make sure it's not for the bridge.
	 */
	do { GRAB_OUR_PACKETS(bifp) } while (0);

	/*
	 * Give a chance for ifp at first priority. This will help when	the
	 * packet comes through the interface like VLAN's with the same MACs
	 * on several interfaces from the same bridge. This also will save
	 * some CPU cycles in case the destination interface and the input
	 * interface (eq ifp) are the same.
	 */
	do { GRAB_OUR_PACKETS(ifp) } while (0);

	/* Now check the all bridge members. */
	CK_LIST_FOREACH(bif2, &sc->sc_iflist, bif_next) {
		GRAB_OUR_PACKETS(bif2->bif_ifp)
	}

#undef CARP_CHECK_WE_ARE_DST
#undef CARP_CHECK_WE_ARE_SRC
#undef GRAB_FOR_NETMAP
#undef GRAB_OUR_PACKETS

	/*
	 * Perform the bridge forwarding function.  Nothing local claimed
	 * the frame, so it is not returned to the caller.
	 */
	bridge_forward(sc, bif, m);

	return (NULL);
}
2729
2730/*
2731 * Inject a packet back into the host ethernet stack.  This will generally only
2732 * be used by netmap when an application writes to the host TX ring.  The
2733 * M_BRIDGE_INJECT flag ensures that the packet is re-routed to the bridge
2734 * interface after ethernet processing.
2735 */
2736static void
2737bridge_inject(struct ifnet *ifp, struct mbuf *m)
2738{
2739	struct bridge_softc *sc;
2740
2741	KASSERT((if_getcapenable(ifp) & IFCAP_NETMAP) != 0,
2742	    ("%s: iface %s is not running in netmap mode",
2743	    __func__, if_name(ifp)));
2744	KASSERT((m->m_flags & M_BRIDGE_INJECT) == 0,
2745	    ("%s: mbuf %p has M_BRIDGE_INJECT set", __func__, m));
2746
2747	m->m_flags |= M_BRIDGE_INJECT;
2748	sc = if_getsoftc(ifp);
2749	sc->sc_if_input(ifp, m);
2750}
2751
2752/*
2753 * bridge_broadcast:
2754 *
2755 *	Send a frame to all interfaces that are members of
2756 *	the bridge, except for the one on which the packet
2757 *	arrived.
2758 *
2759 *	NOTE: Releases the lock on return.
2760 */
2761static void
2762bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
2763    struct mbuf *m, int runfilt)
2764{
2765	struct bridge_iflist *dbif, *sbif;
2766	struct mbuf *mc;
2767	struct ifnet *dst_if;
2768	int used = 0, i;
2769
2770	NET_EPOCH_ASSERT();
2771
2772	sbif = bridge_lookup_member_if(sc, src_if);
2773
2774	/* Filter on the bridge interface before broadcasting */
2775	if (runfilt && PFIL_HOOKED_OUT_46) {
2776		if (bridge_pfil(&m, sc->sc_ifp, NULL, PFIL_OUT) != 0)
2777			return;
2778		if (m == NULL)
2779			return;
2780	}
2781
2782	CK_LIST_FOREACH(dbif, &sc->sc_iflist, bif_next) {
2783		dst_if = dbif->bif_ifp;
2784		if (dst_if == src_if)
2785			continue;
2786
2787		/* Private segments can not talk to each other */
2788		if (sbif && (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE))
2789			continue;
2790
2791		if ((dbif->bif_flags & IFBIF_STP) &&
2792		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
2793			continue;
2794
2795		if ((dbif->bif_flags & IFBIF_DISCOVER) == 0 &&
2796		    (m->m_flags & (M_BCAST|M_MCAST)) == 0)
2797			continue;
2798
2799		if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
2800			continue;
2801
2802		if (CK_LIST_NEXT(dbif, bif_next) == NULL) {
2803			mc = m;
2804			used = 1;
2805		} else {
2806			mc = m_dup(m, M_NOWAIT);
2807			if (mc == NULL) {
2808				if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
2809				continue;
2810			}
2811		}
2812
2813		/*
2814		 * Filter on the output interface. Pass a NULL bridge interface
2815		 * pointer so we do not redundantly filter on the bridge for
2816		 * each interface we broadcast on.
2817		 */
2818		if (runfilt && PFIL_HOOKED_OUT_46) {
2819			if (used == 0) {
2820				/* Keep the layer3 header aligned */
2821				i = min(mc->m_pkthdr.len, max_protohdr);
2822				mc = m_copyup(mc, i, ETHER_ALIGN);
2823				if (mc == NULL) {
2824					if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
2825					continue;
2826				}
2827			}
2828			if (bridge_pfil(&mc, NULL, dst_if, PFIL_OUT) != 0)
2829				continue;
2830			if (mc == NULL)
2831				continue;
2832		}
2833
2834		bridge_enqueue(sc, dst_if, mc);
2835	}
2836	if (used == 0)
2837		m_freem(m);
2838}
2839
2840/*
2841 * bridge_span:
2842 *
2843 *	Duplicate a packet out one or more interfaces that are in span mode,
2844 *	the original mbuf is unmodified.
2845 */
2846static void
2847bridge_span(struct bridge_softc *sc, struct mbuf *m)
2848{
2849	struct bridge_iflist *bif;
2850	struct ifnet *dst_if;
2851	struct mbuf *mc;
2852
2853	NET_EPOCH_ASSERT();
2854
2855	if (CK_LIST_EMPTY(&sc->sc_spanlist))
2856		return;
2857
2858	CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2859		dst_if = bif->bif_ifp;
2860
2861		if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
2862			continue;
2863
2864		mc = m_dup(m, M_NOWAIT);
2865		if (mc == NULL) {
2866			if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
2867			continue;
2868		}
2869
2870		bridge_enqueue(sc, dst_if, mc);
2871	}
2872}
2873
/*
 * bridge_rtupdate:
 *
 *	Add a bridge routing entry, or refresh an existing one.
 *
 *	`dst' is the Ethernet address to record against member `bif' for
 *	`vlan' (the callers in this file that learn from traffic pass the
 *	frame's source address).  If `flags' is of type IFBAF_DYNAMIC the
 *	entry's expiry time is pushed out by the bridge timeout.  If
 *	`setflags' is non-zero the entry's flags are replaced by `flags'.
 *
 *	Must be called with the bridge lock held or from within a network
 *	epoch section (asserted below).
 *
 *	Returns 0 on success, EINVAL for a multicast or all-zero address,
 *	ENOSPC when the bridge-wide or per-interface address limit has been
 *	reached, ENOMEM when no node could be allocated, or the error from
 *	bridge_rtnode_insert().
 */
static int
bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
    struct bridge_iflist *bif, int setflags, uint8_t flags)
{
	struct bridge_rtnode *brt;
	struct bridge_iflist *obif;
	int error;

	BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc);

	/* Check the source address is valid and not multicast. */
	if (ETHER_IS_MULTICAST(dst) ||
	    (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
	     dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0)
		return (EINVAL);

	/*
	 * A route for this destination might already exist.  If so,
	 * update it, otherwise create a new one.
	 *
	 * This first lookup is made without the route-table lock.
	 */
	if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
		BRIDGE_RT_LOCK(sc);

		/* Check again, now that we have the lock. There could have
		 * been a race and we only want to insert this once.
		 *
		 * NOTE(review): this path returns success without
		 * refreshing the expiry time or moving the entry to `bif'.
		 */
		if (bridge_rtnode_lookup(sc, dst, vlan) != NULL) {
			BRIDGE_RT_UNLOCK(sc);
			return (0);
		}

		/* Bridge-wide table size limit. */
		if (sc->sc_brtcnt >= sc->sc_brtmax) {
			sc->sc_brtexceeded++;
			BRIDGE_RT_UNLOCK(sc);
			return (ENOSPC);
		}
		/* Check per interface address limits (if enabled) */
		if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
			bif->bif_addrexceeded++;
			BRIDGE_RT_UNLOCK(sc);
			return (ENOSPC);
		}

		/*
		 * Allocate a new bridge forwarding node, and
		 * initialize the expiration time and Ethernet
		 * address.
		 *
		 * The allocation is non-sleeping and zeroed; the expiry
		 * time itself is only assigned at the end of the function.
		 */
		brt = uma_zalloc(V_bridge_rtnode_zone, M_NOWAIT | M_ZERO);
		if (brt == NULL) {
			BRIDGE_RT_UNLOCK(sc);
			return (ENOMEM);
		}
		/* Recorded so the node can be freed in the right vnet. */
		brt->brt_vnet = curvnet;

		/* The initial entry type follows the member's sticky flag. */
		if (bif->bif_flags & IFBIF_STICKY)
			brt->brt_flags = IFBAF_STICKY;
		else
			brt->brt_flags = IFBAF_DYNAMIC;

		memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
		brt->brt_vlan = vlan;

		brt->brt_dst = bif;
		if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
			uma_zfree(V_bridge_rtnode_zone, brt);
			BRIDGE_RT_UNLOCK(sc);
			return (error);
		}
		bif->bif_addrcnt++;

		BRIDGE_RT_UNLOCK(sc);
	}

	/*
	 * A dynamic entry that points at a different member has moved:
	 * transfer it (and the per-interface address count) to `bif' and
	 * optionally log the move, rate limited by ppsratecheck().
	 */
	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
	    (obif = brt->brt_dst) != bif) {
		MPASS(obif != NULL);

		BRIDGE_RT_LOCK(sc);
		brt->brt_dst->bif_addrcnt--;
		brt->brt_dst = bif;
		brt->brt_dst->bif_addrcnt++;
		BRIDGE_RT_UNLOCK(sc);

		if (V_log_mac_flap &&
		    ppsratecheck(&V_log_last, &V_log_count, V_log_interval)) {
			log(LOG_NOTICE,
			    "%s: mac address %6D vlan %d moved from %s to %s\n",
			    sc->sc_ifp->if_xname,
			    &brt->brt_addr[0], ":",
			    brt->brt_vlan,
			    obif->bif_ifp->if_xname,
			    bif->bif_ifp->if_xname);
		}
	}

	/* Refresh the expiry only when the caller supplies a dynamic type. */
	if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
		brt->brt_expire = time_uptime + sc->sc_brttimeout;
	if (setflags)
		brt->brt_flags = flags;

	return (0);
}
2981
2982/*
2983 * bridge_rtlookup:
2984 *
2985 *	Lookup the destination interface for an address.
2986 */
2987static struct ifnet *
2988bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
2989{
2990	struct bridge_rtnode *brt;
2991
2992	NET_EPOCH_ASSERT();
2993
2994	if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL)
2995		return (NULL);
2996
2997	return (brt->brt_ifp);
2998}
2999
3000/*
3001 * bridge_rttrim:
3002 *
3003 *	Trim the routine table so that we have a number
3004 *	of routing entries less than or equal to the
3005 *	maximum number.
3006 */
3007static void
3008bridge_rttrim(struct bridge_softc *sc)
3009{
3010	struct bridge_rtnode *brt, *nbrt;
3011
3012	NET_EPOCH_ASSERT();
3013	BRIDGE_RT_LOCK_ASSERT(sc);
3014
3015	/* Make sure we actually need to do this. */
3016	if (sc->sc_brtcnt <= sc->sc_brtmax)
3017		return;
3018
3019	/* Force an aging cycle; this might trim enough addresses. */
3020	bridge_rtage(sc);
3021	if (sc->sc_brtcnt <= sc->sc_brtmax)
3022		return;
3023
3024	CK_LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
3025		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
3026			bridge_rtnode_destroy(sc, brt);
3027			if (sc->sc_brtcnt <= sc->sc_brtmax)
3028				return;
3029		}
3030	}
3031}
3032
3033/*
3034 * bridge_timer:
3035 *
3036 *	Aging timer for the bridge.
3037 */
3038static void
3039bridge_timer(void *arg)
3040{
3041	struct bridge_softc *sc = arg;
3042
3043	BRIDGE_RT_LOCK_ASSERT(sc);
3044
3045	/* Destruction of rtnodes requires a proper vnet context */
3046	CURVNET_SET(sc->sc_ifp->if_vnet);
3047	bridge_rtage(sc);
3048
3049	if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
3050		callout_reset(&sc->sc_brcallout,
3051		    bridge_rtable_prune_period * hz, bridge_timer, sc);
3052	CURVNET_RESTORE();
3053}
3054
3055/*
3056 * bridge_rtage:
3057 *
3058 *	Perform an aging cycle.
3059 */
3060static void
3061bridge_rtage(struct bridge_softc *sc)
3062{
3063	struct bridge_rtnode *brt, *nbrt;
3064
3065	BRIDGE_RT_LOCK_ASSERT(sc);
3066
3067	CK_LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
3068		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
3069			if (time_uptime >= brt->brt_expire)
3070				bridge_rtnode_destroy(sc, brt);
3071		}
3072	}
3073}
3074
3075/*
3076 * bridge_rtflush:
3077 *
3078 *	Remove all dynamic addresses from the bridge.
3079 */
3080static void
3081bridge_rtflush(struct bridge_softc *sc, int full)
3082{
3083	struct bridge_rtnode *brt, *nbrt;
3084
3085	BRIDGE_RT_LOCK_ASSERT(sc);
3086
3087	CK_LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
3088		if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
3089			bridge_rtnode_destroy(sc, brt);
3090	}
3091}
3092
3093/*
3094 * bridge_rtdaddr:
3095 *
3096 *	Remove an address from the table.
3097 */
3098static int
3099bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
3100{
3101	struct bridge_rtnode *brt;
3102	int found = 0;
3103
3104	BRIDGE_RT_LOCK(sc);
3105
3106	/*
3107	 * If vlan is DOT1Q_VID_RSVD_IMPL then we want to delete for all vlans
3108	 * so the lookup may return more than one.
3109	 */
3110	while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
3111		bridge_rtnode_destroy(sc, brt);
3112		found = 1;
3113	}
3114
3115	BRIDGE_RT_UNLOCK(sc);
3116
3117	return (found ? 0 : ENOENT);
3118}
3119
3120/*
3121 * bridge_rtdelete:
3122 *
3123 *	Delete routes to a speicifc member interface.
3124 */
3125static void
3126bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
3127{
3128	struct bridge_rtnode *brt, *nbrt;
3129
3130	BRIDGE_RT_LOCK_ASSERT(sc);
3131
3132	CK_LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
3133		if (brt->brt_ifp == ifp && (full ||
3134			    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC))
3135			bridge_rtnode_destroy(sc, brt);
3136	}
3137}
3138
3139/*
3140 * bridge_rtable_init:
3141 *
3142 *	Initialize the route table for this bridge.
3143 */
3144static void
3145bridge_rtable_init(struct bridge_softc *sc)
3146{
3147	int i;
3148
3149	sc->sc_rthash = malloc(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE,
3150	    M_DEVBUF, M_WAITOK);
3151
3152	for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
3153		CK_LIST_INIT(&sc->sc_rthash[i]);
3154
3155	sc->sc_rthash_key = arc4random();
3156	CK_LIST_INIT(&sc->sc_rtlist);
3157}
3158
/*
 * bridge_rtable_fini:
 *
 *	Deconstruct the route table for this bridge.
 *
 *	Only the hash bucket array is freed here.  The table is expected to
 *	be empty already: sc_brtcnt is asserted to be zero and no entries
 *	are released by this function.
 */
static void
bridge_rtable_fini(struct bridge_softc *sc)
{

	KASSERT(sc->sc_brtcnt == 0,
	    ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
	free(sc->sc_rthash, M_DEVBUF);
}
3172
3173/*
3174 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
3175 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
3176 */
3177#define	mix(a, b, c)							\
3178do {									\
3179	a -= b; a -= c; a ^= (c >> 13);					\
3180	b -= c; b -= a; b ^= (a << 8);					\
3181	c -= a; c -= b; c ^= (b >> 13);					\
3182	a -= b; a -= c; a ^= (c >> 12);					\
3183	b -= c; b -= a; b ^= (a << 16);					\
3184	c -= a; c -= b; c ^= (b >> 5);					\
3185	a -= b; a -= c; a ^= (c >> 3);					\
3186	b -= c; b -= a; b ^= (a << 10);					\
3187	c -= a; c -= b; c ^= (b >> 15);					\
3188} while (/*CONSTCOND*/0)
3189
3190static __inline uint32_t
3191bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
3192{
3193	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
3194
3195	b += addr[5] << 8;
3196	b += addr[4];
3197	a += addr[3] << 24;
3198	a += addr[2] << 16;
3199	a += addr[1] << 8;
3200	a += addr[0];
3201
3202	mix(a, b, c);
3203
3204	return (c & BRIDGE_RTHASH_MASK);
3205}
3206
3207#undef mix
3208
/*
 * bridge_rtnode_addr_cmp:
 *
 *	Compare two Ethernet addresses byte by byte, starting at byte 0.
 *	Returns 0 when all ETHER_ADDR_LEN bytes are equal; otherwise the
 *	difference a[i] - b[i] at the first byte that differs, which is
 *	negative when `a' sorts before `b' and positive when it sorts after.
 */
static int
bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx, diff = 0;

	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		diff = (int)a[idx] - (int)b[idx];
		if (diff != 0)
			break;
	}

	return (diff);
}
3220
3221/*
3222 * bridge_rtnode_lookup:
3223 *
3224 *	Look up a bridge route node for the specified destination. Compare the
3225 *	vlan id or if zero then just return the first match.
3226 */
3227static struct bridge_rtnode *
3228bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
3229{
3230	struct bridge_rtnode *brt;
3231	uint32_t hash;
3232	int dir;
3233
3234	BRIDGE_RT_LOCK_OR_NET_EPOCH_ASSERT(sc);
3235
3236	hash = bridge_rthash(sc, addr);
3237	CK_LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
3238		dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
3239		if (dir == 0 && (brt->brt_vlan == vlan || vlan == DOT1Q_VID_RSVD_IMPL))
3240			return (brt);
3241		if (dir > 0)
3242			return (NULL);
3243	}
3244
3245	return (NULL);
3246}
3247
3248/*
3249 * bridge_rtnode_insert:
3250 *
3251 *	Insert the specified bridge node into the route table.  We
3252 *	assume the entry is not already in the table.
3253 */
3254static int
3255bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
3256{
3257	struct bridge_rtnode *lbrt;
3258	uint32_t hash;
3259	int dir;
3260
3261	BRIDGE_RT_LOCK_ASSERT(sc);
3262
3263	hash = bridge_rthash(sc, brt->brt_addr);
3264
3265	lbrt = CK_LIST_FIRST(&sc->sc_rthash[hash]);
3266	if (lbrt == NULL) {
3267		CK_LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
3268		goto out;
3269	}
3270
3271	do {
3272		dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
3273		if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan)
3274			return (EEXIST);
3275		if (dir > 0) {
3276			CK_LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
3277			goto out;
3278		}
3279		if (CK_LIST_NEXT(lbrt, brt_hash) == NULL) {
3280			CK_LIST_INSERT_AFTER(lbrt, brt, brt_hash);
3281			goto out;
3282		}
3283		lbrt = CK_LIST_NEXT(lbrt, brt_hash);
3284	} while (lbrt != NULL);
3285
3286#ifdef DIAGNOSTIC
3287	panic("bridge_rtnode_insert: impossible");
3288#endif
3289
3290out:
3291	CK_LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
3292	sc->sc_brtcnt++;
3293
3294	return (0);
3295}
3296
3297static void
3298bridge_rtnode_destroy_cb(struct epoch_context *ctx)
3299{
3300	struct bridge_rtnode *brt;
3301
3302	brt = __containerof(ctx, struct bridge_rtnode, brt_epoch_ctx);
3303
3304	CURVNET_SET(brt->brt_vnet);
3305	uma_zfree(V_bridge_rtnode_zone, brt);
3306	CURVNET_RESTORE();
3307}
3308
/*
 * bridge_rtnode_destroy:
 *
 *	Destroy a bridge rtnode.
 *
 *	Unlinks the node from its hash bucket and from the flat route list,
 *	decrements the bridge-wide route count and the address count of the
 *	member the node points to, and schedules the node to be freed by
 *	bridge_rtnode_destroy_cb().  The caller must hold the route table
 *	lock.
 */
static void
bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
{
	BRIDGE_RT_LOCK_ASSERT(sc);

	/* Remove from the hash bucket used by bridge_rtnode_lookup(). */
	CK_LIST_REMOVE(brt, brt_hash);

	/* Remove from the flat list and fix up the counters. */
	CK_LIST_REMOVE(brt, brt_list);
	sc->sc_brtcnt--;
	brt->brt_dst->bif_addrcnt--;

	/*
	 * The node is not freed here; the free is handed to the net epoch
	 * callback mechanism (presumably so that readers still traversing
	 * the lists under the epoch never see freed memory).
	 */
	NET_EPOCH_CALL(bridge_rtnode_destroy_cb, &brt->brt_epoch_ctx);
}
3327
3328/*
3329 * bridge_rtable_expire:
3330 *
3331 *	Set the expiry time for all routes on an interface.
3332 */
3333static void
3334bridge_rtable_expire(struct ifnet *ifp, int age)
3335{
3336	struct bridge_softc *sc = ifp->if_bridge;
3337	struct bridge_rtnode *brt;
3338
3339	CURVNET_SET(ifp->if_vnet);
3340	BRIDGE_RT_LOCK(sc);
3341
3342	/*
3343	 * If the age is zero then flush, otherwise set all the expiry times to
3344	 * age for the interface
3345	 */
3346	if (age == 0)
3347		bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
3348	else {
3349		CK_LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
3350			/* Cap the expiry time to 'age' */
3351			if (brt->brt_ifp == ifp &&
3352			    brt->brt_expire > time_uptime + age &&
3353			    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
3354				brt->brt_expire = time_uptime + age;
3355		}
3356	}
3357	BRIDGE_RT_UNLOCK(sc);
3358	CURVNET_RESTORE();
3359}
3360
3361/*
3362 * bridge_state_change:
3363 *
3364 *	Callback from the bridgestp code when a port changes states.
3365 */
3366static void
3367bridge_state_change(struct ifnet *ifp, int state)
3368{
3369	struct bridge_softc *sc = ifp->if_bridge;
3370	static const char *stpstates[] = {
3371		"disabled",
3372		"listening",
3373		"learning",
3374		"forwarding",
3375		"blocking",
3376		"discarding"
3377	};
3378
3379	CURVNET_SET(ifp->if_vnet);
3380	if (V_log_stp)
3381		log(LOG_NOTICE, "%s: state changed to %s on %s\n",
3382		    sc->sc_ifp->if_xname, stpstates[state], ifp->if_xname);
3383	CURVNET_RESTORE();
3384}
3385
3386/*
3387 * Send bridge packets through pfil if they are one of the types pfil can deal
3388 * with, or if they are ARP or REVARP.  (pfil will pass ARP and REVARP without
3389 * question.) If *bifp or *ifp are NULL then packet filtering is skipped for
3390 * that interface.
3391 */
3392static int
3393bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
3394{
3395	int snap, error, i;
3396	struct ether_header *eh1, eh2;
3397	struct llc llc1;
3398	u_int16_t ether_type;
3399	pfil_return_t rv;
3400#ifdef INET
3401	struct ip *ip = NULL;
3402	int hlen = 0;
3403#endif
3404
3405	snap = 0;
3406	error = -1;	/* Default error if not error == 0 */
3407
3408#if 0
3409	/* we may return with the IP fields swapped, ensure its not shared */
3410	KASSERT(M_WRITABLE(*mp), ("%s: modifying a shared mbuf", __func__));
3411#endif
3412
3413	if (V_pfil_bridge == 0 && V_pfil_member == 0 && V_pfil_ipfw == 0)
3414		return (0); /* filtering is disabled */
3415
3416	i = min((*mp)->m_pkthdr.len, max_protohdr);
3417	if ((*mp)->m_len < i) {
3418	    *mp = m_pullup(*mp, i);
3419	    if (*mp == NULL) {
3420		printf("%s: m_pullup failed\n", __func__);
3421		return (-1);
3422	    }
3423	}
3424
3425	eh1 = mtod(*mp, struct ether_header *);
3426	ether_type = ntohs(eh1->ether_type);
3427
3428	/*
3429	 * Check for SNAP/LLC.
3430	 */
3431	if (ether_type < ETHERMTU) {
3432		struct llc *llc2 = (struct llc *)(eh1 + 1);
3433
3434		if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
3435		    llc2->llc_dsap == LLC_SNAP_LSAP &&
3436		    llc2->llc_ssap == LLC_SNAP_LSAP &&
3437		    llc2->llc_control == LLC_UI) {
3438			ether_type = htons(llc2->llc_un.type_snap.ether_type);
3439			snap = 1;
3440		}
3441	}
3442
3443	/*
3444	 * If we're trying to filter bridge traffic, only look at traffic for
3445	 * protocols available in the kernel (IPv4 and/or IPv6) to avoid
3446	 * passing traffic for an unsupported protocol to the filter.  This is
3447	 * lame since if we really wanted, say, an AppleTalk filter, we are
3448	 * hosed, but of course we don't have an AppleTalk filter to begin
3449	 * with.  (Note that since pfil doesn't understand ARP it will pass
3450	 * *ALL* ARP traffic.)
3451	 */
3452	switch (ether_type) {
3453#ifdef INET
3454		case ETHERTYPE_ARP:
3455		case ETHERTYPE_REVARP:
3456			if (V_pfil_ipfw_arp == 0)
3457				return (0); /* Automatically pass */
3458
3459			/* FALLTHROUGH */
3460		case ETHERTYPE_IP:
3461#endif
3462#ifdef INET6
3463		case ETHERTYPE_IPV6:
3464#endif /* INET6 */
3465			break;
3466
3467		default:
3468			/*
3469			 * We get here if the packet isn't from a supported
3470			 * protocol.  Check to see if the user wants to pass
3471			 * non-IP packets, these will not be checked by pfil(9)
3472			 * and passed unconditionally so the default is to
3473			 * drop.
3474			 */
3475			if (V_pfil_onlyip)
3476				goto bad;
3477	}
3478
3479	/* Run the packet through pfil before stripping link headers */
3480	if (PFIL_HOOKED_OUT(V_link_pfil_head) && V_pfil_ipfw != 0 &&
3481	    dir == PFIL_OUT && ifp != NULL) {
3482		switch (pfil_mbuf_out(V_link_pfil_head, mp, ifp, NULL)) {
3483		case PFIL_DROPPED:
3484			return (EACCES);
3485		case PFIL_CONSUMED:
3486			return (0);
3487		}
3488	}
3489
3490	/* Strip off the Ethernet header and keep a copy. */
3491	m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2);
3492	m_adj(*mp, ETHER_HDR_LEN);
3493
3494	/* Strip off snap header, if present */
3495	if (snap) {
3496		m_copydata(*mp, 0, sizeof(struct llc), (caddr_t) &llc1);
3497		m_adj(*mp, sizeof(struct llc));
3498	}
3499
3500	/*
3501	 * Check the IP header for alignment and errors
3502	 */
3503	if (dir == PFIL_IN) {
3504		switch (ether_type) {
3505#ifdef INET
3506			case ETHERTYPE_IP:
3507				error = bridge_ip_checkbasic(mp);
3508				break;
3509#endif
3510#ifdef INET6
3511			case ETHERTYPE_IPV6:
3512				error = bridge_ip6_checkbasic(mp);
3513				break;
3514#endif /* INET6 */
3515			default:
3516				error = 0;
3517		}
3518		if (error)
3519			goto bad;
3520	}
3521
3522	error = 0;
3523
3524	/*
3525	 * Run the packet through pfil
3526	 */
3527	rv = PFIL_PASS;
3528	switch (ether_type) {
3529#ifdef INET
3530	case ETHERTYPE_IP:
3531		/*
3532		 * Run pfil on the member interface and the bridge, both can
3533		 * be skipped by clearing pfil_member or pfil_bridge.
3534		 *
3535		 * Keep the order:
3536		 *   in_if -> bridge_if -> out_if
3537		 */
3538		if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL && (rv =
3539		    pfil_mbuf_out(V_inet_pfil_head, mp, bifp, NULL)) !=
3540		    PFIL_PASS)
3541			break;
3542
3543		if (V_pfil_member && ifp != NULL) {
3544			rv = (dir == PFIL_OUT) ?
3545			    pfil_mbuf_out(V_inet_pfil_head, mp, ifp, NULL) :
3546			    pfil_mbuf_in(V_inet_pfil_head, mp, ifp, NULL);
3547			if (rv != PFIL_PASS)
3548				break;
3549		}
3550
3551		if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL && (rv =
3552		    pfil_mbuf_in(V_inet_pfil_head, mp, bifp, NULL)) !=
3553		    PFIL_PASS)
3554			break;
3555
3556		/* check if we need to fragment the packet */
3557		/* bridge_fragment generates a mbuf chain of packets */
3558		/* that already include eth headers */
3559		if (V_pfil_member && ifp != NULL && dir == PFIL_OUT) {
3560			i = (*mp)->m_pkthdr.len;
3561			if (i > ifp->if_mtu) {
3562				error = bridge_fragment(ifp, mp, &eh2, snap,
3563					    &llc1);
3564				return (error);
3565			}
3566		}
3567
3568		/* Recalculate the ip checksum. */
3569		ip = mtod(*mp, struct ip *);
3570		hlen = ip->ip_hl << 2;
3571		if (hlen < sizeof(struct ip))
3572			goto bad;
3573		if (hlen > (*mp)->m_len) {
3574			if ((*mp = m_pullup(*mp, hlen)) == NULL)
3575				goto bad;
3576			ip = mtod(*mp, struct ip *);
3577			if (ip == NULL)
3578				goto bad;
3579		}
3580		ip->ip_sum = 0;
3581		if (hlen == sizeof(struct ip))
3582			ip->ip_sum = in_cksum_hdr(ip);
3583		else
3584			ip->ip_sum = in_cksum(*mp, hlen);
3585
3586		break;
3587#endif /* INET */
3588#ifdef INET6
3589	case ETHERTYPE_IPV6:
3590		if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL && (rv =
3591		    pfil_mbuf_out(V_inet6_pfil_head, mp, bifp, NULL)) !=
3592		    PFIL_PASS)
3593			break;
3594
3595		if (V_pfil_member && ifp != NULL) {
3596			rv = (dir == PFIL_OUT) ?
3597			    pfil_mbuf_out(V_inet6_pfil_head, mp, ifp, NULL) :
3598			    pfil_mbuf_in(V_inet6_pfil_head, mp, ifp, NULL);
3599			if (rv != PFIL_PASS)
3600				break;
3601		}
3602
3603		if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL && (rv =
3604		    pfil_mbuf_in(V_inet6_pfil_head, mp, bifp, NULL)) !=
3605		    PFIL_PASS)
3606			break;
3607		break;
3608#endif
3609	}
3610
3611	switch (rv) {
3612	case PFIL_CONSUMED:
3613		return (0);
3614	case PFIL_DROPPED:
3615		return (EACCES);
3616	default:
3617		break;
3618	}
3619
3620	error = -1;
3621
3622	/*
3623	 * Finally, put everything back the way it was and return
3624	 */
3625	if (snap) {
3626		M_PREPEND(*mp, sizeof(struct llc), M_NOWAIT);
3627		if (*mp == NULL)
3628			return (error);
3629		bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
3630	}
3631
3632	M_PREPEND(*mp, ETHER_HDR_LEN, M_NOWAIT);
3633	if (*mp == NULL)
3634		return (error);
3635	bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
3636
3637	return (0);
3638
3639bad:
3640	m_freem(*mp);
3641	*mp = NULL;
3642	return (error);
3643}
3644
3645#ifdef INET
3646/*
3647 * Perform basic checks on header size since
3648 * pfil assumes ip_input has already processed
3649 * it for it.  Cut-and-pasted from ip_input.c.
3650 * Given how simple the IPv6 version is,
3651 * does the IPv4 version really need to be
3652 * this complicated?
3653 *
3654 * XXX Should we update ipstat here, or not?
3655 * XXX Right now we update ipstat but not
3656 * XXX csum_counter.
3657 */
3658static int
3659bridge_ip_checkbasic(struct mbuf **mp)
3660{
3661	struct mbuf *m = *mp;
3662	struct ip *ip;
3663	int len, hlen;
3664	u_short sum;
3665
3666	if (*mp == NULL)
3667		return (-1);
3668
3669	if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
3670		if ((m = m_copyup(m, sizeof(struct ip),
3671			(max_linkhdr + 3) & ~3)) == NULL) {
3672			/* XXXJRT new stat, please */
3673			KMOD_IPSTAT_INC(ips_toosmall);
3674			goto bad;
3675		}
3676	} else if (__predict_false(m->m_len < sizeof (struct ip))) {
3677		if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
3678			KMOD_IPSTAT_INC(ips_toosmall);
3679			goto bad;
3680		}
3681	}
3682	ip = mtod(m, struct ip *);
3683	if (ip == NULL) goto bad;
3684
3685	if (ip->ip_v != IPVERSION) {
3686		KMOD_IPSTAT_INC(ips_badvers);
3687		goto bad;
3688	}
3689	hlen = ip->ip_hl << 2;
3690	if (hlen < sizeof(struct ip)) { /* minimum header length */
3691		KMOD_IPSTAT_INC(ips_badhlen);
3692		goto bad;
3693	}
3694	if (hlen > m->m_len) {
3695		if ((m = m_pullup(m, hlen)) == NULL) {
3696			KMOD_IPSTAT_INC(ips_badhlen);
3697			goto bad;
3698		}
3699		ip = mtod(m, struct ip *);
3700		if (ip == NULL) goto bad;
3701	}
3702
3703	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
3704		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
3705	} else {
3706		if (hlen == sizeof(struct ip)) {
3707			sum = in_cksum_hdr(ip);
3708		} else {
3709			sum = in_cksum(m, hlen);
3710		}
3711	}
3712	if (sum) {
3713		KMOD_IPSTAT_INC(ips_badsum);
3714		goto bad;
3715	}
3716
3717	/* Retrieve the packet length. */
3718	len = ntohs(ip->ip_len);
3719
3720	/*
3721	 * Check for additional length bogosity
3722	 */
3723	if (len < hlen) {
3724		KMOD_IPSTAT_INC(ips_badlen);
3725		goto bad;
3726	}
3727
3728	/*
3729	 * Check that the amount of data in the buffers
3730	 * is as at least much as the IP header would have us expect.
3731	 * Drop packet if shorter than we expect.
3732	 */
3733	if (m->m_pkthdr.len < len) {
3734		KMOD_IPSTAT_INC(ips_tooshort);
3735		goto bad;
3736	}
3737
3738	/* Checks out, proceed */
3739	*mp = m;
3740	return (0);
3741
3742bad:
3743	*mp = m;
3744	return (-1);
3745}
3746#endif /* INET */
3747
3748#ifdef INET6
3749/*
3750 * Same as above, but for IPv6.
3751 * Cut-and-pasted from ip6_input.c.
3752 * XXX Should we update ip6stat, or not?
3753 */
3754static int
3755bridge_ip6_checkbasic(struct mbuf **mp)
3756{
3757	struct mbuf *m = *mp;
3758	struct ip6_hdr *ip6;
3759
3760	/*
3761	 * If the IPv6 header is not aligned, slurp it up into a new
3762	 * mbuf with space for link headers, in the event we forward
3763	 * it.  Otherwise, if it is aligned, make sure the entire base
3764	 * IPv6 header is in the first mbuf of the chain.
3765	 */
3766	if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
3767		struct ifnet *inifp = m->m_pkthdr.rcvif;
3768		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
3769			    (max_linkhdr + 3) & ~3)) == NULL) {
3770			/* XXXJRT new stat, please */
3771			IP6STAT_INC(ip6s_toosmall);
3772			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
3773			goto bad;
3774		}
3775	} else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
3776		struct ifnet *inifp = m->m_pkthdr.rcvif;
3777		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
3778			IP6STAT_INC(ip6s_toosmall);
3779			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
3780			goto bad;
3781		}
3782	}
3783
3784	ip6 = mtod(m, struct ip6_hdr *);
3785
3786	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
3787		IP6STAT_INC(ip6s_badvers);
3788		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
3789		goto bad;
3790	}
3791
3792	/* Checks out, proceed */
3793	*mp = m;
3794	return (0);
3795
3796bad:
3797	*mp = m;
3798	return (-1);
3799}
3800#endif /* INET6 */
3801
3802#ifdef INET
3803/*
3804 * bridge_fragment:
3805 *
3806 *	Fragment mbuf chain in multiple packets and prepend ethernet header.
3807 */
3808static int
3809bridge_fragment(struct ifnet *ifp, struct mbuf **mp, struct ether_header *eh,
3810    int snap, struct llc *llc)
3811{
3812	struct mbuf *m = *mp, *nextpkt = NULL, *mprev = NULL, *mcur = NULL;
3813	struct ip *ip;
3814	int error = -1;
3815
3816	if (m->m_len < sizeof(struct ip) &&
3817	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
3818		goto dropit;
3819	ip = mtod(m, struct ip *);
3820
3821	m->m_pkthdr.csum_flags |= CSUM_IP;
3822	error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist);
3823	if (error)
3824		goto dropit;
3825
3826	/*
3827	 * Walk the chain and re-add the Ethernet header for
3828	 * each mbuf packet.
3829	 */
3830	for (mcur = m; mcur; mcur = mcur->m_nextpkt) {
3831		nextpkt = mcur->m_nextpkt;
3832		mcur->m_nextpkt = NULL;
3833		if (snap) {
3834			M_PREPEND(mcur, sizeof(struct llc), M_NOWAIT);
3835			if (mcur == NULL) {
3836				error = ENOBUFS;
3837				if (mprev != NULL)
3838					mprev->m_nextpkt = nextpkt;
3839				goto dropit;
3840			}
3841			bcopy(llc, mtod(mcur, caddr_t),sizeof(struct llc));
3842		}
3843
3844		M_PREPEND(mcur, ETHER_HDR_LEN, M_NOWAIT);
3845		if (mcur == NULL) {
3846			error = ENOBUFS;
3847			if (mprev != NULL)
3848				mprev->m_nextpkt = nextpkt;
3849			goto dropit;
3850		}
3851		bcopy(eh, mtod(mcur, caddr_t), ETHER_HDR_LEN);
3852
3853		/*
3854		 * The previous two M_PREPEND could have inserted one or two
3855		 * mbufs in front so we have to update the previous packet's
3856		 * m_nextpkt.
3857		 */
3858		mcur->m_nextpkt = nextpkt;
3859		if (mprev != NULL)
3860			mprev->m_nextpkt = mcur;
3861		else {
3862			/* The first mbuf in the original chain needs to be
3863			 * updated. */
3864			*mp = mcur;
3865		}
3866		mprev = mcur;
3867	}
3868
3869	KMOD_IPSTAT_INC(ips_fragmented);
3870	return (error);
3871
3872dropit:
3873	for (mcur = *mp; mcur; mcur = m) { /* droping the full packet chain */
3874		m = mcur->m_nextpkt;
3875		m_freem(mcur);
3876	}
3877	return (error);
3878}
3879#endif /* INET */
3880
3881static void
3882bridge_linkstate(struct ifnet *ifp)
3883{
3884	struct bridge_softc *sc = ifp->if_bridge;
3885	struct bridge_iflist *bif;
3886	struct epoch_tracker et;
3887
3888	NET_EPOCH_ENTER(et);
3889
3890	bif = bridge_lookup_member_if(sc, ifp);
3891	if (bif == NULL) {
3892		NET_EPOCH_EXIT(et);
3893		return;
3894	}
3895	bridge_linkcheck(sc);
3896
3897	bstp_linkstate(&bif->bif_stp);
3898
3899	NET_EPOCH_EXIT(et);
3900}
3901
3902static void
3903bridge_linkcheck(struct bridge_softc *sc)
3904{
3905	struct bridge_iflist *bif;
3906	int new_link, hasls;
3907
3908	BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc);
3909
3910	new_link = LINK_STATE_DOWN;
3911	hasls = 0;
3912	/* Our link is considered up if at least one of our ports is active */
3913	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
3914		if (bif->bif_ifp->if_capabilities & IFCAP_LINKSTATE)
3915			hasls++;
3916		if (bif->bif_ifp->if_link_state == LINK_STATE_UP) {
3917			new_link = LINK_STATE_UP;
3918			break;
3919		}
3920	}
3921	if (!CK_LIST_EMPTY(&sc->sc_iflist) && !hasls) {
3922		/* If no interfaces support link-state then we default to up */
3923		new_link = LINK_STATE_UP;
3924	}
3925	if_link_state_change(sc->sc_ifp, new_link);
3926}
3927