/*	$OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $	*/

/*
 * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
 * Copyright (c) 2007 Andrew Thompson <thompsa@FreeBSD.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/hash.h>
#include <sys/lock.h>
#include <sys/rmlock.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_clone.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_llc.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/bpf.h>

#if defined(INET) || defined(INET6)
#include <netinet/in.h>
#include <netinet/ip.h>
#endif
#ifdef INET
#include <netinet/in_systm.h>
#include <netinet/if_ether.h>
#endif

#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/in6_var.h>
#include <netinet6/in6_ifattach.h>
#endif

#include <net/if_vlan_var.h>
#include <net/if_lagg.h>
#include <net/ieee8023ad_lacp.h>

/* Special flags we should propagate to the lagg ports. */
static struct {
	int flag;
	int (*func)(struct ifnet *, int);
} lagg_pflags[] = {
	{IFF_PROMISC, ifpromisc},
	{IFF_ALLMULTI, if_allmulti},
	{0, NULL}
};

SLIST_HEAD(__trhead, lagg_softc) lagg_list;	/* list of laggs */
static struct mtx	lagg_list_mtx;
eventhandler_tag	lagg_detach_cookie = NULL;

static int	lagg_clone_create(struct if_clone *, int, caddr_t);
static void	lagg_clone_destroy(struct ifnet *);
static struct if_clone *lagg_cloner;
static const char laggname[] = "lagg";

static void	lagg_lladdr(struct lagg_softc *, uint8_t *);
static void	lagg_capabilities(struct lagg_softc *);
static void	lagg_port_lladdr(struct lagg_port *, uint8_t *);
static void	lagg_port_setlladdr(void *, int);
static int	lagg_port_create(struct lagg_softc *, struct ifnet *);
static int	lagg_port_destroy(struct lagg_port *, int);
static struct mbuf *lagg_input(struct ifnet *, struct mbuf *);
static void	lagg_linkstate(struct lagg_softc *);
static void	lagg_port_state(struct ifnet *, int);
static int	lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
static int	lagg_port_output(struct ifnet *, struct mbuf *,
		    const struct sockaddr *, struct route *);
static void	lagg_port_ifdetach(void *arg __unused, struct ifnet *);
#ifdef LAGG_PORT_STACKING
static int	lagg_port_checkstacking(struct lagg_softc *);
#endif
static void	lagg_port2req(struct lagg_port *, struct lagg_reqport *);
static void	lagg_init(void *);
static void	lagg_stop(struct lagg_softc *);
static int	lagg_ioctl(struct ifnet *, u_long, caddr_t);
static int	lagg_ether_setmulti(struct lagg_softc *);
static int	lagg_ether_cmdmulti(struct lagg_port *, int);
static int	lagg_setflag(struct lagg_port *, int, int,
		    int (*func)(struct ifnet *, int));
static int	lagg_setflags(struct lagg_port *, int status);
static int	lagg_transmit(struct ifnet *, struct mbuf *);
static void	lagg_qflush(struct ifnet *);
static int	lagg_media_change(struct ifnet *);
static void	lagg_media_status(struct ifnet *, struct ifmediareq *);
static struct lagg_port *lagg_link_active(struct lagg_softc *,
	    struct lagg_port *);
static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *);
static int	lagg_sysctl_active(SYSCTL_HANDLER_ARGS);

/* Simple round robin */
static int	lagg_rr_attach(struct lagg_softc *);
static int	lagg_rr_detach(struct lagg_softc *);
static int	lagg_rr_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *,
		    struct mbuf *);

/* Active failover */
static int	lagg_fail_attach(struct lagg_softc *);
static int	lagg_fail_detach(struct lagg_softc *);
static int	lagg_fail_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *,
		    struct mbuf *);

/* Loadbalancing */
static int	lagg_lb_attach(struct lagg_softc *);
static int	lagg_lb_detach(struct lagg_softc *);
static int	lagg_lb_port_create(struct lagg_port *);
static void	lagg_lb_port_destroy(struct lagg_port *);
static int	lagg_lb_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *,
		    struct mbuf *);
static int	lagg_lb_porttable(struct lagg_softc *, struct lagg_port *);

/* 802.3ad LACP */
static int	lagg_lacp_attach(struct lagg_softc *);
static int	lagg_lacp_detach(struct lagg_softc *);
static int	lagg_lacp_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *,
		    struct mbuf *);
static void	lagg_lacp_lladdr(struct lagg_softc *);

static void	lagg_callout(void *);

/* lagg protocol table */
static const struct {
	int			ti_proto;
	int			(*ti_attach)(struct lagg_softc *);
} lagg_protos[] = {
	{ LAGG_PROTO_ROUNDROBIN,	lagg_rr_attach },
	{ LAGG_PROTO_FAILOVER,		lagg_fail_attach },
	{ LAGG_PROTO_LOADBALANCE,	lagg_lb_attach },
	{ LAGG_PROTO_ETHERCHANNEL,	lagg_lb_attach },
	{ LAGG_PROTO_LACP,		lagg_lacp_attach },
	{ LAGG_PROTO_NONE,		NULL }
};
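
/*
 * Illustrative sketch (not part of the driver): a protocol from the table
 * above is normally selected from userland with ifconfig(8), which issues
 * the SIOCSLAGG ioctl handled in lagg_ioctl() below, e.g.:
 *
 *	ifconfig lagg0 create
 *	ifconfig lagg0 laggproto lacp laggport em0 laggport em1
 *
 * The member interface names em0/em1 are examples only.
 */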

SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0,
    "Link Aggregation");

static int lagg_failover_rx_all = 0; /* Allow input on any failover links */
SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW,
    &lagg_failover_rx_all, 0,
    "Accept input from any interface in a failover lagg");
static int def_use_flowid = 1; /* Default value for using M_FLOWID */
TUNABLE_INT("net.link.lagg.default_use_flowid", &def_use_flowid);
SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RW,
    &def_use_flowid, 0,
    "Default setting for using flow id for load sharing");
static int def_flowid_shift = 16; /* Default value for the flowid shift */
TUNABLE_INT("net.link.lagg.default_flowid_shift", &def_flowid_shift);
SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RW,
    &def_flowid_shift, 0,
    "Default setting for flowid shift for load sharing");
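
/*
 * Example of typical usage (a sketch, not part of the driver): the defaults
 * above may be set at boot via loader.conf(5) or at runtime via sysctl(8):
 *
 *	net.link.lagg.default_use_flowid="0"	(in /boot/loader.conf)
 *	# sysctl net.link.lagg.failover_rx_all=1
 *
 * Per-interface use_flowid/flowid_shift sysctl nodes are additionally
 * created in lagg_clone_create() below.
 */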

static int
lagg_modevent(module_t mod, int type, void *data)
{

	switch (type) {
	case MOD_LOAD:
		mtx_init(&lagg_list_mtx, "if_lagg list", NULL, MTX_DEF);
		SLIST_INIT(&lagg_list);
		lagg_cloner = if_clone_simple(laggname, lagg_clone_create,
		    lagg_clone_destroy, 0);
		lagg_input_p = lagg_input;
		lagg_linkstate_p = lagg_port_state;
		lagg_detach_cookie = EVENTHANDLER_REGISTER(
		    ifnet_departure_event, lagg_port_ifdetach, NULL,
		    EVENTHANDLER_PRI_ANY);
		break;
	case MOD_UNLOAD:
		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
		    lagg_detach_cookie);
		if_clone_detach(lagg_cloner);
		lagg_input_p = NULL;
		lagg_linkstate_p = NULL;
		mtx_destroy(&lagg_list_mtx);
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (0);
}

static moduledata_t lagg_mod = {
	"if_lagg",
	lagg_modevent,
	0
};

DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_lagg, 1);
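
/*
 * Usage note (a sketch, not part of the driver): the module can be loaded
 * with "kldload if_lagg" or via if_lagg_load="YES" in loader.conf(5).
 * MOD_LOAD above hooks lagg_input_p and lagg_linkstate_p so that member
 * port input and link state changes are steered into this driver.
 */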

/*
 * This routine is run via a vlan
 * config EVENT
 */
static void
lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
{
	struct lagg_softc	*sc = ifp->if_softc;
	struct lagg_port	*lp;
	struct rm_priotracker	tracker;

	if (ifp->if_softc != arg)	/* Not our event */
		return;

	LAGG_RLOCK(sc, &tracker);
	if (!SLIST_EMPTY(&sc->sc_ports)) {
		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
			EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
	}
	LAGG_RUNLOCK(sc, &tracker);
}

/*
 * This routine is run via a vlan
 * unconfig EVENT
 */
static void
lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
{
	struct lagg_softc	*sc = ifp->if_softc;
	struct lagg_port	*lp;
	struct rm_priotracker	tracker;

	if (ifp->if_softc != arg)	/* Not our event */
		return;

	LAGG_RLOCK(sc, &tracker);
	if (!SLIST_EMPTY(&sc->sc_ports)) {
		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
			EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
	}
	LAGG_RUNLOCK(sc, &tracker);
}

static int
lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
	struct lagg_softc *sc;
	struct ifnet *ifp;
	int i, error = 0;
	static const u_char eaddr[6];	/* 00:00:00:00:00:00 */
	struct sysctl_oid *oid;
	char num[14];			/* sufficient for 32 bits */

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		free(sc, M_DEVBUF);
		return (ENOSPC);
	}

	sc->sc_ipackets = counter_u64_alloc(M_WAITOK);
	sc->sc_opackets = counter_u64_alloc(M_WAITOK);
	sc->sc_ibytes = counter_u64_alloc(M_WAITOK);
	sc->sc_obytes = counter_u64_alloc(M_WAITOK);

	sysctl_ctx_init(&sc->ctx);
	snprintf(num, sizeof(num), "%u", unit);
	sc->use_flowid = def_use_flowid;
	sc->flowid_shift = def_flowid_shift;
	sc->sc_oid = oid = SYSCTL_ADD_NODE(&sc->ctx,
		&SYSCTL_NODE_CHILDREN(_net_link, lagg),
		OID_AUTO, num, CTLFLAG_RD, NULL, "");
	SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
		"use_flowid", CTLTYPE_INT|CTLFLAG_RW, &sc->use_flowid,
		sc->use_flowid, "Use flow id for load sharing");
	SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
		"flowid_shift", CTLTYPE_INT|CTLFLAG_RW, &sc->flowid_shift,
		sc->flowid_shift,
		"Shift flowid bits to prevent multiqueue collisions");
	SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
		"count", CTLTYPE_INT|CTLFLAG_RD, &sc->sc_count, sc->sc_count,
		"Total number of ports");
	SYSCTL_ADD_PROC(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
		"active", CTLTYPE_INT|CTLFLAG_RD, sc, 0, lagg_sysctl_active,
		"I", "Total number of active ports");
	SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
		"flapping", CTLTYPE_INT|CTLFLAG_RD, &sc->sc_flapping,
		sc->sc_flapping, "Total number of port change events");
	/* Hash all layers by default */
	sc->sc_flags = LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4;

	sc->sc_proto = LAGG_PROTO_NONE;
	for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) {
		if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) {
			sc->sc_proto = lagg_protos[i].ti_proto;
			if ((error = lagg_protos[i].ti_attach(sc)) != 0) {
				if_free(ifp);
				free(sc, M_DEVBUF);
				return (error);
			}
			break;
		}
	}
	LAGG_LOCK_INIT(sc);
	LAGG_CALLOUT_LOCK_INIT(sc);
	SLIST_INIT(&sc->sc_ports);
	TASK_INIT(&sc->sc_lladdr_task, 0, lagg_port_setlladdr, sc);

	/*
	 * This uses the callout lock rather than the rmlock; one can't
	 * hold said rmlock during SWI.
	 */
	callout_init_mtx(&sc->sc_callout, &sc->sc_call_mtx, 0);

	/* Initialise pseudo media types */
	ifmedia_init(&sc->sc_media, 0, lagg_media_change,
	    lagg_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_initname(ifp, laggname, unit);
	ifp->if_softc = sc;
	ifp->if_transmit = lagg_transmit;
	ifp->if_qflush = lagg_qflush;
	ifp->if_init = lagg_init;
	ifp->if_ioctl = lagg_ioctl;
	ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
	ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;

	/*
	 * Attach as an ordinary ethernet device, children will be attached
	 * as special device IFT_IEEE8023ADLAG.
	 */
	ether_ifattach(ifp, eaddr);

	sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
		lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
	sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
		lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);

	/* Insert into the global list of laggs */
	mtx_lock(&lagg_list_mtx);
	SLIST_INSERT_HEAD(&lagg_list, sc, sc_entries);
	mtx_unlock(&lagg_list_mtx);

	callout_reset(&sc->sc_callout, hz, lagg_callout, sc);

	return (0);
}

static void
lagg_clone_destroy(struct ifnet *ifp)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
	struct lagg_port *lp;

	LAGG_WLOCK(sc);

	lagg_stop(sc);
	ifp->if_flags &= ~IFF_UP;

	EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
	EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);

	/* Shutdown and remove lagg ports */
	while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL)
		lagg_port_destroy(lp, 1);
	/* Unhook the aggregation protocol */
	if (sc->sc_detach != NULL)
		(*sc->sc_detach)(sc);

	LAGG_WUNLOCK(sc);

	sysctl_ctx_free(&sc->ctx);
	ifmedia_removeall(&sc->sc_media);
	ether_ifdetach(ifp);
	if_free(ifp);

	/* This grabs sc_callout_mtx, serialising it correctly */
	callout_drain(&sc->sc_callout);

	/* At this point it's drained; we can free this */
	counter_u64_free(sc->sc_ipackets);
	counter_u64_free(sc->sc_opackets);
	counter_u64_free(sc->sc_ibytes);
	counter_u64_free(sc->sc_obytes);

	mtx_lock(&lagg_list_mtx);
	SLIST_REMOVE(&lagg_list, sc, lagg_softc, sc_entries);
	mtx_unlock(&lagg_list_mtx);

	taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task);
	LAGG_LOCK_DESTROY(sc);
	LAGG_CALLOUT_LOCK_DESTROY(sc);
	free(sc, M_DEVBUF);
}

static void
lagg_lladdr(struct lagg_softc *sc, uint8_t *lladdr)
{
	struct ifnet *ifp = sc->sc_ifp;

	if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
		return;

	bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN);
	/* Let the protocol know the MAC has changed */
	if (sc->sc_lladdr != NULL)
		(*sc->sc_lladdr)(sc);
	EVENTHANDLER_INVOKE(iflladdr_event, ifp);
}

static void
lagg_capabilities(struct lagg_softc *sc)
{
	struct lagg_port *lp;
	int cap = ~0, ena = ~0;
	u_long hwa = ~0UL;
#if defined(INET) || defined(INET6)
	u_int hw_tsomax = IP_MAXPACKET;	/* Initialize to the maximum value. */
#else
	u_int hw_tsomax = ~0;	/* if_hw_tsomax is only for INET/INET6, but.. */
#endif

	LAGG_WLOCK_ASSERT(sc);

	/* Get capabilities from the lagg ports */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		cap &= lp->lp_ifp->if_capabilities;
		ena &= lp->lp_ifp->if_capenable;
		hwa &= lp->lp_ifp->if_hwassist;
		/* Set to the minimum value of the lagg ports. */
		if (lp->lp_ifp->if_hw_tsomax < hw_tsomax &&
		    lp->lp_ifp->if_hw_tsomax > 0)
			hw_tsomax = lp->lp_ifp->if_hw_tsomax;
	}
	cap = (cap == ~0 ? 0 : cap);
	ena = (ena == ~0 ? 0 : ena);
	hwa = (hwa == ~0 ? 0 : hwa);

	if (sc->sc_ifp->if_capabilities != cap ||
	    sc->sc_ifp->if_capenable != ena ||
	    sc->sc_ifp->if_hwassist != hwa ||
	    sc->sc_ifp->if_hw_tsomax != hw_tsomax) {
		sc->sc_ifp->if_capabilities = cap;
		sc->sc_ifp->if_capenable = ena;
		sc->sc_ifp->if_hwassist = hwa;
		sc->sc_ifp->if_hw_tsomax = hw_tsomax;
		getmicrotime(&sc->sc_ifp->if_lastchange);

		if (sc->sc_ifflags & IFF_DEBUG)
			if_printf(sc->sc_ifp,
			    "capabilities 0x%08x enabled 0x%08x\n", cap, ena);
	}
}

static void
lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr)
{
	struct lagg_softc *sc = lp->lp_softc;
	struct ifnet *ifp = lp->lp_ifp;
	struct lagg_llq *llq;
	int pending = 0;

	LAGG_WLOCK_ASSERT(sc);

	if (lp->lp_detaching ||
	    memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
		return;

	/* Check to make sure it's not already queued to be changed */
	SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
		if (llq->llq_ifp == ifp) {
			pending = 1;
			break;
		}
	}

	if (!pending) {
		llq = malloc(sizeof(struct lagg_llq), M_DEVBUF, M_NOWAIT);
		if (llq == NULL)	/* XXX what to do */
			return;
	}

	/* Update the lladdr even if pending, it may have changed */
	llq->llq_ifp = ifp;
	bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN);

	if (!pending)
		SLIST_INSERT_HEAD(&sc->sc_llq_head, llq, llq_entries);

	taskqueue_enqueue(taskqueue_swi, &sc->sc_lladdr_task);
}

/*
 * Set the interface MAC address from a taskqueue to avoid a LOR.
 */
static void
lagg_port_setlladdr(void *arg, int pending)
{
	struct lagg_softc *sc = (struct lagg_softc *)arg;
	struct lagg_llq *llq, *head;
	struct ifnet *ifp;
	int error;

	/* Grab a local reference of the queue and remove it from the softc */
	LAGG_WLOCK(sc);
	head = SLIST_FIRST(&sc->sc_llq_head);
	SLIST_FIRST(&sc->sc_llq_head) = NULL;
	LAGG_WUNLOCK(sc);

	/*
	 * Traverse the queue and set the lladdr on each ifp. It is safe to do
	 * unlocked as we have the only reference to it.
	 */
	for (llq = head; llq != NULL; llq = head) {
		ifp = llq->llq_ifp;

		/* Set the link layer address */
		CURVNET_SET(ifp->if_vnet);
		error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN);
		CURVNET_RESTORE();
		if (error)
			printf("%s: setlladdr failed on %s\n", __func__,
			    ifp->if_xname);

		head = SLIST_NEXT(llq, llq_entries);
		free(llq, M_DEVBUF);
	}
}

static int
lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
{
	struct lagg_softc *sc_ptr;
	struct lagg_port *lp;
	int error = 0;

	LAGG_WLOCK_ASSERT(sc);

	/* Limit the maximal number of lagg ports */
	if (sc->sc_count >= LAGG_MAX_PORTS)
		return (ENOSPC);

	/* Check if port has already been associated to a lagg */
	if (ifp->if_lagg != NULL) {
		/* Port is already in the current lagg? */
		lp = (struct lagg_port *)ifp->if_lagg;
		if (lp->lp_softc == sc)
			return (EEXIST);
		return (EBUSY);
	}

	/* XXX Disallow non-ethernet interfaces (this should be any of 802) */
	if (ifp->if_type != IFT_ETHER)
		return (EPROTONOSUPPORT);

#ifdef INET6
	/*
	 * The member interface should not have an inet6 address
	 * because two interfaces with a valid link-local scope zone
	 * must not be merged in any form.  This restriction is needed
	 * to prevent violation of the link-local scope zone.  An
	 * attempt to add a member interface which has inet6 addresses
	 * triggers removal of all inet6 addresses on that interface.
	 */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		if (in6ifa_llaonifp(lp->lp_ifp)) {
			in6_ifdetach(lp->lp_ifp);
			if_printf(sc->sc_ifp,
			    "IPv6 addresses on %s have been removed "
			    "before adding it as a member to prevent "
			    "IPv6 address scope violation.\n",
			    lp->lp_ifp->if_xname);
		}
	}
	if (in6ifa_llaonifp(ifp)) {
		in6_ifdetach(ifp);
		if_printf(sc->sc_ifp,
		    "IPv6 addresses on %s have been removed "
		    "before adding it as a member to prevent "
		    "IPv6 address scope violation.\n",
		    ifp->if_xname);
	}
#endif
	/* Allow the first Ethernet member to define the MTU */
	if (SLIST_EMPTY(&sc->sc_ports))
		sc->sc_ifp->if_mtu = ifp->if_mtu;
	else if (sc->sc_ifp->if_mtu != ifp->if_mtu) {
		if_printf(sc->sc_ifp, "invalid MTU for %s\n",
		    ifp->if_xname);
		return (EINVAL);
	}

	if ((lp = malloc(sizeof(struct lagg_port),
	    M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (ENOMEM);

	/* Check if port is a stacked lagg */
	mtx_lock(&lagg_list_mtx);
	SLIST_FOREACH(sc_ptr, &lagg_list, sc_entries) {
		if (ifp == sc_ptr->sc_ifp) {
			mtx_unlock(&lagg_list_mtx);
			free(lp, M_DEVBUF);
			return (EINVAL);
			/* XXX disable stacking for the moment, it's untested */
#ifdef LAGG_PORT_STACKING
			lp->lp_flags |= LAGG_PORT_STACK;
			if (lagg_port_checkstacking(sc_ptr) >=
			    LAGG_MAX_STACKING) {
				mtx_unlock(&lagg_list_mtx);
				free(lp, M_DEVBUF);
				return (E2BIG);
			}
#endif
		}
	}
	mtx_unlock(&lagg_list_mtx);

	/* Change the interface type */
	lp->lp_iftype = ifp->if_type;
	ifp->if_type = IFT_IEEE8023ADLAG;
	ifp->if_lagg = lp;
	lp->lp_ioctl = ifp->if_ioctl;
	ifp->if_ioctl = lagg_port_ioctl;
	lp->lp_output = ifp->if_output;
	ifp->if_output = lagg_port_output;

	lp->lp_ifp = ifp;
	lp->lp_softc = sc;

	/* Save port link layer address */
	bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN);

	if (SLIST_EMPTY(&sc->sc_ports)) {
		sc->sc_primary = lp;
		lagg_lladdr(sc, IF_LLADDR(ifp));
	} else {
		/* Update link layer address for this port */
		lagg_port_lladdr(lp, IF_LLADDR(sc->sc_ifp));
	}

	/* Insert into the list of ports */
	SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries);
	sc->sc_count++;

	/* Update lagg capabilities */
	lagg_capabilities(sc);
	lagg_linkstate(sc);

	/* Add multicast addresses and interface flags to this port */
	lagg_ether_cmdmulti(lp, 1);
	lagg_setflags(lp, 1);

	if (sc->sc_port_create != NULL)
		error = (*sc->sc_port_create)(lp);
	if (error) {
		/* remove the port again, without calling sc_port_destroy */
		lagg_port_destroy(lp, 0);
		return (error);
	}

	return (error);
}

#ifdef LAGG_PORT_STACKING
static int
lagg_port_checkstacking(struct lagg_softc *sc)
{
	struct lagg_softc *sc_ptr;
	struct lagg_port *lp;
	int m = 0;

	LAGG_WLOCK_ASSERT(sc);

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		if (lp->lp_flags & LAGG_PORT_STACK) {
			sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc;
			m = MAX(m, lagg_port_checkstacking(sc_ptr));
		}
	}

	return (m + 1);
}
#endif

static int
lagg_port_destroy(struct lagg_port *lp, int runpd)
{
	struct lagg_softc *sc = lp->lp_softc;
	struct lagg_port *lp_ptr;
	struct lagg_llq *llq;
	struct ifnet *ifp = lp->lp_ifp;

	LAGG_WLOCK_ASSERT(sc);

	if (runpd && sc->sc_port_destroy != NULL)
		(*sc->sc_port_destroy)(lp);

	/*
	 * Remove multicast addresses and interface flags from this port and
	 * reset the MAC address, skip if the interface is being detached.
	 */
	if (!lp->lp_detaching) {
		lagg_ether_cmdmulti(lp, 0);
		lagg_setflags(lp, 0);
		lagg_port_lladdr(lp, lp->lp_lladdr);
	}

	/* Restore interface */
	ifp->if_type = lp->lp_iftype;
	ifp->if_ioctl = lp->lp_ioctl;
	ifp->if_output = lp->lp_output;
	ifp->if_lagg = NULL;

	/* Finally, remove the port from the lagg */
	SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries);
	sc->sc_count--;

	/* Update the primary interface */
	if (lp == sc->sc_primary) {
		uint8_t lladdr[ETHER_ADDR_LEN];

		if ((lp_ptr = SLIST_FIRST(&sc->sc_ports)) == NULL) {
			bzero(&lladdr, ETHER_ADDR_LEN);
		} else {
			bcopy(lp_ptr->lp_lladdr,
			    lladdr, ETHER_ADDR_LEN);
		}
		lagg_lladdr(sc, lladdr);
		sc->sc_primary = lp_ptr;

		/* Update link layer address for each port */
		SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
			lagg_port_lladdr(lp_ptr, lladdr);
	}

	/* Remove any pending lladdr changes from the queue */
	if (lp->lp_detaching) {
		SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
			if (llq->llq_ifp == ifp) {
				SLIST_REMOVE(&sc->sc_llq_head, llq, lagg_llq,
				    llq_entries);
				free(llq, M_DEVBUF);
				break;	/* Only appears once */
			}
		}
	}

	if (lp->lp_ifflags)
		if_printf(ifp, "%s: lp_ifflags unclean\n", __func__);

	free(lp, M_DEVBUF);

	/* Update lagg capabilities */
	lagg_capabilities(sc);
	lagg_linkstate(sc);

	return (0);
}

static int
lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct lagg_reqport *rp = (struct lagg_reqport *)data;
	struct lagg_softc *sc;
	struct lagg_port *lp = NULL;
	int error = 0;
	struct rm_priotracker tracker;

	/* Should be checked by the caller */
	if (ifp->if_type != IFT_IEEE8023ADLAG ||
	    (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
		goto fallback;

	switch (cmd) {
	case SIOCGLAGGPORT:
		if (rp->rp_portname[0] == '\0' ||
		    ifunit(rp->rp_portname) != ifp) {
			error = EINVAL;
			break;
		}

		LAGG_RLOCK(sc, &tracker);
		if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
			error = ENOENT;
			LAGG_RUNLOCK(sc, &tracker);
			break;
		}

		lagg_port2req(lp, rp);
		LAGG_RUNLOCK(sc, &tracker);
		break;

	case SIOCSIFCAP:
		if (lp->lp_ioctl == NULL) {
			error = EINVAL;
			break;
		}
		error = (*lp->lp_ioctl)(ifp, cmd, data);
		if (error)
			break;

		/* Update lagg interface capabilities */
		LAGG_WLOCK(sc);
		lagg_capabilities(sc);
		LAGG_WUNLOCK(sc);
		break;

	case SIOCSIFMTU:
		/* Do not allow the MTU to be changed once joined */
		error = EINVAL;
		break;

	default:
		goto fallback;
	}

	return (error);

fallback:
	if (lp != NULL && lp->lp_ioctl != NULL)
		return ((*lp->lp_ioctl)(ifp, cmd, data));

	return (EINVAL);
}

/*
 * For direct output to child ports.
 */
static int
lagg_port_output(struct ifnet *ifp, struct mbuf *m,
	const struct sockaddr *dst, struct route *ro)
{
	struct lagg_port *lp = ifp->if_lagg;

	switch (dst->sa_family) {
		case pseudo_AF_HDRCMPLT:
		case AF_UNSPEC:
			return ((*lp->lp_output)(ifp, m, dst, ro));
	}

	/* drop any other frames */
	m_freem(m);
	return (ENETDOWN);
}

static void
lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp)
{
	struct lagg_port *lp;
	struct lagg_softc *sc;

	if ((lp = ifp->if_lagg) == NULL)
		return;
	/* If the ifnet is just being renamed, don't do anything. */
	if (ifp->if_flags & IFF_RENAMING)
		return;

	sc = lp->lp_softc;

	LAGG_WLOCK(sc);
	lp->lp_detaching = 1;
	lagg_port_destroy(lp, 1);
	LAGG_WUNLOCK(sc);
}

static void
lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
{
	struct lagg_softc *sc = lp->lp_softc;

	strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname));
	strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname));
	rp->rp_prio = lp->lp_prio;
	rp->rp_flags = lp->lp_flags;
	if (sc->sc_portreq != NULL)
		(*sc->sc_portreq)(lp, (caddr_t)&rp->rp_psc);

	/* Add protocol specific flags */
	switch (sc->sc_proto) {
		case LAGG_PROTO_FAILOVER:
			if (lp == sc->sc_primary)
				rp->rp_flags |= LAGG_PORT_MASTER;
			if (lp == lagg_link_active(sc, sc->sc_primary))
				rp->rp_flags |= LAGG_PORT_ACTIVE;
			break;

		case LAGG_PROTO_ROUNDROBIN:
		case LAGG_PROTO_LOADBALANCE:
		case LAGG_PROTO_ETHERCHANNEL:
			if (LAGG_PORTACTIVE(lp))
				rp->rp_flags |= LAGG_PORT_ACTIVE;
			break;

		case LAGG_PROTO_LACP:
			/* LACP has a different definition of active */
			if (lacp_isactive(lp))
				rp->rp_flags |= LAGG_PORT_ACTIVE;
			if (lacp_iscollecting(lp))
				rp->rp_flags |= LAGG_PORT_COLLECTING;
			if (lacp_isdistributing(lp))
				rp->rp_flags |= LAGG_PORT_DISTRIBUTING;
			break;
	}
}

static void
lagg_init(void *xsc)
{
	struct lagg_softc *sc = (struct lagg_softc *)xsc;
	struct lagg_port *lp;
	struct ifnet *ifp = sc->sc_ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		return;

	LAGG_WLOCK(sc);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	/* Update the port lladdrs */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lagg_port_lladdr(lp, IF_LLADDR(ifp));

	if (sc->sc_init != NULL)
		(*sc->sc_init)(sc);

	LAGG_WUNLOCK(sc);
}

static void
lagg_stop(struct lagg_softc *sc)
{
	struct ifnet *ifp = sc->sc_ifp;

	LAGG_WLOCK_ASSERT(sc);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;

	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;

	if (sc->sc_stop != NULL)
		(*sc->sc_stop)(sc);
}

static int
lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
	struct lagg_reqall *ra = (struct lagg_reqall *)data;
	struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
	struct lagg_reqflags *rf = (struct lagg_reqflags *)data;
	struct ifreq *ifr = (struct ifreq *)data;
	struct lagg_port *lp;
	struct ifnet *tpif;
	struct thread *td = curthread;
	char *buf, *outbuf;
	int count, buflen, len, error = 0;
	struct rm_priotracker tracker;

	bzero(&rpbuf, sizeof(rpbuf));

	switch (cmd) {
	case SIOCGLAGG:
		LAGG_RLOCK(sc, &tracker);
		count = 0;
		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
			count++;
		buflen = count * sizeof(struct lagg_reqport);
		LAGG_RUNLOCK(sc, &tracker);

		outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);

		LAGG_RLOCK(sc, &tracker);
		ra->ra_proto = sc->sc_proto;
		if (sc->sc_req != NULL)
			(*sc->sc_req)(sc, (caddr_t)&ra->ra_psc);

		count = 0;
		buf = outbuf;
		len = min(ra->ra_size, buflen);
		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
			if (len < sizeof(rpbuf))
				break;

			lagg_port2req(lp, &rpbuf);
			memcpy(buf, &rpbuf, sizeof(rpbuf));
			count++;
			buf += sizeof(rpbuf);
			len -= sizeof(rpbuf);
		}
		LAGG_RUNLOCK(sc, &tracker);
		ra->ra_ports = count;
		ra->ra_size = count * sizeof(rpbuf);
		error = copyout(outbuf, ra->ra_port, ra->ra_size);
		free(outbuf, M_TEMP);
		break;
	case SIOCSLAGG:
		error = priv_check(td, PRIV_NET_LAGG);
		if (error)
			break;
		if (ra->ra_proto >= LAGG_PROTO_MAX) {
			error = EPROTONOSUPPORT;
			break;
		}
		LAGG_WLOCK(sc);
		if (sc->sc_proto != LAGG_PROTO_NONE) {
			/* Reset protocol first in case detach unlocks */
			sc->sc_proto = LAGG_PROTO_NONE;
			error = sc->sc_detach(sc);
			sc->sc_detach = NULL;
			sc->sc_start = NULL;
			sc->sc_input = NULL;
			sc->sc_port_create = NULL;
			sc->sc_port_destroy = NULL;
			sc->sc_linkstate = NULL;
			sc->sc_init = NULL;
			sc->sc_stop = NULL;
			sc->sc_lladdr = NULL;
			sc->sc_req = NULL;
			sc->sc_portreq = NULL;
		} else if (sc->sc_input != NULL) {
			/* Still detaching */
			error = EBUSY;
		}
		if (error != 0) {
			LAGG_WUNLOCK(sc);
			break;
		}
		for (int i = 0; i < (sizeof(lagg_protos) /
		    sizeof(lagg_protos[0])); i++) {
			if (lagg_protos[i].ti_proto == ra->ra_proto) {
				if (sc->sc_ifflags & IFF_DEBUG)
					printf("%s: using proto %u\n",
					    sc->sc_ifname,
					    lagg_protos[i].ti_proto);
				sc->sc_proto = lagg_protos[i].ti_proto;
				if (sc->sc_proto != LAGG_PROTO_NONE)
					error = lagg_protos[i].ti_attach(sc);
				LAGG_WUNLOCK(sc);
				return (error);
			}
		}
		LAGG_WUNLOCK(sc);
		error = EPROTONOSUPPORT;
		break;
	case SIOCGLAGGFLAGS:
		rf->rf_flags = sc->sc_flags;
		break;
	case SIOCSLAGGHASH:
		error = priv_check(td, PRIV_NET_LAGG);
		if (error)
			break;
		if ((rf->rf_flags & LAGG_F_HASHMASK) == 0) {
			error = EINVAL;
			break;
		}
		LAGG_WLOCK(sc);
		sc->sc_flags &= ~LAGG_F_HASHMASK;
		sc->sc_flags |= rf->rf_flags & LAGG_F_HASHMASK;
		LAGG_WUNLOCK(sc);
		break;
	case SIOCGLAGGPORT:
		if (rp->rp_portname[0] == '\0' ||
		    (tpif = ifunit(rp->rp_portname)) == NULL) {
			error = EINVAL;
			break;
		}

		LAGG_RLOCK(sc, &tracker);
		if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
		    lp->lp_softc != sc) {
			error = ENOENT;
			LAGG_RUNLOCK(sc, &tracker);
			break;
		}

		lagg_port2req(lp, rp);
		LAGG_RUNLOCK(sc, &tracker);
		break;
	case SIOCSLAGGPORT:
		error = priv_check(td, PRIV_NET_LAGG);
		if (error)
			break;
		if (rp->rp_portname[0] == '\0' ||
		    (tpif = ifunit(rp->rp_portname)) == NULL) {
			error = EINVAL;
			break;
		}
		LAGG_WLOCK(sc);
		error = lagg_port_create(sc, tpif);
		LAGG_WUNLOCK(sc);
		break;
	case SIOCSLAGGDELPORT:
		error = priv_check(td, PRIV_NET_LAGG);
		if (error)
			break;
		if (rp->rp_portname[0] == '\0' ||
		    (tpif = ifunit(rp->rp_portname)) == NULL) {
			error = EINVAL;
			break;
		}

		LAGG_WLOCK(sc);
		if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
		    lp->lp_softc != sc) {
			error = ENOENT;
			LAGG_WUNLOCK(sc);
			break;
		}

		error = lagg_port_destroy(lp, 1);
		LAGG_WUNLOCK(sc);
		break;
	case SIOCSIFFLAGS:
		/* Set flags on ports too */
		LAGG_WLOCK(sc);
		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
			lagg_setflags(lp, 1);
		}
		LAGG_WUNLOCK(sc);

		if (!(ifp->if_flags & IFF_UP) &&
		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			/*
			 * If interface is marked down and it is running,
			 * then stop and disable it.
			 */
			LAGG_WLOCK(sc);
			lagg_stop(sc);
			LAGG_WUNLOCK(sc);
		} else if ((ifp->if_flags & IFF_UP) &&
		    !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			/*
			 * If interface is marked up and it is stopped, then
			 * start it.
			 */
			(*ifp->if_init)(sc);
		}
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		LAGG_WLOCK(sc);
		error = lagg_ether_setmulti(sc);
		LAGG_WUNLOCK(sc);
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
		break;

	case SIOCSIFCAP:
	case SIOCSIFMTU:
		/* Do not allow the MTU or caps to be directly changed */
		error = EINVAL;
		break;

	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}
	return (error);
}

static int
lagg_ether_setmulti(struct lagg_softc *sc)
{
	struct lagg_port *lp;

	LAGG_WLOCK_ASSERT(sc);

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		/* First, remove any existing filter entries. */
		lagg_ether_cmdmulti(lp, 0);
		/* copy all addresses from the lagg interface to the port */
		lagg_ether_cmdmulti(lp, 1);
	}
	return (0);
}

static int
lagg_ether_cmdmulti(struct lagg_port *lp, int set)
{
	struct lagg_softc *sc = lp->lp_softc;
	struct ifnet *ifp = lp->lp_ifp;
	struct ifnet *scifp = sc->sc_ifp;
	struct lagg_mc *mc;
	struct ifmultiaddr *ifma;
	int error;

	LAGG_WLOCK_ASSERT(sc);

	if (set) {
		IF_ADDR_WLOCK(scifp);
		TAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
			if (ifma->ifma_addr->sa_family != AF_LINK)
				continue;
			mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT);
			if (mc == NULL) {
				IF_ADDR_WUNLOCK(scifp);
				return (ENOMEM);
			}
			bcopy(ifma->ifma_addr, &mc->mc_addr,
			    ifma->ifma_addr->sa_len);
			mc->mc_addr.sdl_index = ifp->if_index;
			mc->mc_ifma = NULL;
			SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries);
		}
		IF_ADDR_WUNLOCK(scifp);
		SLIST_FOREACH (mc, &lp->lp_mc_head, mc_entries) {
			error = if_addmulti(ifp,
			    (struct sockaddr *)&mc->mc_addr, &mc->mc_ifma);
			if (error)
				return (error);
		}
	} else {
		while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
			SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
			if (mc->mc_ifma && !lp->lp_detaching)
				if_delmulti_ifma(mc->mc_ifma);
			free(mc, M_DEVBUF);
		}
	}
	return (0);
}

/* Handle a ref counted flag that should be set on the lagg port as well */
static int
lagg_setflag(struct lagg_port *lp, int flag, int status,
	     int (*func)(struct ifnet *, int))
{
	struct lagg_softc *sc = lp->lp_softc;
	struct ifnet *scifp = sc->sc_ifp;
	struct ifnet *ifp = lp->lp_ifp;
	int error;

	LAGG_WLOCK_ASSERT(sc);

	status = status ? (scifp->if_flags & flag) : 0;
	/* Now "status" contains the flag value or 0 */

	/*
	 * See if the recorded port status differs from what we want it
	 * to be.  If it does, flip it.  We record the port's status in
	 * lp_ifflags so that we won't clear a flag we haven't set.  In
	 * fact, we don't clear or set the port's flags directly, but
	 * acquire or release references to them, which is why we can be
	 * sure the recorded flags still agree with the actual ones.
	 */
	if (status != (lp->lp_ifflags & flag)) {
		error = (*func)(ifp, status);
		if (error)
			return (error);
		lp->lp_ifflags &= ~flag;
		lp->lp_ifflags |= status;
	}
	return (0);
}

/*
 * Handle IFF_* flags that require certain changes on the lagg port:
 * if "status" is true, update the port's flags to match the lagg;
 * if "status" is false, forcibly clear the flags set on the port.
 */
static int
lagg_setflags(struct lagg_port *lp, int status)
{
	int error, i;

	for (i = 0; lagg_pflags[i].flag; i++) {
		error = lagg_setflag(lp, lagg_pflags[i].flag,
		    status, lagg_pflags[i].func);
		if (error)
			return (error);
	}
	return (0);
}

static int
lagg_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
	int error, len, mcast;
	struct rm_priotracker tracker;

	len = m->m_pkthdr.len;
	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;

	LAGG_RLOCK(sc, &tracker);
	/* We need a Tx algorithm and at least one port */
	if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
		LAGG_RUNLOCK(sc, &tracker);
		m_freem(m);
		ifp->if_oerrors++;
		return (ENXIO);
	}

	ETHER_BPF_MTAP(ifp, m);

	error = (*sc->sc_start)(sc, m);
	LAGG_RUNLOCK(sc, &tracker);

	if (error == 0) {
		counter_u64_add(sc->sc_opackets, 1);
		counter_u64_add(sc->sc_obytes, len);
		ifp->if_omcasts += mcast;
	} else
		ifp->if_oerrors++;

	return (error);
}

/*
 * The ifp->if_qflush entry point for lagg(4) is a no-op.
 */
static void
lagg_qflush(struct ifnet *ifp __unused)
{
}

static struct mbuf *
lagg_input(struct ifnet *ifp, struct mbuf *m)
{
	struct lagg_port *lp = ifp->if_lagg;
	struct lagg_softc *sc = lp->lp_softc;
	struct ifnet *scifp = sc->sc_ifp;
	struct rm_priotracker tracker;

	LAGG_RLOCK(sc, &tracker);
	if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
	    (lp->lp_flags & LAGG_PORT_DISABLED) ||
	    sc->sc_proto == LAGG_PROTO_NONE) {
		LAGG_RUNLOCK(sc, &tracker);
		m_freem(m);
		return (NULL);
	}

	ETHER_BPF_MTAP(scifp, m);

	m = (*sc->sc_input)(sc, lp, m);

	if (m != NULL) {
		counter_u64_add(sc->sc_ipackets, 1);
		counter_u64_add(sc->sc_ibytes, m->m_pkthdr.len);

		if (scifp->if_flags & IFF_MONITOR) {
			m_freem(m);
			m = NULL;
		}
	}

	LAGG_RUNLOCK(sc, &tracker);
	return (m);
}

static int
lagg_media_change(struct ifnet *ifp)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;

	if (sc->sc_ifflags & IFF_DEBUG)
		printf("%s\n", __func__);

	/* Ignore */
	return (0);
}

static void
lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
	struct lagg_port *lp;
	struct rm_priotracker tracker;

	imr->ifm_status = IFM_AVALID;
	imr->ifm_active = IFM_ETHER | IFM_AUTO;

	LAGG_RLOCK(sc, &tracker);
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		if (LAGG_PORTACTIVE(lp))
			imr->ifm_status |= IFM_ACTIVE;
	}
	LAGG_RUNLOCK(sc, &tracker);
}

static void
lagg_linkstate(struct lagg_softc *sc)
{
	struct lagg_port *lp;
	int new_link = LINK_STATE_DOWN;
	uint64_t speed;

	/* Our link is considered up if at least one of our ports is active */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		if (lp->lp_link_state == LINK_STATE_UP) {
			new_link = LINK_STATE_UP;
			break;
		}
	}
	if_link_state_change(sc->sc_ifp, new_link);

	/* Update if_baudrate to reflect the max possible speed */
	switch (sc->sc_proto) {
		case LAGG_PROTO_FAILOVER:
			sc->sc_ifp->if_baudrate = sc->sc_primary != NULL ?
			    sc->sc_primary->lp_ifp->if_baudrate : 0;
			break;
		case LAGG_PROTO_ROUNDROBIN:
		case LAGG_PROTO_LOADBALANCE:
		case LAGG_PROTO_ETHERCHANNEL:
			speed = 0;
			SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
				speed += lp->lp_ifp->if_baudrate;
			sc->sc_ifp->if_baudrate = speed;
			break;
		case LAGG_PROTO_LACP:
			/* LACP updates if_baudrate itself */
			break;
	}
}

static void
lagg_port_state(struct ifnet *ifp, int state)
{
	struct lagg_port *lp = (struct lagg_port *)ifp->if_lagg;
	struct lagg_softc *sc = NULL;

	if (lp != NULL)
		sc = lp->lp_softc;
	if (sc == NULL)
		return;

	LAGG_WLOCK(sc);
	lagg_linkstate(sc);
	if (sc->sc_linkstate != NULL)
		(*sc->sc_linkstate)(lp);
	LAGG_WUNLOCK(sc);
}

struct lagg_port *
lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
{
	struct lagg_port *lp_next, *rval = NULL;
	/* int new_link = LINK_STATE_DOWN; */

	LAGG_RLOCK_ASSERT(sc);
	/*
	 * Search for a port which reports an active link state.
	 */

	if (lp == NULL)
		goto search;
	if (LAGG_PORTACTIVE(lp)) {
		rval = lp;
		goto found;
	}
	if ((lp_next = SLIST_NEXT(lp, lp_entries)) != NULL &&
	    LAGG_PORTACTIVE(lp_next)) {
		rval = lp_next;
		goto found;
	}

search:
	SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
		if (LAGG_PORTACTIVE(lp_next)) {
			rval = lp_next;
			goto found;
		}
	}

found:
	if (rval != NULL) {
		/*
		 * The IEEE 802.1D standard assumes that a lagg with
		 * multiple ports is always full duplex. This is valid
		 * for load sharing laggs and if at least two links
		 * are active. Unfortunately, checking the latter would
		 * be too expensive at this point.
		 XXX
		if ((sc->sc_capabilities & IFCAP_LAGG_FULLDUPLEX) &&
		    (sc->sc_count > 1))
			new_link = LINK_STATE_FULL_DUPLEX;
		else
			new_link = rval->lp_link_state;
		 */
	}

	return (rval);
}

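/*
 * Return a pointer to "len" bytes of packet data starting at offset "off".
 * If the range is contiguous in the first mbuf it is returned in place;
 * otherwise it is copied into the caller-supplied "buf".  Returns NULL if
 * the packet is shorter than off + len, so callers must check the result.
 */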
static const void *
lagg_gethdr(struct mbuf *m, u_int off, u_int len, void *buf)
{
	if (m->m_pkthdr.len < (off + len)) {
		return (NULL);
	} else if (m->m_len < (off + len)) {
		m_copydata(m, off, len, buf);
		return (buf);
	}
	return (mtod(m, char *) + off);
}

static int
lagg_sysctl_active(SYSCTL_HANDLER_ARGS)
{
	struct lagg_softc *sc = (struct lagg_softc *)arg1;
	struct lagg_port *lp;
	int error;

	/* LACP tracks active links automatically, the others do not */
	if (sc->sc_proto != LAGG_PROTO_LACP) {
		sc->sc_active = 0;
		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
			sc->sc_active += LAGG_PORTACTIVE(lp);
	}

	error = sysctl_handle_int(oidp, &sc->sc_active, 0, req);
	if (error || req->newptr == NULL)
		return (error);

	return (0);
}

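/*
 * Hash the significant packet headers into a 32 bit flow value seeded with
 * "key".  Which layers contribute is selected by the LAGG_F_HASHL2/L3/L4
 * bits in sc_flags: L2 mixes in the Ethernet addresses and VLAN tag, L3
 * the IPv4/IPv6 addresses (plus the IPv6 flow label) and L4 the
 * TCP/UDP/SCTP port pair.  For example, with only LAGG_F_HASHL3 set, all
 * traffic between one pair of IP addresses maps to a single port.
 */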
uint32_t
lagg_hashmbuf(struct lagg_softc *sc, struct mbuf *m, uint32_t key)
{
	uint16_t etype;
	uint32_t p = key;
	int off;
	struct ether_header *eh;
	const struct ether_vlan_header *vlan;
#ifdef INET
	const struct ip *ip;
	const uint32_t *ports;
	int iphlen;
#endif
#ifdef INET6
	const struct ip6_hdr *ip6;
	uint32_t flow;
#endif
	union {
#ifdef INET
		struct ip ip;
#endif
#ifdef INET6
		struct ip6_hdr ip6;
#endif
		struct ether_vlan_header vlan;
		uint32_t port;
	} buf;

	off = sizeof(*eh);
	if (m->m_len < off)
		goto out;
	eh = mtod(m, struct ether_header *);
	etype = ntohs(eh->ether_type);
	if (sc->sc_flags & LAGG_F_HASHL2) {
		p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
		p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
	}

	/* Special handling for encapsulating VLAN frames */
	if ((m->m_flags & M_VLANTAG) && (sc->sc_flags & LAGG_F_HASHL2)) {
		p = hash32_buf(&m->m_pkthdr.ether_vtag,
		    sizeof(m->m_pkthdr.ether_vtag), p);
	} else if (etype == ETHERTYPE_VLAN) {
		vlan = lagg_gethdr(m, off, sizeof(*vlan), &buf);
		if (vlan == NULL)
			goto out;

		if (sc->sc_flags & LAGG_F_HASHL2)
			p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
		etype = ntohs(vlan->evl_proto);
		off += sizeof(*vlan) - sizeof(*eh);
	}

	switch (etype) {
#ifdef INET
	case ETHERTYPE_IP:
		ip = lagg_gethdr(m, off, sizeof(*ip), &buf);
		if (ip == NULL)
			goto out;

		if (sc->sc_flags & LAGG_F_HASHL3) {
			p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
			p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
		}
		if (!(sc->sc_flags & LAGG_F_HASHL4))
			break;
		switch (ip->ip_p) {
			case IPPROTO_TCP:
			case IPPROTO_UDP:
			case IPPROTO_SCTP:
				iphlen = ip->ip_hl << 2;
				if (iphlen < sizeof(*ip))
					break;
				off += iphlen;
				ports = lagg_gethdr(m, off, sizeof(*ports), &buf);
				if (ports == NULL)
					break;
				p = hash32_buf(ports, sizeof(*ports), p);
				break;
		}
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		if (!(sc->sc_flags & LAGG_F_HASHL3))
			break;
		ip6 = lagg_gethdr(m, off, sizeof(*ip6), &buf);
		if (ip6 == NULL)
			goto out;

		p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
		p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
		flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
		p = hash32_buf(&flow, sizeof(flow), p);	/* IPv6 flow label */
		break;
#endif
	}
out:
	return (p);
}

int
lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
{

	return (ifp->if_transmit)(ifp, m);
}

/*
 * Simple round robin aggregation
 */

static int
lagg_rr_attach(struct lagg_softc *sc)
{
	sc->sc_detach = lagg_rr_detach;
	sc->sc_start = lagg_rr_start;
	sc->sc_input = lagg_rr_input;
	sc->sc_port_create = NULL;
	sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
	sc->sc_seq = 0;

	return (0);
}

static int
lagg_rr_detach(struct lagg_softc *sc)
{
	return (0);
}

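/*
 * Pick the output port by strict rotation: an atomically incremented
 * sequence number taken modulo the port count selects an index, and the
 * port list is then walked to that entry (O(n) per packet, acceptable for
 * the small LAGG_MAX_PORTS).  As lagg(4) documents for this mode, spreading
 * successive frames over links may deliver a flow's packets out of order.
 */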
static int
lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
{
	struct lagg_port *lp;
	uint32_t p;

	p = atomic_fetchadd_32(&sc->sc_seq, 1);
	p %= sc->sc_count;
	lp = SLIST_FIRST(&sc->sc_ports);
	while (p--)
		lp = SLIST_NEXT(lp, lp_entries);

	/*
	 * Check the port's link state. This will return the next active
	 * port if the link is down or the port is NULL.
	 */
	if ((lp = lagg_link_active(sc, lp)) == NULL) {
		m_freem(m);
		return (ENETDOWN);
	}

	/* Send mbuf */
	return (lagg_enqueue(lp->lp_ifp, m));
}

static struct mbuf *
lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
{
	struct ifnet *ifp = sc->sc_ifp;

	/* Just pass in the packet to our lagg device */
	m->m_pkthdr.rcvif = ifp;

	return (m);
}

/*
 * Active failover
 */

static int
lagg_fail_attach(struct lagg_softc *sc)
{
	sc->sc_detach = lagg_fail_detach;
	sc->sc_start = lagg_fail_start;
	sc->sc_input = lagg_fail_input;
	sc->sc_port_create = NULL;
	sc->sc_port_destroy = NULL;

	return (0);
}

static int
lagg_fail_detach(struct lagg_softc *sc)
{
	return (0);
}

static int
lagg_fail_start(struct lagg_softc *sc, struct mbuf *m)
{
	struct lagg_port *lp;

	/* Use the master port if active or the next available port */
	if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) {
		m_freem(m);
		return (ENETDOWN);
	}

	/* Send mbuf */
	return (lagg_enqueue(lp->lp_ifp, m));
}

static struct mbuf *
lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
{
	struct ifnet *ifp = sc->sc_ifp;
	struct lagg_port *tmp_tp;

	if (lp == sc->sc_primary || lagg_failover_rx_all) {
		m->m_pkthdr.rcvif = ifp;
		return (m);
	}

	if (!LAGG_PORTACTIVE(sc->sc_primary)) {
		tmp_tp = lagg_link_active(sc, sc->sc_primary);
		/*
		 * If tmp_tp is null, we've received a packet when all
		 * our links are down. Weird, but process it anyway.
		 */
		if ((tmp_tp == NULL || tmp_tp == lp)) {
			m->m_pkthdr.rcvif = ifp;
			return (m);
		}
	}

	m_freem(m);
	return (NULL);
}

/*
 * Loadbalancing
 */

static int
lagg_lb_attach(struct lagg_softc *sc)
{
	struct lagg_port *lp;
	struct lagg_lb *lb;

	if ((lb = (struct lagg_lb *)malloc(sizeof(struct lagg_lb),
	    M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (ENOMEM);

	sc->sc_detach = lagg_lb_detach;
	sc->sc_start = lagg_lb_start;
	sc->sc_input = lagg_lb_input;
	sc->sc_port_create = lagg_lb_port_create;
	sc->sc_port_destroy = lagg_lb_port_destroy;
	sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;

	lb->lb_key = arc4random();
	sc->sc_psc = (caddr_t)lb;

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lagg_lb_port_create(lp);

	return (0);
}

static int
lagg_lb_detach(struct lagg_softc *sc)
{
	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
	if (lb != NULL)
		free(lb, M_DEVBUF);
	return (0);
}

static int
lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
{
	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
	struct lagg_port *lp_next;
	int i = 0;

	bzero(&lb->lb_ports, sizeof(lb->lb_ports));
	SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
		if (lp_next == lp)
			continue;
		if (i >= LAGG_MAX_PORTS)
			return (EINVAL);
		if (sc->sc_ifflags & IFF_DEBUG)
			printf("%s: port %s at index %d\n",
			    sc->sc_ifname, lp_next->lp_ifname, i);
		lb->lb_ports[i++] = lp_next;
	}

	return (0);
}

static int
lagg_lb_port_create(struct lagg_port *lp)
{
	struct lagg_softc *sc = lp->lp_softc;
	return (lagg_lb_porttable(sc, NULL));
}

static void
lagg_lb_port_destroy(struct lagg_port *lp)
{
	struct lagg_softc *sc = lp->lp_softc;
	lagg_lb_porttable(sc, lp);
}

static int
lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
{
	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
	struct lagg_port *lp = NULL;
	uint32_t p = 0;

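	/*
	 * Use the flow id already attached to the mbuf (e.g. the NIC's
	 * RSS hash) when enabled, shifting off low-order bits to prevent
	 * the multiqueue collisions the flowid_shift sysctl describes;
	 * otherwise fall back to hashing the headers in software.
	 */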
	if (sc->use_flowid && (m->m_flags & M_FLOWID))
		p = m->m_pkthdr.flowid >> sc->flowid_shift;
	else
		p = lagg_hashmbuf(sc, m, lb->lb_key);
	p %= sc->sc_count;
	lp = lb->lb_ports[p];

	/*
	 * Check the port's link state. This will return the next active
	 * port if the link is down or the port is NULL.
	 */
	if ((lp = lagg_link_active(sc, lp)) == NULL) {
		m_freem(m);
		return (ENETDOWN);
	}

	/* Send mbuf */
	return (lagg_enqueue(lp->lp_ifp, m));
}

static struct mbuf *
lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
{
	struct ifnet *ifp = sc->sc_ifp;

	/* Just pass in the packet to our lagg device */
	m->m_pkthdr.rcvif = ifp;

	return (m);
}

/*
 * 802.3ad LACP
 */

static int
lagg_lacp_attach(struct lagg_softc *sc)
{
	struct lagg_port *lp;
	int error;

	sc->sc_detach = lagg_lacp_detach;
	sc->sc_port_create = lacp_port_create;
	sc->sc_port_destroy = lacp_port_destroy;
	sc->sc_linkstate = lacp_linkstate;
	sc->sc_start = lagg_lacp_start;
	sc->sc_input = lagg_lacp_input;
	sc->sc_init = lacp_init;
	sc->sc_stop = lacp_stop;
	sc->sc_lladdr = lagg_lacp_lladdr;
	sc->sc_req = lacp_req;
	sc->sc_portreq = lacp_portreq;

	error = lacp_attach(sc);
	if (error)
		return (error);

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lacp_port_create(lp);

	return (error);
}

static int
lagg_lacp_detach(struct lagg_softc *sc)
{
	struct lagg_port *lp;
	int error;

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lacp_port_destroy(lp);

	/* unlocking is safe here */
	LAGG_WUNLOCK(sc);
	error = lacp_detach(sc);
	LAGG_WLOCK(sc);

	return (error);
}

static void
lagg_lacp_lladdr(struct lagg_softc *sc)
{
	struct lagg_port *lp;

	/* purge all the lacp ports */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lacp_port_destroy(lp);

	/* add them back in */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lacp_port_create(lp);
}

static int
lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m)
{
	struct lagg_port *lp;

	lp = lacp_select_tx_port(sc, m);
	if (lp == NULL) {
		m_freem(m);
		return (ENETDOWN);
	}

	/* Send mbuf */
	return (lagg_enqueue(lp->lp_ifp, m));
}

static struct mbuf *
lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
{
	struct ifnet *ifp = sc->sc_ifp;
	struct ether_header *eh;
	u_short etype;

	eh = mtod(m, struct ether_header *);
	etype = ntohs(eh->ether_type);

	/* Tap off LACP control messages */
	if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_SLOW) {
		m = lacp_input(lp, m);
		if (m == NULL)
			return (NULL);
	}

	/*
	 * If the port is not collecting or not in the active aggregator then
	 * free and return.
	 */
	if (lacp_iscollecting(lp) == 0 || lacp_isactive(lp) == 0) {
		m_freem(m);
		return (NULL);
	}

	m->m_pkthdr.rcvif = ifp;
	return (m);
}

static void
lagg_callout(void *arg)
{
	struct lagg_softc *sc = (struct lagg_softc *)arg;
	struct ifnet *ifp = sc->sc_ifp;

	ifp->if_ipackets = counter_u64_fetch(sc->sc_ipackets);
	ifp->if_opackets = counter_u64_fetch(sc->sc_opackets);
	ifp->if_ibytes = counter_u64_fetch(sc->sc_ibytes);
	ifp->if_obytes = counter_u64_fetch(sc->sc_obytes);

	callout_reset(&sc->sc_callout, hz, lagg_callout, sc);
}