if_lagg.c revision 274043
1/*	$OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $	*/
2
3/*
4 * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
5 * Copyright (c) 2007 Andrew Thompson <thompsa@FreeBSD.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19
20#include <sys/cdefs.h>
21__FBSDID("$FreeBSD: stable/10/sys/net/if_lagg.c 274043 2014-11-03 12:38:29Z hselasky $");
22
23#include "opt_inet.h"
24#include "opt_inet6.h"
25
26#include <sys/param.h>
27#include <sys/kernel.h>
28#include <sys/malloc.h>
29#include <sys/mbuf.h>
30#include <sys/queue.h>
31#include <sys/socket.h>
32#include <sys/sockio.h>
33#include <sys/sysctl.h>
34#include <sys/module.h>
35#include <sys/priv.h>
36#include <sys/systm.h>
37#include <sys/proc.h>
38#include <sys/hash.h>
39#include <sys/lock.h>
40#include <sys/rmlock.h>
41#include <sys/taskqueue.h>
42#include <sys/eventhandler.h>
43
44#include <net/ethernet.h>
45#include <net/if.h>
46#include <net/if_clone.h>
47#include <net/if_arp.h>
48#include <net/if_dl.h>
49#include <net/if_llc.h>
50#include <net/if_media.h>
51#include <net/if_types.h>
52#include <net/if_var.h>
53#include <net/bpf.h>
54
55#if defined(INET) || defined(INET6)
56#include <netinet/in.h>
57#include <netinet/ip.h>
58#endif
59#ifdef INET
60#include <netinet/in_systm.h>
61#include <netinet/if_ether.h>
62#endif
63
64#ifdef INET6
65#include <netinet/ip6.h>
66#include <netinet6/in6_var.h>
67#include <netinet6/in6_ifattach.h>
68#endif
69
70#include <net/if_vlan_var.h>
71#include <net/if_lagg.h>
72#include <net/ieee8023ad_lacp.h>
73
74/* Special flags we should propagate to the lagg ports. */
75static struct {
76	int flag;
77	int (*func)(struct ifnet *, int);
78} lagg_pflags[] = {
79	{IFF_PROMISC, ifpromisc},
80	{IFF_ALLMULTI, if_allmulti},
81	{0, NULL}
82};
83
84SLIST_HEAD(__trhead, lagg_softc) lagg_list;	/* list of laggs */
85static struct mtx	lagg_list_mtx;
86eventhandler_tag	lagg_detach_cookie = NULL;
87
88static int	lagg_clone_create(struct if_clone *, int, caddr_t);
89static void	lagg_clone_destroy(struct ifnet *);
90static struct if_clone *lagg_cloner;
91static const char laggname[] = "lagg";
92
93static void	lagg_lladdr(struct lagg_softc *, uint8_t *);
94static void	lagg_capabilities(struct lagg_softc *);
95static void	lagg_port_lladdr(struct lagg_port *, uint8_t *);
96static void	lagg_port_setlladdr(void *, int);
97static int	lagg_port_create(struct lagg_softc *, struct ifnet *);
98static int	lagg_port_destroy(struct lagg_port *, int);
99static struct mbuf *lagg_input(struct ifnet *, struct mbuf *);
100static void	lagg_linkstate(struct lagg_softc *);
101static void	lagg_port_state(struct ifnet *, int);
102static int	lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
103static int	lagg_port_output(struct ifnet *, struct mbuf *,
104		    const struct sockaddr *, struct route *);
105static void	lagg_port_ifdetach(void *arg __unused, struct ifnet *);
106#ifdef LAGG_PORT_STACKING
107static int	lagg_port_checkstacking(struct lagg_softc *);
108#endif
109static void	lagg_port2req(struct lagg_port *, struct lagg_reqport *);
110static void	lagg_init(void *);
111static void	lagg_stop(struct lagg_softc *);
112static int	lagg_ioctl(struct ifnet *, u_long, caddr_t);
113static int	lagg_ether_setmulti(struct lagg_softc *);
114static int	lagg_ether_cmdmulti(struct lagg_port *, int);
115static	int	lagg_setflag(struct lagg_port *, int, int,
116		    int (*func)(struct ifnet *, int));
117static	int	lagg_setflags(struct lagg_port *, int status);
118static int	lagg_transmit(struct ifnet *, struct mbuf *);
119static void	lagg_qflush(struct ifnet *);
120static int	lagg_media_change(struct ifnet *);
121static void	lagg_media_status(struct ifnet *, struct ifmediareq *);
122static struct lagg_port *lagg_link_active(struct lagg_softc *,
123	    struct lagg_port *);
124static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *);
125static int	lagg_sysctl_active(SYSCTL_HANDLER_ARGS);
126
127/* Simple round robin */
128static int	lagg_rr_attach(struct lagg_softc *);
129static int	lagg_rr_detach(struct lagg_softc *);
130static int	lagg_rr_start(struct lagg_softc *, struct mbuf *);
131static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *,
132		    struct mbuf *);
133
134/* Active failover */
135static int	lagg_fail_attach(struct lagg_softc *);
136static int	lagg_fail_detach(struct lagg_softc *);
137static int	lagg_fail_start(struct lagg_softc *, struct mbuf *);
138static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *,
139		    struct mbuf *);
140
141/* Loadbalancing */
142static int	lagg_lb_attach(struct lagg_softc *);
143static int	lagg_lb_detach(struct lagg_softc *);
144static int	lagg_lb_port_create(struct lagg_port *);
145static void	lagg_lb_port_destroy(struct lagg_port *);
146static int	lagg_lb_start(struct lagg_softc *, struct mbuf *);
147static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *,
148		    struct mbuf *);
149static int	lagg_lb_porttable(struct lagg_softc *, struct lagg_port *);
150
151/* 802.3ad LACP */
152static int	lagg_lacp_attach(struct lagg_softc *);
153static int	lagg_lacp_detach(struct lagg_softc *);
154static int	lagg_lacp_start(struct lagg_softc *, struct mbuf *);
155static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *,
156		    struct mbuf *);
157static void	lagg_lacp_lladdr(struct lagg_softc *);
158
159static void	lagg_callout(void *);
160
161/* lagg protocol table */
162static const struct {
163	int			ti_proto;
164	int			(*ti_attach)(struct lagg_softc *);
165} lagg_protos[] = {
166	{ LAGG_PROTO_ROUNDROBIN,	lagg_rr_attach },
167	{ LAGG_PROTO_FAILOVER,		lagg_fail_attach },
168	{ LAGG_PROTO_LOADBALANCE,	lagg_lb_attach },
169	{ LAGG_PROTO_ETHERCHANNEL,	lagg_lb_attach },
170	{ LAGG_PROTO_LACP,		lagg_lacp_attach },
171	{ LAGG_PROTO_NONE,		NULL }
172};
173
174SYSCTL_DECL(_net_link);
175SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0,
176    "Link Aggregation");
177
178static int lagg_failover_rx_all = 0; /* Allow input on any failover links */
179SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW,
180    &lagg_failover_rx_all, 0,
181    "Accept input from any interface in a failover lagg");
182static int def_use_flowid = 1; /* Default value for using M_FLOWID */
183TUNABLE_INT("net.link.lagg.default_use_flowid", &def_use_flowid);
184SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RW,
185    &def_use_flowid, 0,
186    "Default setting for using flow id for load sharing");
187static int def_flowid_shift = 16; /* Default flowid shift */
188TUNABLE_INT("net.link.lagg.default_flowid_shift", &def_flowid_shift);
189SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RW,
190    &def_flowid_shift, 0,
191    "Default setting for flowid shift for load sharing");
192
193static int
194lagg_modevent(module_t mod, int type, void *data)
195{
196
197	switch (type) {
198	case MOD_LOAD:
199		mtx_init(&lagg_list_mtx, "if_lagg list", NULL, MTX_DEF);
200		SLIST_INIT(&lagg_list);
201		lagg_cloner = if_clone_simple(laggname, lagg_clone_create,
202		    lagg_clone_destroy, 0);
203		lagg_input_p = lagg_input;
204		lagg_linkstate_p = lagg_port_state;
205		lagg_detach_cookie = EVENTHANDLER_REGISTER(
206		    ifnet_departure_event, lagg_port_ifdetach, NULL,
207		    EVENTHANDLER_PRI_ANY);
208		break;
209	case MOD_UNLOAD:
210		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
211		    lagg_detach_cookie);
212		if_clone_detach(lagg_cloner);
213		lagg_input_p = NULL;
214		lagg_linkstate_p = NULL;
215		mtx_destroy(&lagg_list_mtx);
216		break;
217	default:
218		return (EOPNOTSUPP);
219	}
220	return (0);
221}
222
223static moduledata_t lagg_mod = {
224	"if_lagg",
225	lagg_modevent,
226	0
227};
228
229DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
230MODULE_VERSION(if_lagg, 1);
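/*
 * Usage (illustrative): the module can be loaded at runtime with
 * "kldload if_lagg" or compiled in with "device lagg"; a lagg interface is
 * then created through the cloner, e.g. "ifconfig lagg0 create", which
 * ends up in lagg_clone_create() below.
 */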
231
232/*
233 * This routine is run via a vlan
234 * config EVENT
235 */
236static void
237lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
238{
239	struct lagg_softc	*sc = ifp->if_softc;
240	struct lagg_port	*lp;
241	struct rm_priotracker	tracker;
242
243	if (ifp->if_softc != arg)	/* Not our event */
244		return;
245
246	LAGG_RLOCK(sc, &tracker);
247	if (!SLIST_EMPTY(&sc->sc_ports)) {
248		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
249			EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
250	}
251	LAGG_RUNLOCK(sc, &tracker);
252}
253
254/*
255 * This routine is run via a vlan
256 * unconfig EVENT
257 */
258static void
259lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
260{
261	struct lagg_softc	*sc = ifp->if_softc;
262	struct lagg_port	*lp;
263	struct rm_priotracker	tracker;
264
265	if (ifp->if_softc != arg)	/* Not our event */
266		return;
267
268	LAGG_RLOCK(sc, &tracker);
269	if (!SLIST_EMPTY(&sc->sc_ports)) {
270		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
271			EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
272	}
273	LAGG_RUNLOCK(sc, &tracker);
274}
275
276static int
277lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
278{
279	struct lagg_softc *sc;
280	struct ifnet *ifp;
281	int i, error = 0;
282	static const u_char eaddr[6];	/* 00:00:00:00:00:00 */
283	struct sysctl_oid *oid;
284	char num[14];			/* sufficient for 32 bits */
285
286	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
287	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
288	if (ifp == NULL) {
289		free(sc, M_DEVBUF);
290		return (ENOSPC);
291	}
292
293	sc->sc_ipackets = counter_u64_alloc(M_WAITOK);
294	sc->sc_opackets = counter_u64_alloc(M_WAITOK);
295	sc->sc_ibytes = counter_u64_alloc(M_WAITOK);
296	sc->sc_obytes = counter_u64_alloc(M_WAITOK);
297
298	sysctl_ctx_init(&sc->ctx);
299	snprintf(num, sizeof(num), "%u", unit);
300	sc->use_flowid = def_use_flowid;
301	sc->flowid_shift = def_flowid_shift;
302	sc->sc_oid = oid = SYSCTL_ADD_NODE(&sc->ctx,
303		&SYSCTL_NODE_CHILDREN(_net_link, lagg),
304		OID_AUTO, num, CTLFLAG_RD, NULL, "");
305	SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
306		"use_flowid", CTLFLAG_RW, &sc->use_flowid,
307		sc->use_flowid, "Use flow id for load sharing");
308	SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
309		"flowid_shift", CTLFLAG_RW, &sc->flowid_shift,
310		sc->flowid_shift,
311		"Shift flowid bits to prevent multiqueue collisions");
312	SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
313		"count", CTLFLAG_RD, &sc->sc_count, sc->sc_count,
314		"Total number of ports");
315	SYSCTL_ADD_PROC(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
316		"active", CTLTYPE_INT|CTLFLAG_RD, sc, 0, lagg_sysctl_active,
317		"I", "Total number of active ports");
318	SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
319		"flapping", CTLFLAG_RD, &sc->sc_flapping,
320		sc->sc_flapping, "Total number of port change events");
321	/* Hash all layers by default */
322	sc->sc_flags = LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4;
323
324	sc->sc_proto = LAGG_PROTO_NONE;
325	for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) {
326		if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) {
327			sc->sc_proto = lagg_protos[i].ti_proto;
328			if ((error = lagg_protos[i].ti_attach(sc)) != 0) {
329				if_free(ifp);
330				free(sc, M_DEVBUF);
331				return (error);
332			}
333			break;
334		}
335	}
336	LAGG_LOCK_INIT(sc);
337	LAGG_CALLOUT_LOCK_INIT(sc);
338	SLIST_INIT(&sc->sc_ports);
339	TASK_INIT(&sc->sc_lladdr_task, 0, lagg_port_setlladdr, sc);
340
341	/*
342	 * This uses the callout lock rather than the rmlock; one can't
343	 * hold said rmlock during SWI.
344	 */
345	callout_init_mtx(&sc->sc_callout, &sc->sc_call_mtx, 0);
346
347	/* Initialise pseudo media types */
348	ifmedia_init(&sc->sc_media, 0, lagg_media_change,
349	    lagg_media_status);
350	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
351	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
352
353	if_initname(ifp, laggname, unit);
354	ifp->if_softc = sc;
355	ifp->if_transmit = lagg_transmit;
356	ifp->if_qflush = lagg_qflush;
357	ifp->if_init = lagg_init;
358	ifp->if_ioctl = lagg_ioctl;
359	ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
360	ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
361
362	/*
363	 * Attach as an ordinary ethernet device, children will be attached
364	 * as special device IFT_IEEE8023ADLAG.
365	 */
366	ether_ifattach(ifp, eaddr);
367
368	sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
369		lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
370	sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
371		lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
372
373	/* Insert into the global list of laggs */
374	mtx_lock(&lagg_list_mtx);
375	SLIST_INSERT_HEAD(&lagg_list, sc, sc_entries);
376	mtx_unlock(&lagg_list_mtx);
377
378	callout_reset(&sc->sc_callout, hz, lagg_callout, sc);
379
380	return (0);
381}
382
383static void
384lagg_clone_destroy(struct ifnet *ifp)
385{
386	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
387	struct lagg_port *lp;
388
389	LAGG_WLOCK(sc);
390
391	lagg_stop(sc);
392	ifp->if_flags &= ~IFF_UP;
393
394	EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
395	EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);
396
397	/* Shutdown and remove lagg ports */
398	while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL)
399		lagg_port_destroy(lp, 1);
400	/* Unhook the aggregation protocol */
401	if (sc->sc_detach != NULL)
402		(*sc->sc_detach)(sc);
403
404	LAGG_WUNLOCK(sc);
405
406	sysctl_ctx_free(&sc->ctx);
407	ifmedia_removeall(&sc->sc_media);
408	ether_ifdetach(ifp);
409	if_free(ifp);
410
411	/* This grabs sc_callout_mtx, serialising it correctly */
412	callout_drain(&sc->sc_callout);
413
414	/* At this point it's drained; we can free this */
415	counter_u64_free(sc->sc_ipackets);
416	counter_u64_free(sc->sc_opackets);
417	counter_u64_free(sc->sc_ibytes);
418	counter_u64_free(sc->sc_obytes);
419
420	mtx_lock(&lagg_list_mtx);
421	SLIST_REMOVE(&lagg_list, sc, lagg_softc, sc_entries);
422	mtx_unlock(&lagg_list_mtx);
423
424	taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task);
425	LAGG_LOCK_DESTROY(sc);
426	LAGG_CALLOUT_LOCK_DESTROY(sc);
427	free(sc, M_DEVBUF);
428}
429
430static void
431lagg_lladdr(struct lagg_softc *sc, uint8_t *lladdr)
432{
433	struct ifnet *ifp = sc->sc_ifp;
434
435	if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
436		return;
437
438	bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN);
439	/* Let the protocol know the MAC has changed */
440	if (sc->sc_lladdr != NULL)
441		(*sc->sc_lladdr)(sc);
442	EVENTHANDLER_INVOKE(iflladdr_event, ifp);
443}
444
445static void
446lagg_capabilities(struct lagg_softc *sc)
447{
448	struct lagg_port *lp;
449	int cap = ~0, ena = ~0;
450	u_long hwa = ~0UL;
451	struct ifnet_hw_tsomax hw_tsomax;
452
453	LAGG_WLOCK_ASSERT(sc);
454
455	memset(&hw_tsomax, 0, sizeof(hw_tsomax));
456
457	/* Get capabilities from the lagg ports */
458	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
459		cap &= lp->lp_ifp->if_capabilities;
460		ena &= lp->lp_ifp->if_capenable;
461		hwa &= lp->lp_ifp->if_hwassist;
462		if_hw_tsomax_common(lp->lp_ifp, &hw_tsomax);
463	}
464	cap = (cap == ~0 ? 0 : cap);
465	ena = (ena == ~0 ? 0 : ena);
466	hwa = (hwa == ~0 ? 0 : hwa);
467
468	if (sc->sc_ifp->if_capabilities != cap ||
469	    sc->sc_ifp->if_capenable != ena ||
470	    sc->sc_ifp->if_hwassist != hwa ||
471	    if_hw_tsomax_update(sc->sc_ifp, &hw_tsomax) != 0) {
472		sc->sc_ifp->if_capabilities = cap;
473		sc->sc_ifp->if_capenable = ena;
474		sc->sc_ifp->if_hwassist = hwa;
475		getmicrotime(&sc->sc_ifp->if_lastchange);
476
477		if (sc->sc_ifflags & IFF_DEBUG)
478			if_printf(sc->sc_ifp,
479			    "capabilities 0x%08x enabled 0x%08x\n", cap, ena);
480	}
481}
482
483static void
484lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr)
485{
486	struct lagg_softc *sc = lp->lp_softc;
487	struct ifnet *ifp = lp->lp_ifp;
488	struct lagg_llq *llq;
489	int pending = 0;
490
491	LAGG_WLOCK_ASSERT(sc);
492
493	if (lp->lp_detaching ||
494	    memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
495		return;
496
497	/* Check to make sure it's not already queued to be changed */
498	SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
499		if (llq->llq_ifp == ifp) {
500			pending = 1;
501			break;
502		}
503	}
504
505	if (!pending) {
506		llq = malloc(sizeof(struct lagg_llq), M_DEVBUF, M_NOWAIT);
507		if (llq == NULL)	/* XXX what to do */
508			return;
509	}
510
511	/* Update the lladdr even if pending, it may have changed */
512	llq->llq_ifp = ifp;
513	bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN);
514
515	if (!pending)
516		SLIST_INSERT_HEAD(&sc->sc_llq_head, llq, llq_entries);
517
518	taskqueue_enqueue(taskqueue_swi, &sc->sc_lladdr_task);
519}
520
521/*
522 * Set the interface MAC address from a taskqueue to avoid a LOR (lock order reversal).
523 */
524static void
525lagg_port_setlladdr(void *arg, int pending)
526{
527	struct lagg_softc *sc = (struct lagg_softc *)arg;
528	struct lagg_llq *llq, *head;
529	struct ifnet *ifp;
530	int error;
531
532	/* Grab a local reference of the queue and remove it from the softc */
533	LAGG_WLOCK(sc);
534	head = SLIST_FIRST(&sc->sc_llq_head);
535	SLIST_FIRST(&sc->sc_llq_head) = NULL;
536	LAGG_WUNLOCK(sc);
537
538	/*
539	 * Traverse the queue and set the lladdr on each ifp. It is safe to do
540	 * unlocked as we have the only reference to it.
541	 */
542	for (llq = head; llq != NULL; llq = head) {
543		ifp = llq->llq_ifp;
544
545		/* Set the link layer address */
546		CURVNET_SET(ifp->if_vnet);
547		error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN);
548		CURVNET_RESTORE();
549		if (error)
550			printf("%s: setlladdr failed on %s\n", __func__,
551			    ifp->if_xname);
552
553		head = SLIST_NEXT(llq, llq_entries);
554		free(llq, M_DEVBUF);
555	}
556}
557
558static int
559lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
560{
561	struct lagg_softc *sc_ptr;
562	struct lagg_port *lp, *tlp;
563	int error = 0;
564
565	LAGG_WLOCK_ASSERT(sc);
566
567	/* Limit the maximal number of lagg ports */
568	if (sc->sc_count >= LAGG_MAX_PORTS)
569		return (ENOSPC);
570
571	/* Check if port has already been associated with a lagg */
572	if (ifp->if_lagg != NULL) {
573		/* Port is already in the current lagg? */
574		lp = (struct lagg_port *)ifp->if_lagg;
575		if (lp->lp_softc == sc)
576			return (EEXIST);
577		return (EBUSY);
578	}
579
580	/* XXX Disallow non-ethernet interfaces (this should be any of 802) */
581	if (ifp->if_type != IFT_ETHER)
582		return (EPROTONOSUPPORT);
583
584#ifdef INET6
585	/*
586	 * The member interface should not have inet6 address because
587	 * two interfaces with a valid link-local scope zone must not be
588	 * merged in any form.  This restriction is needed to
589	 * prevent violation of link-local scope zone.  Attempts to
590	 * add a member interface which has inet6 addresses triggers
591	 * removal of all inet6 addresses on the member interface.
592	 */
593	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
594		if (in6ifa_llaonifp(lp->lp_ifp)) {
595			in6_ifdetach(lp->lp_ifp);
596			if_printf(sc->sc_ifp,
597			    "IPv6 addresses on %s have been removed "
598			    "before adding it as a member to prevent "
599			    "IPv6 address scope violation.\n",
600			    lp->lp_ifp->if_xname);
601		}
602	}
603	if (in6ifa_llaonifp(ifp)) {
604		in6_ifdetach(ifp);
605		if_printf(sc->sc_ifp,
606		    "IPv6 addresses on %s have been removed "
607		    "before adding it as a member to prevent "
608		    "IPv6 address scope violation.\n",
609		    ifp->if_xname);
610	}
611#endif
612	/* Allow the first Ethernet member to define the MTU */
613	if (SLIST_EMPTY(&sc->sc_ports))
614		sc->sc_ifp->if_mtu = ifp->if_mtu;
615	else if (sc->sc_ifp->if_mtu != ifp->if_mtu) {
616		if_printf(sc->sc_ifp, "invalid MTU for %s\n",
617		    ifp->if_xname);
618		return (EINVAL);
619	}
620
621	if ((lp = malloc(sizeof(struct lagg_port),
622	    M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
623		return (ENOMEM);
624
625	/* Check if port is a stacked lagg */
626	mtx_lock(&lagg_list_mtx);
627	SLIST_FOREACH(sc_ptr, &lagg_list, sc_entries) {
628		if (ifp == sc_ptr->sc_ifp) {
629			mtx_unlock(&lagg_list_mtx);
630			free(lp, M_DEVBUF);
631			return (EINVAL);
632			/* XXX disable stacking for the moment, it's untested */
633#ifdef LAGG_PORT_STACKING
634			lp->lp_flags |= LAGG_PORT_STACK;
635			if (lagg_port_checkstacking(sc_ptr) >=
636			    LAGG_MAX_STACKING) {
637				mtx_unlock(&lagg_list_mtx);
638				free(lp, M_DEVBUF);
639				return (E2BIG);
640			}
641#endif
642		}
643	}
644	mtx_unlock(&lagg_list_mtx);
645
646	/* Change the interface type */
647	lp->lp_iftype = ifp->if_type;
648	ifp->if_type = IFT_IEEE8023ADLAG;
649	ifp->if_lagg = lp;
650	lp->lp_ioctl = ifp->if_ioctl;
651	ifp->if_ioctl = lagg_port_ioctl;
652	lp->lp_output = ifp->if_output;
653	ifp->if_output = lagg_port_output;
654
655	lp->lp_ifp = ifp;
656	lp->lp_softc = sc;
657
658	/* Save port link layer address */
659	bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN);
660
661	if (SLIST_EMPTY(&sc->sc_ports)) {
662		sc->sc_primary = lp;
663		lagg_lladdr(sc, IF_LLADDR(ifp));
664	} else {
665		/* Update link layer address for this port */
666		lagg_port_lladdr(lp, IF_LLADDR(sc->sc_ifp));
667	}
668
669	/* Insert into the list of ports. Keep ports sorted by if_index. */
670	SLIST_FOREACH(tlp, &sc->sc_ports, lp_entries) {
671		if (tlp->lp_ifp->if_index < ifp->if_index && (
672		    SLIST_NEXT(tlp, lp_entries) == NULL ||
673		    SLIST_NEXT(tlp, lp_entries)->lp_ifp->if_index >
674		    ifp->if_index))
675			break;
676	}
677	if (tlp != NULL)
678		SLIST_INSERT_AFTER(tlp, lp, lp_entries);
679	else
680		SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries);
681	sc->sc_count++;
682
683	/* Update lagg capabilities */
684	lagg_capabilities(sc);
685	lagg_linkstate(sc);
686
687	/* Add multicast addresses and interface flags to this port */
688	lagg_ether_cmdmulti(lp, 1);
689	lagg_setflags(lp, 1);
690
691	if (sc->sc_port_create != NULL)
692		error = (*sc->sc_port_create)(lp);
693	if (error) {
694		/* remove the port again, without calling sc_port_destroy */
695		lagg_port_destroy(lp, 0);
696		return (error);
697	}
698
699	return (error);
700}
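/*
 * Usage note (illustrative): ports are normally added from userland with
 * e.g. "ifconfig lagg0 laggport em0", which issues SIOCSLAGGPORT and ends
 * up in lagg_port_create() above; "em0" stands for any Ethernet interface.
 */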
701
702#ifdef LAGG_PORT_STACKING
703static int
704lagg_port_checkstacking(struct lagg_softc *sc)
705{
706	struct lagg_softc *sc_ptr;
707	struct lagg_port *lp;
708	int m = 0;
709
710	LAGG_WLOCK_ASSERT(sc);
711
712	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
713		if (lp->lp_flags & LAGG_PORT_STACK) {
714			sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc;
715			m = MAX(m, lagg_port_checkstacking(sc_ptr));
716		}
717	}
718
719	return (m + 1);
720}
721#endif
722
723static int
724lagg_port_destroy(struct lagg_port *lp, int runpd)
725{
726	struct lagg_softc *sc = lp->lp_softc;
727	struct lagg_port *lp_ptr;
728	struct lagg_llq *llq;
729	struct ifnet *ifp = lp->lp_ifp;
730
731	LAGG_WLOCK_ASSERT(sc);
732
733	if (runpd && sc->sc_port_destroy != NULL)
734		(*sc->sc_port_destroy)(lp);
735
736	/*
737	 * Remove multicast addresses and interface flags from this port and
738	 * reset the MAC address, skip if the interface is being detached.
739	 */
740	if (!lp->lp_detaching) {
741		lagg_ether_cmdmulti(lp, 0);
742		lagg_setflags(lp, 0);
743		lagg_port_lladdr(lp, lp->lp_lladdr);
744	}
745
746	/* Restore interface */
747	ifp->if_type = lp->lp_iftype;
748	ifp->if_ioctl = lp->lp_ioctl;
749	ifp->if_output = lp->lp_output;
750	ifp->if_lagg = NULL;
751
752	/* Finally, remove the port from the lagg */
753	SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries);
754	sc->sc_count--;
755
756	/* Update the primary interface */
757	if (lp == sc->sc_primary) {
758		uint8_t lladdr[ETHER_ADDR_LEN];
759
760		if ((lp_ptr = SLIST_FIRST(&sc->sc_ports)) == NULL) {
761			bzero(&lladdr, ETHER_ADDR_LEN);
762		} else {
763			bcopy(lp_ptr->lp_lladdr,
764			    lladdr, ETHER_ADDR_LEN);
765		}
766		lagg_lladdr(sc, lladdr);
767		sc->sc_primary = lp_ptr;
768
769		/* Update link layer address for each port */
770		SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
771			lagg_port_lladdr(lp_ptr, lladdr);
772	}
773
774	/* Remove any pending lladdr changes from the queue */
775	if (lp->lp_detaching) {
776		SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
777			if (llq->llq_ifp == ifp) {
778				SLIST_REMOVE(&sc->sc_llq_head, llq, lagg_llq,
779				    llq_entries);
780				free(llq, M_DEVBUF);
781				break;	/* Only appears once */
782			}
783		}
784	}
785
786	if (lp->lp_ifflags)
787		if_printf(ifp, "%s: lp_ifflags unclean\n", __func__);
788
789	free(lp, M_DEVBUF);
790
791	/* Update lagg capabilities */
792	lagg_capabilities(sc);
793	lagg_linkstate(sc);
794
795	return (0);
796}
797
798static int
799lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
800{
801	struct lagg_reqport *rp = (struct lagg_reqport *)data;
802	struct lagg_softc *sc;
803	struct lagg_port *lp = NULL;
804	int error = 0;
805	struct rm_priotracker tracker;
806
807	/* Should be checked by the caller */
808	if (ifp->if_type != IFT_IEEE8023ADLAG ||
809	    (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
810		goto fallback;
811
812	switch (cmd) {
813	case SIOCGLAGGPORT:
814		if (rp->rp_portname[0] == '\0' ||
815		    ifunit(rp->rp_portname) != ifp) {
816			error = EINVAL;
817			break;
818		}
819
820		LAGG_RLOCK(sc, &tracker);
821		if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
822			error = ENOENT;
823			LAGG_RUNLOCK(sc, &tracker);
824			break;
825		}
826
827		lagg_port2req(lp, rp);
828		LAGG_RUNLOCK(sc, &tracker);
829		break;
830
831	case SIOCSIFCAP:
832		if (lp->lp_ioctl == NULL) {
833			error = EINVAL;
834			break;
835		}
836		error = (*lp->lp_ioctl)(ifp, cmd, data);
837		if (error)
838			break;
839
840		/* Update lagg interface capabilities */
841		LAGG_WLOCK(sc);
842		lagg_capabilities(sc);
843		LAGG_WUNLOCK(sc);
844		break;
845
846	case SIOCSIFMTU:
847		/* Do not allow the MTU to be changed once joined */
848		error = EINVAL;
849		break;
850
851	default:
852		goto fallback;
853	}
854
855	return (error);
856
857fallback:
858	if (lp != NULL && lp->lp_ioctl != NULL)
859		return ((*lp->lp_ioctl)(ifp, cmd, data));
860
861	return (EINVAL);
862}
863
864/*
865 * For direct output to child ports.
866 */
867static int
868lagg_port_output(struct ifnet *ifp, struct mbuf *m,
869	const struct sockaddr *dst, struct route *ro)
870{
871	struct lagg_port *lp = ifp->if_lagg;
872
873	switch (dst->sa_family) {
874		case pseudo_AF_HDRCMPLT:
875		case AF_UNSPEC:
876			return ((*lp->lp_output)(ifp, m, dst, ro));
877	}
878
879	/* drop any other frames */
880	m_freem(m);
881	return (ENETDOWN);
882}
883
884static void
885lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp)
886{
887	struct lagg_port *lp;
888	struct lagg_softc *sc;
889
890	if ((lp = ifp->if_lagg) == NULL)
891		return;
892	/* If the ifnet is just being renamed, don't do anything. */
893	if (ifp->if_flags & IFF_RENAMING)
894		return;
895
896	sc = lp->lp_softc;
897
898	LAGG_WLOCK(sc);
899	lp->lp_detaching = 1;
900	lagg_port_destroy(lp, 1);
901	LAGG_WUNLOCK(sc);
902}
903
904static void
905lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
906{
907	struct lagg_softc *sc = lp->lp_softc;
908
909	strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname));
910	strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname));
911	rp->rp_prio = lp->lp_prio;
912	rp->rp_flags = lp->lp_flags;
913	if (sc->sc_portreq != NULL)
914		(*sc->sc_portreq)(lp, (caddr_t)&rp->rp_psc);
915
916	/* Add protocol specific flags */
917	switch (sc->sc_proto) {
918		case LAGG_PROTO_FAILOVER:
919			if (lp == sc->sc_primary)
920				rp->rp_flags |= LAGG_PORT_MASTER;
921			if (lp == lagg_link_active(sc, sc->sc_primary))
922				rp->rp_flags |= LAGG_PORT_ACTIVE;
923			break;
924
925		case LAGG_PROTO_ROUNDROBIN:
926		case LAGG_PROTO_LOADBALANCE:
927		case LAGG_PROTO_ETHERCHANNEL:
928			if (LAGG_PORTACTIVE(lp))
929				rp->rp_flags |= LAGG_PORT_ACTIVE;
930			break;
931
932		case LAGG_PROTO_LACP:
933			/* LACP has a different definition of active */
934			if (lacp_isactive(lp))
935				rp->rp_flags |= LAGG_PORT_ACTIVE;
936			if (lacp_iscollecting(lp))
937				rp->rp_flags |= LAGG_PORT_COLLECTING;
938			if (lacp_isdistributing(lp))
939				rp->rp_flags |= LAGG_PORT_DISTRIBUTING;
940			break;
941	}
942
943}
944
945static void
946lagg_init(void *xsc)
947{
948	struct lagg_softc *sc = (struct lagg_softc *)xsc;
949	struct lagg_port *lp;
950	struct ifnet *ifp = sc->sc_ifp;
951
952	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
953		return;
954
955	LAGG_WLOCK(sc);
956
957	ifp->if_drv_flags |= IFF_DRV_RUNNING;
958	/* Update the port lladdrs */
959	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
960		lagg_port_lladdr(lp, IF_LLADDR(ifp));
961
962	if (sc->sc_init != NULL)
963		(*sc->sc_init)(sc);
964
965	LAGG_WUNLOCK(sc);
966}
967
968static void
969lagg_stop(struct lagg_softc *sc)
970{
971	struct ifnet *ifp = sc->sc_ifp;
972
973	LAGG_WLOCK_ASSERT(sc);
974
975	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
976		return;
977
978	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
979
980	if (sc->sc_stop != NULL)
981		(*sc->sc_stop)(sc);
982}
983
984static int
985lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
986{
987	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
988	struct lagg_reqall *ra = (struct lagg_reqall *)data;
989	struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
990	struct lagg_reqflags *rf = (struct lagg_reqflags *)data;
991	struct ifreq *ifr = (struct ifreq *)data;
992	struct lagg_port *lp;
993	struct ifnet *tpif;
994	struct thread *td = curthread;
995	char *buf, *outbuf;
996	int count, buflen, len, error = 0;
997	struct rm_priotracker tracker;
998
999	bzero(&rpbuf, sizeof(rpbuf));
1000
1001	switch (cmd) {
1002	case SIOCGLAGG:
1003		LAGG_RLOCK(sc, &tracker);
1004		count = 0;
1005		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1006			count++;
1007		buflen = count * sizeof(struct lagg_reqport);
1008		LAGG_RUNLOCK(sc, &tracker);
1009
1010		outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
1011
1012		LAGG_RLOCK(sc, &tracker);
1013		ra->ra_proto = sc->sc_proto;
1014		if (sc->sc_req != NULL)
1015			(*sc->sc_req)(sc, (caddr_t)&ra->ra_psc);
1016
1017		count = 0;
1018		buf = outbuf;
1019		len = min(ra->ra_size, buflen);
1020		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1021			if (len < sizeof(rpbuf))
1022				break;
1023
1024			lagg_port2req(lp, &rpbuf);
1025			memcpy(buf, &rpbuf, sizeof(rpbuf));
1026			count++;
1027			buf += sizeof(rpbuf);
1028			len -= sizeof(rpbuf);
1029		}
1030		LAGG_RUNLOCK(sc, &tracker);
1031		ra->ra_ports = count;
1032		ra->ra_size = count * sizeof(rpbuf);
1033		error = copyout(outbuf, ra->ra_port, ra->ra_size);
1034		free(outbuf, M_TEMP);
1035		break;
1036	case SIOCSLAGG:
1037		error = priv_check(td, PRIV_NET_LAGG);
1038		if (error)
1039			break;
1040		if (ra->ra_proto >= LAGG_PROTO_MAX) {
1041			error = EPROTONOSUPPORT;
1042			break;
1043		}
1044		LAGG_WLOCK(sc);
1045		if (sc->sc_proto != LAGG_PROTO_NONE) {
1046			/* Reset protocol first in case detach unlocks */
1047			sc->sc_proto = LAGG_PROTO_NONE;
1048			error = sc->sc_detach(sc);
1049			sc->sc_detach = NULL;
1050			sc->sc_start = NULL;
1051			sc->sc_input = NULL;
1052			sc->sc_port_create = NULL;
1053			sc->sc_port_destroy = NULL;
1054			sc->sc_linkstate = NULL;
1055			sc->sc_init = NULL;
1056			sc->sc_stop = NULL;
1057			sc->sc_lladdr = NULL;
1058			sc->sc_req = NULL;
1059			sc->sc_portreq = NULL;
1060		} else if (sc->sc_input != NULL) {
1061			/* Still detaching */
1062			error = EBUSY;
1063		}
1064		if (error != 0) {
1065			LAGG_WUNLOCK(sc);
1066			break;
1067		}
1068		for (int i = 0; i < (sizeof(lagg_protos) /
1069		    sizeof(lagg_protos[0])); i++) {
1070			if (lagg_protos[i].ti_proto == ra->ra_proto) {
1071				if (sc->sc_ifflags & IFF_DEBUG)
1072					printf("%s: using proto %u\n",
1073					    sc->sc_ifname,
1074					    lagg_protos[i].ti_proto);
1075				sc->sc_proto = lagg_protos[i].ti_proto;
1076				if (sc->sc_proto != LAGG_PROTO_NONE)
1077					error = lagg_protos[i].ti_attach(sc);
1078				LAGG_WUNLOCK(sc);
1079				return (error);
1080			}
1081		}
1082		LAGG_WUNLOCK(sc);
1083		error = EPROTONOSUPPORT;
1084		break;
1085	case SIOCGLAGGFLAGS:
1086		rf->rf_flags = sc->sc_flags;
1087		break;
1088	case SIOCSLAGGHASH:
1089		error = priv_check(td, PRIV_NET_LAGG);
1090		if (error)
1091			break;
1092		if ((rf->rf_flags & LAGG_F_HASHMASK) == 0) {
1093			error = EINVAL;
1094			break;
1095		}
1096		LAGG_WLOCK(sc);
1097		sc->sc_flags &= ~LAGG_F_HASHMASK;
1098		sc->sc_flags |= rf->rf_flags & LAGG_F_HASHMASK;
1099		LAGG_WUNLOCK(sc);
1100		break;
1101	case SIOCGLAGGPORT:
1102		if (rp->rp_portname[0] == '\0' ||
1103		    (tpif = ifunit(rp->rp_portname)) == NULL) {
1104			error = EINVAL;
1105			break;
1106		}
1107
1108		LAGG_RLOCK(sc, &tracker);
1109		if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
1110		    lp->lp_softc != sc) {
1111			error = ENOENT;
1112			LAGG_RUNLOCK(sc, &tracker);
1113			break;
1114		}
1115
1116		lagg_port2req(lp, rp);
1117		LAGG_RUNLOCK(sc, &tracker);
1118		break;
1119	case SIOCSLAGGPORT:
1120		error = priv_check(td, PRIV_NET_LAGG);
1121		if (error)
1122			break;
1123		if (rp->rp_portname[0] == '\0' ||
1124		    (tpif = ifunit(rp->rp_portname)) == NULL) {
1125			error = EINVAL;
1126			break;
1127		}
1128		LAGG_WLOCK(sc);
1129		error = lagg_port_create(sc, tpif);
1130		LAGG_WUNLOCK(sc);
1131		break;
1132	case SIOCSLAGGDELPORT:
1133		error = priv_check(td, PRIV_NET_LAGG);
1134		if (error)
1135			break;
1136		if (rp->rp_portname[0] == '\0' ||
1137		    (tpif = ifunit(rp->rp_portname)) == NULL) {
1138			error = EINVAL;
1139			break;
1140		}
1141
1142		LAGG_WLOCK(sc);
1143		if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
1144		    lp->lp_softc != sc) {
1145			error = ENOENT;
1146			LAGG_WUNLOCK(sc);
1147			break;
1148		}
1149
1150		error = lagg_port_destroy(lp, 1);
1151		LAGG_WUNLOCK(sc);
1152		break;
1153	case SIOCSIFFLAGS:
1154		/* Set flags on ports too */
1155		LAGG_WLOCK(sc);
1156		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1157			lagg_setflags(lp, 1);
1158		}
1159		LAGG_WUNLOCK(sc);
1160
1161		if (!(ifp->if_flags & IFF_UP) &&
1162		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1163			/*
1164			 * If interface is marked down and it is running,
1165			 * then stop and disable it.
1166			 */
1167			LAGG_WLOCK(sc);
1168			lagg_stop(sc);
1169			LAGG_WUNLOCK(sc);
1170		} else if ((ifp->if_flags & IFF_UP) &&
1171		    !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1172			/*
1173			 * If interface is marked up and it is stopped, then
1174			 * start it.
1175			 */
1176			(*ifp->if_init)(sc);
1177		}
1178		break;
1179	case SIOCADDMULTI:
1180	case SIOCDELMULTI:
1181		LAGG_WLOCK(sc);
1182		error = lagg_ether_setmulti(sc);
1183		LAGG_WUNLOCK(sc);
1184		break;
1185	case SIOCSIFMEDIA:
1186	case SIOCGIFMEDIA:
1187		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
1188		break;
1189
1190	case SIOCSIFCAP:
1191	case SIOCSIFMTU:
1192		/* Do not allow the MTU or caps to be directly changed */
1193		error = EINVAL;
1194		break;
1195
1196	default:
1197		error = ether_ioctl(ifp, cmd, data);
1198		break;
1199	}
1200	return (error);
1201}
1202
1203static int
1204lagg_ether_setmulti(struct lagg_softc *sc)
1205{
1206	struct lagg_port *lp;
1207
1208	LAGG_WLOCK_ASSERT(sc);
1209
1210	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1211		/* First, remove any existing filter entries. */
1212		lagg_ether_cmdmulti(lp, 0);
1213		/* copy all addresses from the lagg interface to the port */
1214		lagg_ether_cmdmulti(lp, 1);
1215	}
1216	return (0);
1217}
1218
1219static int
1220lagg_ether_cmdmulti(struct lagg_port *lp, int set)
1221{
1222	struct lagg_softc *sc = lp->lp_softc;
1223	struct ifnet *ifp = lp->lp_ifp;
1224	struct ifnet *scifp = sc->sc_ifp;
1225	struct lagg_mc *mc;
1226	struct ifmultiaddr *ifma;
1227	int error;
1228
1229	LAGG_WLOCK_ASSERT(sc);
1230
1231	if (set) {
1232		IF_ADDR_WLOCK(scifp);
1233		TAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
1234			if (ifma->ifma_addr->sa_family != AF_LINK)
1235				continue;
1236			mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT);
1237			if (mc == NULL) {
1238				IF_ADDR_WUNLOCK(scifp);
1239				return (ENOMEM);
1240			}
1241			bcopy(ifma->ifma_addr, &mc->mc_addr,
1242			    ifma->ifma_addr->sa_len);
1243			mc->mc_addr.sdl_index = ifp->if_index;
1244			mc->mc_ifma = NULL;
1245			SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries);
1246		}
1247		IF_ADDR_WUNLOCK(scifp);
1248		SLIST_FOREACH (mc, &lp->lp_mc_head, mc_entries) {
1249			error = if_addmulti(ifp,
1250			    (struct sockaddr *)&mc->mc_addr, &mc->mc_ifma);
1251			if (error)
1252				return (error);
1253		}
1254	} else {
1255		while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
1256			SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
1257			if (mc->mc_ifma && !lp->lp_detaching)
1258				if_delmulti_ifma(mc->mc_ifma);
1259			free(mc, M_DEVBUF);
1260		}
1261	}
1262	return (0);
1263}
1264
1265/* Handle a ref counted flag that should be set on the lagg port as well */
1266static int
1267lagg_setflag(struct lagg_port *lp, int flag, int status,
1268	     int (*func)(struct ifnet *, int))
1269{
1270	struct lagg_softc *sc = lp->lp_softc;
1271	struct ifnet *scifp = sc->sc_ifp;
1272	struct ifnet *ifp = lp->lp_ifp;
1273	int error;
1274
1275	LAGG_WLOCK_ASSERT(sc);
1276
1277	status = status ? (scifp->if_flags & flag) : 0;
1278	/* Now "status" contains the flag value or 0 */
1279
1280	/*
1281	 * See if the recorded port status differs from what we want it
1282	 * to be.  If it does, flip it.  We record the port status in
1283	 * lp_ifflags so that we never clear a port flag we did not set.
1284	 * In fact, we do not clear or set port flags directly, but
1285	 * acquire or release references to them.  That is why we can be
1286	 * sure the recorded flags are still in accord with the actual
1287	 * port flags.
1288	 */
1289	if (status != (lp->lp_ifflags & flag)) {
1290		error = (*func)(ifp, status);
1291		if (error)
1292			return (error);
1293		lp->lp_ifflags &= ~flag;
1294		lp->lp_ifflags |= status;
1295	}
1296	return (0);
1297}
1298
1299/*
1300 * Handle IFF_* flags that require certain changes on the lagg port:
1301 * if "status" is true, update the port flags to match those of the
1302 * lagg; if "status" is false, forcibly clear the flags set on the port.
1303 */
1304static int
1305lagg_setflags(struct lagg_port *lp, int status)
1306{
1307	int error, i;
1308
1309	for (i = 0; lagg_pflags[i].flag; i++) {
1310		error = lagg_setflag(lp, lagg_pflags[i].flag,
1311		    status, lagg_pflags[i].func);
1312		if (error)
1313			return (error);
1314	}
1315	return (0);
1316}
1317
1318static int
1319lagg_transmit(struct ifnet *ifp, struct mbuf *m)
1320{
1321	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
1322	int error, len, mcast;
1323	struct rm_priotracker tracker;
1324
1325	len = m->m_pkthdr.len;
1326	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
1327
1328	LAGG_RLOCK(sc, &tracker);
1329	/* We need a Tx algorithm and at least one port */
1330	if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
1331		LAGG_RUNLOCK(sc, &tracker);
1332		m_freem(m);
1333		ifp->if_oerrors++;
1334		return (ENXIO);
1335	}
1336
1337	ETHER_BPF_MTAP(ifp, m);
1338
1339	error = (*sc->sc_start)(sc, m);
1340	LAGG_RUNLOCK(sc, &tracker);
1341
1342	if (error == 0) {
1343		counter_u64_add(sc->sc_opackets, 1);
1344		counter_u64_add(sc->sc_obytes, len);
1345		ifp->if_omcasts += mcast;
1346	} else
1347		ifp->if_oerrors++;
1348
1349	return (error);
1350}
1351
1352/*
1353 * The ifp->if_qflush entry point for lagg(4) is a no-op.
1354 */
1355static void
1356lagg_qflush(struct ifnet *ifp __unused)
1357{
1358}
1359
1360static struct mbuf *
1361lagg_input(struct ifnet *ifp, struct mbuf *m)
1362{
1363	struct lagg_port *lp = ifp->if_lagg;
1364	struct lagg_softc *sc = lp->lp_softc;
1365	struct ifnet *scifp = sc->sc_ifp;
1366	struct rm_priotracker tracker;
1367
1368	LAGG_RLOCK(sc, &tracker);
1369	if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
1370	    (lp->lp_flags & LAGG_PORT_DISABLED) ||
1371	    sc->sc_proto == LAGG_PROTO_NONE) {
1372		LAGG_RUNLOCK(sc, &tracker);
1373		m_freem(m);
1374		return (NULL);
1375	}
1376
1377	ETHER_BPF_MTAP(scifp, m);
1378
1379	m = (*sc->sc_input)(sc, lp, m);
1380
1381	if (m != NULL) {
1382		counter_u64_add(sc->sc_ipackets, 1);
1383		counter_u64_add(sc->sc_ibytes, m->m_pkthdr.len);
1384
1385		if (scifp->if_flags & IFF_MONITOR) {
1386			m_freem(m);
1387			m = NULL;
1388		}
1389	}
1390
1391	LAGG_RUNLOCK(sc, &tracker);
1392	return (m);
1393}
1394
1395static int
1396lagg_media_change(struct ifnet *ifp)
1397{
1398	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
1399
1400	if (sc->sc_ifflags & IFF_DEBUG)
1401		printf("%s\n", __func__);
1402
1403	/* Ignore */
1404	return (0);
1405}
1406
1407static void
1408lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr)
1409{
1410	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
1411	struct lagg_port *lp;
1412	struct rm_priotracker tracker;
1413
1414	imr->ifm_status = IFM_AVALID;
1415	imr->ifm_active = IFM_ETHER | IFM_AUTO;
1416
1417	LAGG_RLOCK(sc, &tracker);
1418	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1419		if (LAGG_PORTACTIVE(lp))
1420			imr->ifm_status |= IFM_ACTIVE;
1421	}
1422	LAGG_RUNLOCK(sc, &tracker);
1423}
1424
1425static void
1426lagg_linkstate(struct lagg_softc *sc)
1427{
1428	struct lagg_port *lp;
1429	int new_link = LINK_STATE_DOWN;
1430	uint64_t speed;
1431
1432	/* Our link is considered up if at least one of our ports is active */
1433	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1434		if (lp->lp_link_state == LINK_STATE_UP) {
1435			new_link = LINK_STATE_UP;
1436			break;
1437		}
1438	}
1439	if_link_state_change(sc->sc_ifp, new_link);
1440
1441	/* Update if_baudrate to reflect the max possible speed */
1442	switch (sc->sc_proto) {
1443		case LAGG_PROTO_FAILOVER:
1444			sc->sc_ifp->if_baudrate = sc->sc_primary != NULL ?
1445			    sc->sc_primary->lp_ifp->if_baudrate : 0;
1446			break;
1447		case LAGG_PROTO_ROUNDROBIN:
1448		case LAGG_PROTO_LOADBALANCE:
1449		case LAGG_PROTO_ETHERCHANNEL:
1450			speed = 0;
1451			SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1452				speed += lp->lp_ifp->if_baudrate;
1453			sc->sc_ifp->if_baudrate = speed;
1454			break;
1455		case LAGG_PROTO_LACP:
1456			/* LACP updates if_baudrate itself */
1457			break;
1458	}
1459}
1460
1461static void
1462lagg_port_state(struct ifnet *ifp, int state)
1463{
1464	struct lagg_port *lp = (struct lagg_port *)ifp->if_lagg;
1465	struct lagg_softc *sc = NULL;
1466
1467	if (lp != NULL)
1468		sc = lp->lp_softc;
1469	if (sc == NULL)
1470		return;
1471
1472	LAGG_WLOCK(sc);
1473	lagg_linkstate(sc);
1474	if (sc->sc_linkstate != NULL)
1475		(*sc->sc_linkstate)(lp);
1476	LAGG_WUNLOCK(sc);
1477}
1478
1479struct lagg_port *
1480lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
1481{
1482	struct lagg_port *lp_next, *rval = NULL;
1483	// int new_link = LINK_STATE_DOWN;
1484
1485	LAGG_RLOCK_ASSERT(sc);
1486	/*
1487	 * Search for a port that reports an active link state.
1488	 */
1489
1490	if (lp == NULL)
1491		goto search;
1492	if (LAGG_PORTACTIVE(lp)) {
1493		rval = lp;
1494		goto found;
1495	}
1496	if ((lp_next = SLIST_NEXT(lp, lp_entries)) != NULL &&
1497	    LAGG_PORTACTIVE(lp_next)) {
1498		rval = lp_next;
1499		goto found;
1500	}
1501
1502search:
1503	SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
1504		if (LAGG_PORTACTIVE(lp_next)) {
1505			rval = lp_next;
1506			goto found;
1507		}
1508	}
1509
1510found:
1511	if (rval != NULL) {
1512		/*
1513		 * The IEEE 802.1D standard assumes that a lagg with
1514		 * multiple ports is always full duplex. This is valid
1515		 * for load sharing laggs and if at least two links
1516		 * are active. Unfortunately, checking the latter would
1517		 * be too expensive at this point.
1518		 XXX
1519		if ((sc->sc_capabilities & IFCAP_LAGG_FULLDUPLEX) &&
1520		    (sc->sc_count > 1))
1521			new_link = LINK_STATE_FULL_DUPLEX;
1522		else
1523			new_link = rval->lp_link_state;
1524		 */
1525	}
1526
1527	return (rval);
1528}
1529
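/*
 * Return a pointer to "len" bytes of packet data starting at offset "off".
 * If the requested range lies within the first mbuf the data is returned
 * in place; otherwise it is copied into the caller-supplied "buf".
 * Returns NULL when the packet is shorter than off + len.
 */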
1530static const void *
1531lagg_gethdr(struct mbuf *m, u_int off, u_int len, void *buf)
1532{
1533	if (m->m_pkthdr.len < (off + len)) {
1534		return (NULL);
1535	} else if (m->m_len < (off + len)) {
1536		m_copydata(m, off, len, buf);
1537		return (buf);
1538	}
1539	return (mtod(m, char *) + off);
1540}
1541
1542static int
1543lagg_sysctl_active(SYSCTL_HANDLER_ARGS)
1544{
1545	struct lagg_softc *sc = (struct lagg_softc *)arg1;
1546	struct lagg_port *lp;
1547	int error;
1548
1549	/* LACP tracks active links automatically, the others do not */
1550	if (sc->sc_proto != LAGG_PROTO_LACP) {
1551		sc->sc_active = 0;
1552		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1553			sc->sc_active += LAGG_PORTACTIVE(lp);
1554	}
1555
1556	error = sysctl_handle_int(oidp, &sc->sc_active, 0, req);
1557	if ((error) || (req->newptr == NULL))
1558		return (error);
1559
1560	return (0);
1561}
1562
1563uint32_t
1564lagg_hashmbuf(struct lagg_softc *sc, struct mbuf *m, uint32_t key)
1565{
1566	uint16_t etype;
1567	uint32_t p = key;
1568	int off;
1569	struct ether_header *eh;
1570	const struct ether_vlan_header *vlan;
1571#ifdef INET
1572	const struct ip *ip;
1573	const uint32_t *ports;
1574	int iphlen;
1575#endif
1576#ifdef INET6
1577	const struct ip6_hdr *ip6;
1578	uint32_t flow;
1579#endif
1580	union {
1581#ifdef INET
1582		struct ip ip;
1583#endif
1584#ifdef INET6
1585		struct ip6_hdr ip6;
1586#endif
1587		struct ether_vlan_header vlan;
1588		uint32_t port;
1589	} buf;
1590
1591
1592	off = sizeof(*eh);
1593	if (m->m_len < off)
1594		goto out;
1595	eh = mtod(m, struct ether_header *);
1596	etype = ntohs(eh->ether_type);
1597	if (sc->sc_flags & LAGG_F_HASHL2) {
1598		p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
1599		p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
1600	}
1601
1602	/* Special handling for encapsulating VLAN frames */
1603	if ((m->m_flags & M_VLANTAG) && (sc->sc_flags & LAGG_F_HASHL2)) {
1604		p = hash32_buf(&m->m_pkthdr.ether_vtag,
1605		    sizeof(m->m_pkthdr.ether_vtag), p);
1606	} else if (etype == ETHERTYPE_VLAN) {
1607		vlan = lagg_gethdr(m, off,  sizeof(*vlan), &buf);
1608		if (vlan == NULL)
1609			goto out;
1610
1611		if (sc->sc_flags & LAGG_F_HASHL2)
1612			p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
1613		etype = ntohs(vlan->evl_proto);
1614		off += sizeof(*vlan) - sizeof(*eh);
1615	}
1616
1617	switch (etype) {
1618#ifdef INET
1619	case ETHERTYPE_IP:
1620		ip = lagg_gethdr(m, off, sizeof(*ip), &buf);
1621		if (ip == NULL)
1622			goto out;
1623
1624		if (sc->sc_flags & LAGG_F_HASHL3) {
1625			p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
1626			p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
1627		}
1628		if (!(sc->sc_flags & LAGG_F_HASHL4))
1629			break;
1630		switch (ip->ip_p) {
1631			case IPPROTO_TCP:
1632			case IPPROTO_UDP:
1633			case IPPROTO_SCTP:
1634				iphlen = ip->ip_hl << 2;
1635				if (iphlen < sizeof(*ip))
1636					break;
1637				off += iphlen;
1638				ports = lagg_gethdr(m, off, sizeof(*ports), &buf);
1639				if (ports == NULL)
1640					break;
1641				p = hash32_buf(ports, sizeof(*ports), p);
1642				break;
1643		}
1644		break;
1645#endif
1646#ifdef INET6
1647	case ETHERTYPE_IPV6:
1648		if (!(sc->sc_flags & LAGG_F_HASHL3))
1649			break;
1650		ip6 = lagg_gethdr(m, off, sizeof(*ip6), &buf);
1651		if (ip6 == NULL)
1652			goto out;
1653
1654		p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
1655		p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
1656		flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
1657		p = hash32_buf(&flow, sizeof(flow), p);	/* IPv6 flow label */
1658		break;
1659#endif
1660	}
1661out:
1662	return (p);
1663}
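/*
 * Example (illustrative): with the default LAGG_F_HASHL2|L3|L4 flags a
 * TCP/IPv4 frame hashes the Ethernet source and destination addresses
 * (plus the VLAN tag, if present), the IPv4 source and destination
 * addresses and the 32-bit word containing the TCP source and destination
 * ports.  Callers such as lagg_lb_start() reduce the result modulo the
 * number of ports:
 *
 *	p = lagg_hashmbuf(sc, m, lb->lb_key) % sc->sc_count;
 */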
1664
1665int
1666lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
1667{
1668
1669	return (ifp->if_transmit)(ifp, m);
1670}
1671
1672/*
1673 * Simple round robin aggregation
1674 */
1675
1676static int
1677lagg_rr_attach(struct lagg_softc *sc)
1678{
1679	sc->sc_detach = lagg_rr_detach;
1680	sc->sc_start = lagg_rr_start;
1681	sc->sc_input = lagg_rr_input;
1682	sc->sc_port_create = NULL;
1683	sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
1684	sc->sc_seq = 0;
1685
1686	return (0);
1687}
1688
1689static int
1690lagg_rr_detach(struct lagg_softc *sc)
1691{
1692	return (0);
1693}
1694
1695static int
1696lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
1697{
1698	struct lagg_port *lp;
1699	uint32_t p;
1700
1701	p = atomic_fetchadd_32(&sc->sc_seq, 1);
1702	p %= sc->sc_count;
1703	lp = SLIST_FIRST(&sc->sc_ports);
1704	while (p--)
1705		lp = SLIST_NEXT(lp, lp_entries);
1706
1707	/*
1708	 * Check the port's link state. This will return the next active
1709	 * port if the link is down or the port is NULL.
1710	 */
1711	if ((lp = lagg_link_active(sc, lp)) == NULL) {
1712		m_freem(m);
1713		return (ENETDOWN);
1714	}
1715
1716	/* Send mbuf */
1717	return (lagg_enqueue(lp->lp_ifp, m));
1718}
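/*
 * Worked example (illustrative): with three ports and sc_seq starting at
 * zero, successive packets select p = 0, 1, 2, 0, 1, ... and are sent on
 * the ports in that order, with lagg_link_active() skipping ahead to the
 * next active port whenever the selected link is down.
 */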
1719
1720static struct mbuf *
1721lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
1722{
1723	struct ifnet *ifp = sc->sc_ifp;
1724
1725	/* Just pass in the packet to our lagg device */
1726	m->m_pkthdr.rcvif = ifp;
1727
1728	return (m);
1729}
1730
1731/*
1732 * Active failover
1733 */
1734
1735static int
1736lagg_fail_attach(struct lagg_softc *sc)
1737{
1738	sc->sc_detach = lagg_fail_detach;
1739	sc->sc_start = lagg_fail_start;
1740	sc->sc_input = lagg_fail_input;
1741	sc->sc_port_create = NULL;
1742	sc->sc_port_destroy = NULL;
1743
1744	return (0);
1745}
1746
1747static int
1748lagg_fail_detach(struct lagg_softc *sc)
1749{
1750	return (0);
1751}
1752
1753static int
1754lagg_fail_start(struct lagg_softc *sc, struct mbuf *m)
1755{
1756	struct lagg_port *lp;
1757
1758	/* Use the master port if active or the next available port */
1759	if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) {
1760		m_freem(m);
1761		return (ENETDOWN);
1762	}
1763
1764	/* Send mbuf */
1765	return (lagg_enqueue(lp->lp_ifp, m));
1766}
1767
1768static struct mbuf *
1769lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
1770{
1771	struct ifnet *ifp = sc->sc_ifp;
1772	struct lagg_port *tmp_tp;
1773
1774	if (lp == sc->sc_primary || lagg_failover_rx_all) {
1775		m->m_pkthdr.rcvif = ifp;
1776		return (m);
1777	}
1778
1779	if (!LAGG_PORTACTIVE(sc->sc_primary)) {
1780		tmp_tp = lagg_link_active(sc, sc->sc_primary);
1781		/*
1782		 * If tmp_tp is null, we've received a packet when all
1783		 * our links are down.  Weird, but process it anyway.
1784		 */
1785		if ((tmp_tp == NULL || tmp_tp == lp)) {
1786			m->m_pkthdr.rcvif = ifp;
1787			return (m);
1788		}
1789	}
1790
1791	m_freem(m);
1792	return (NULL);
1793}
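/*
 * Note: in failover mode only frames received on the primary port (or on
 * the port currently acting as backup while the primary link is down) are
 * passed up the stack; frames arriving on any other port are dropped
 * unless the net.link.lagg.failover_rx_all sysctl is non-zero.
 */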
1794
1795/*
1796 * Loadbalancing
1797 */
1798
1799static int
1800lagg_lb_attach(struct lagg_softc *sc)
1801{
1802	struct lagg_port *lp;
1803	struct lagg_lb *lb;
1804
1805	if ((lb = (struct lagg_lb *)malloc(sizeof(struct lagg_lb),
1806	    M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
1807		return (ENOMEM);
1808
1809	sc->sc_detach = lagg_lb_detach;
1810	sc->sc_start = lagg_lb_start;
1811	sc->sc_input = lagg_lb_input;
1812	sc->sc_port_create = lagg_lb_port_create;
1813	sc->sc_port_destroy = lagg_lb_port_destroy;
1814	sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
1815
1816	lb->lb_key = arc4random();
1817	sc->sc_psc = (caddr_t)lb;
1818
1819	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1820		lagg_lb_port_create(lp);
1821
1822	return (0);
1823}
1824
1825static int
1826lagg_lb_detach(struct lagg_softc *sc)
1827{
1828	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
1829	if (lb != NULL)
1830		free(lb, M_DEVBUF);
1831	return (0);
1832}
1833
1834static int
1835lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
1836{
1837	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
1838	struct lagg_port *lp_next;
1839	int i = 0;
1840
1841	bzero(&lb->lb_ports, sizeof(lb->lb_ports));
1842	SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
1843		if (lp_next == lp)
1844			continue;
1845		if (i >= LAGG_MAX_PORTS)
1846			return (EINVAL);
1847		if (sc->sc_ifflags & IFF_DEBUG)
1848			printf("%s: port %s at index %d\n",
1849			    sc->sc_ifname, lp_next->lp_ifname, i);
1850		lb->lb_ports[i++] = lp_next;
1851	}
1852
1853	return (0);
1854}
1855
1856static int
1857lagg_lb_port_create(struct lagg_port *lp)
1858{
1859	struct lagg_softc *sc = lp->lp_softc;
1860	return (lagg_lb_porttable(sc, NULL));
1861}
1862
1863static void
1864lagg_lb_port_destroy(struct lagg_port *lp)
1865{
1866	struct lagg_softc *sc = lp->lp_softc;
1867	lagg_lb_porttable(sc, lp);
1868}
1869
1870static int
1871lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
1872{
1873	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
1874	struct lagg_port *lp = NULL;
1875	uint32_t p = 0;
1876
1877	if (sc->use_flowid && (m->m_flags & M_FLOWID))
1878		p = m->m_pkthdr.flowid >> sc->flowid_shift;
1879	else
1880		p = lagg_hashmbuf(sc, m, lb->lb_key);
1881	p %= sc->sc_count;
1882	lp = lb->lb_ports[p];
1883
1884	/*
1885	 * Check the port's link state. This will return the next active
1886	 * port if the link is down or the port is NULL.
1887	 */
1888	if ((lp = lagg_link_active(sc, lp)) == NULL) {
1889		m_freem(m);
1890		return (ENETDOWN);
1891	}
1892
1893	/* Send mbuf */
1894	return (lagg_enqueue(lp->lp_ifp, m));
1895}
1896
1897static struct mbuf *
1898lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
1899{
1900	struct ifnet *ifp = sc->sc_ifp;
1901
1902	/* Just pass in the packet to our lagg device */
1903	m->m_pkthdr.rcvif = ifp;
1904
1905	return (m);
1906}
1907
1908/*
1909 * 802.3ad LACP
1910 */
1911
1912static int
1913lagg_lacp_attach(struct lagg_softc *sc)
1914{
1915	struct lagg_port *lp;
1916	int error;
1917
1918	sc->sc_detach = lagg_lacp_detach;
1919	sc->sc_port_create = lacp_port_create;
1920	sc->sc_port_destroy = lacp_port_destroy;
1921	sc->sc_linkstate = lacp_linkstate;
1922	sc->sc_start = lagg_lacp_start;
1923	sc->sc_input = lagg_lacp_input;
1924	sc->sc_init = lacp_init;
1925	sc->sc_stop = lacp_stop;
1926	sc->sc_lladdr = lagg_lacp_lladdr;
1927	sc->sc_req = lacp_req;
1928	sc->sc_portreq = lacp_portreq;
1929
1930	error = lacp_attach(sc);
1931	if (error)
1932		return (error);
1933
1934	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1935		lacp_port_create(lp);
1936
1937	return (error);
1938}
1939
1940static int
1941lagg_lacp_detach(struct lagg_softc *sc)
1942{
1943	struct lagg_port *lp;
1944	int error;
1945
1946	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1947		lacp_port_destroy(lp);
1948
1949	/* unlocking is safe here */
1950	LAGG_WUNLOCK(sc);
1951	error = lacp_detach(sc);
1952	LAGG_WLOCK(sc);
1953
1954	return (error);
1955}
1956
1957static void
1958lagg_lacp_lladdr(struct lagg_softc *sc)
1959{
1960	struct lagg_port *lp;
1961
1962	/* purge all the lacp ports */
1963	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1964		lacp_port_destroy(lp);
1965
1966	/* add them back in */
1967	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1968		lacp_port_create(lp);
1969}
1970
1971static int
1972lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m)
1973{
1974	struct lagg_port *lp;
1975
1976	lp = lacp_select_tx_port(sc, m);
1977	if (lp == NULL) {
1978		m_freem(m);
1979		return (ENETDOWN);
1980	}
1981
1982	/* Send mbuf */
1983	return (lagg_enqueue(lp->lp_ifp, m));
1984}
1985
1986static struct mbuf *
1987lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
1988{
1989	struct ifnet *ifp = sc->sc_ifp;
1990	struct ether_header *eh;
1991	u_short etype;
1992
1993	eh = mtod(m, struct ether_header *);
1994	etype = ntohs(eh->ether_type);
1995
1996	/* Tap off LACP control messages */
1997	if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_SLOW) {
1998		m = lacp_input(lp, m);
1999		if (m == NULL)
2000			return (NULL);
2001	}
2002
2003	/*
2004	 * If the port is not collecting or not in the active aggregator then
2005	 * free and return.
2006	 */
2007	if (lacp_iscollecting(lp) == 0 || lacp_isactive(lp) == 0) {
2008		m_freem(m);
2009		return (NULL);
2010	}
2011
2012	m->m_pkthdr.rcvif = ifp;
2013	return (m);
2014}
2015
2016static void
2017lagg_callout(void *arg)
2018{
2019	struct lagg_softc *sc = (struct lagg_softc *)arg;
2020	struct ifnet *ifp = sc->sc_ifp;
2021
2022	ifp->if_ipackets = counter_u64_fetch(sc->sc_ipackets);
2023	ifp->if_opackets = counter_u64_fetch(sc->sc_opackets);
2024	ifp->if_ibytes = counter_u64_fetch(sc->sc_ibytes);
2025	ifp->if_obytes = counter_u64_fetch(sc->sc_obytes);
2026
2027	callout_reset(&sc->sc_callout, hz, lagg_callout, sc);
2028}
2029