1/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
2 *
3 *		This program is free software; you can redistribute it and/or
4 *		modify it under the terms of the GNU General Public License
5 *		as published by the Free Software Foundation; either version
6 *		2 of the License, or (at your option) any later version.
7 *
8 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 */
10
11#include <linux/module.h>
12#include <asm/uaccess.h>
13#include <asm/system.h>
14#include <asm/bitops.h>
15#include <linux/types.h>
16#include <linux/kernel.h>
17#include <linux/sched.h>
18#include <linux/string.h>
19#include <linux/mm.h>
20#include <linux/socket.h>
21#include <linux/sockios.h>
22#include <linux/in.h>
23#include <linux/errno.h>
24#include <linux/interrupt.h>
25#include <linux/if_ether.h>
26#include <linux/inet.h>
27#include <linux/netdevice.h>
28#include <linux/etherdevice.h>
29#include <linux/notifier.h>
30#include <linux/init.h>
31#include <net/ip.h>
32#include <net/route.h>
33#include <linux/skbuff.h>
34#include <net/sock.h>
35#include <net/pkt_sched.h>
36
37/*
   How to set it up.
   -----------------
40
   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the equalizer
   you just set this qdisc on a device, e.g.:
44
45   # tc qdisc add dev eth0 root teql0
46   # tc qdisc add dev eth1 root teql0
47
48   That's all. Full PnP 8)
49
50   Applicability.
51   --------------
52
53   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
54      signal and generate EOI events. If you want to equalize virtual devices
55      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
      I estimate an upper useful difference as ~10 times.
61   3. If the slave requires address resolution, only protocols using
62      neighbour cache (IPv4/IPv6) will work over the equalized link.
63      Other protocols are still allowed to use the slave device directly,
64      which will not break load balancing, though native slave
65      traffic will have the highest priority.  */
66
/*
 * One equalizer instance: the master network device together with the
 * qdisc operations that slave devices attach through.
 */
struct teql_master
{
	/* Must remain the first member: teql_qdisc_init() casts sch->ops
	   directly to struct teql_master *. */
	struct Qdisc_ops qops;
	/* The master device itself. */
	struct net_device dev;
	/* Current position in the circular list of slave qdiscs;
	   NULL while no slave is attached. */
	struct Qdisc *slaves;
	/* Counters handed out by teql_master_stats(). */
	struct net_device_stats stats;
};
74
/*
 * Per-slave state of a "teql*" qdisc; reached through sch->data.
 */
struct teql_sched_data
{
	/* Next slave qdisc in the master's circular list. */
	struct Qdisc *next;
	/* Master this slave belongs to; set in teql_qdisc_init(). */
	struct teql_master *m;
	/* Neighbour entry remembered by __teql_resolve(); may be NULL. */
	struct neighbour *ncache;
	/* Packets queued on this slave. */
	struct sk_buff_head q;
};
82
/* Successor of slave qdisc q in the circular slave list.
   Expands to an lvalue, so it is also used to relink the list. */
#define NEXT_SLAVE(q) (((struct teql_sched_data*)((q)->data))->next)

/* Device flags that the master copies from its slaves.
   (IFF_BROADCAST used to be listed twice; the value is unchanged.) */
#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)
86
87/* "teql*" qdisc routines */
88
89static int
90teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
91{
92	struct net_device *dev = sch->dev;
93	struct teql_sched_data *q = (struct teql_sched_data *)sch->data;
94
95	__skb_queue_tail(&q->q, skb);
96	if (q->q.qlen <= dev->tx_queue_len) {
97		sch->stats.bytes += skb->len;
98		sch->stats.packets++;
99		return 0;
100	}
101
102	__skb_unlink(skb, &q->q);
103	kfree_skb(skb);
104	sch->stats.drops++;
105	return NET_XMIT_DROP;
106}
107
108static int
109teql_requeue(struct sk_buff *skb, struct Qdisc* sch)
110{
111	struct teql_sched_data *q = (struct teql_sched_data *)sch->data;
112
113	__skb_queue_head(&q->q, skb);
114	return 0;
115}
116
117static struct sk_buff *
118teql_dequeue(struct Qdisc* sch)
119{
120	struct teql_sched_data *dat = (struct teql_sched_data *)sch->data;
121	struct sk_buff *skb;
122
123	skb = __skb_dequeue(&dat->q);
124	if (skb == NULL) {
125		struct net_device *m = dat->m->dev.qdisc->dev;
126		if (m) {
127			dat->m->slaves = sch;
128			netif_wake_queue(m);
129		}
130	}
131	sch->q.qlen = dat->q.qlen + dat->m->dev.qdisc->q.qlen;
132	return skb;
133}
134
135static __inline__ void
136teql_neigh_release(struct neighbour *n)
137{
138	if (n)
139		neigh_release(n);
140}
141
142static void
143teql_reset(struct Qdisc* sch)
144{
145	struct teql_sched_data *dat = (struct teql_sched_data *)sch->data;
146
147	skb_queue_purge(&dat->q);
148	sch->q.qlen = 0;
149	teql_neigh_release(xchg(&dat->ncache, NULL));
150}
151
/*
 * Destroy hook: unlink sch from its master's circular slave list and
 * release what the slave still holds (queued packets, cached neighbour).
 *
 * MOD_DEC_USE_COUNT runs unconditionally at the end; the matching
 * MOD_INC_USE_COUNT is in teql_qdisc_init().
 */
static void
teql_destroy(struct Qdisc* sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = (struct teql_sched_data *)sch->data;
	struct teql_master *master = dat->m;

	if ((prev = master->slaves) != NULL) {
		/* Walk the ring once, looking at the successor q of prev. */
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				/* Found it: make prev skip over sch. */
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					/* sch was the current slave: move on to its successor. */
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						/* The successor is sch itself, i.e. it was the
						   only slave: the list becomes empty and the
						   master's qdisc is reset under its queue lock. */
						master->slaves = NULL;
						spin_lock_bh(&master->dev.queue_lock);
						qdisc_reset(master->dev.qdisc);
						spin_unlock_bh(&master->dev.queue_lock);
					}
				}
				skb_queue_purge(&dat->q);
				teql_neigh_release(xchg(&dat->ncache, NULL));
				break;
			}

		} while ((prev = q) != master->slaves);
	}

	MOD_DEC_USE_COUNT;
}
183
184static int teql_qdisc_init(struct Qdisc *sch, struct rtattr *opt)
185{
186	struct net_device *dev = sch->dev;
187	struct teql_master *m = (struct teql_master*)sch->ops;
188	struct teql_sched_data *q = (struct teql_sched_data *)sch->data;
189
190	if (dev->hard_header_len > m->dev.hard_header_len)
191		return -EINVAL;
192
193	if (&m->dev == dev)
194		return -ELOOP;
195
196	q->m = m;
197
198	skb_queue_head_init(&q->q);
199
200	if (m->slaves) {
201		if (m->dev.flags & IFF_UP) {
202			if ((m->dev.flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
203			    || (m->dev.flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
204			    || (m->dev.flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
205			    || dev->mtu < m->dev.mtu)
206				return -EINVAL;
207		} else {
208			if (!(dev->flags&IFF_POINTOPOINT))
209				m->dev.flags &= ~IFF_POINTOPOINT;
210			if (!(dev->flags&IFF_BROADCAST))
211				m->dev.flags &= ~IFF_BROADCAST;
212			if (!(dev->flags&IFF_MULTICAST))
213				m->dev.flags &= ~IFF_MULTICAST;
214			if (dev->mtu < m->dev.mtu)
215				m->dev.mtu = dev->mtu;
216		}
217		q->next = NEXT_SLAVE(m->slaves);
218		NEXT_SLAVE(m->slaves) = sch;
219	} else {
220		q->next = sch;
221		m->slaves = sch;
222		m->dev.mtu = dev->mtu;
223		m->dev.flags = (m->dev.flags&~FMASK)|(dev->flags&FMASK);
224	}
225
226	MOD_INC_USE_COUNT;
227	return 0;
228}
229
230/* "teql*" netdevice routines */
231
/*
 * Build the link-layer header of skb for transmission through slave dev.
 *
 * The neighbour entry remembered in the slave qdisc's private data
 * (q->ncache) is reused when its table and primary key match those of
 * skb->dst->neighbour; otherwise an entry is looked up on dev with
 * __neigh_lookup_errno().
 *
 * Returns:
 *   0        dev->hard_header() succeeded; the neighbour used is now
 *            stored in q->ncache (the previous one is released).
 *   -EINVAL  the skb's neighbour has no table, or hard_header() failed.
 *   PTR_ERR  of the failed neighbour lookup.
 *   -EAGAIN  neigh_event_send() returned non-zero and skb_res is NULL.
 *   1        neigh_event_send() returned non-zero and skb_res is non-NULL.
 */
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
	struct teql_sched_data *q = (void*)dev->qdisc->data;
	struct neighbour *mn = skb->dst->neighbour;
	struct neighbour *n = q->ncache;

	if (mn->tbl == NULL)
		return -EINVAL;
	if (n && n->tbl == mn->tbl &&
	    memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
		/* Cache hit: take our own reference for the duration of this call. */
		atomic_inc(&n->refcnt);
	} else {
		n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
	if (neigh_event_send(n, skb_res) == 0) {
		int err;
		/* n->ha is read under the neighbour's lock while the header is built. */
		read_lock(&n->lock);
		err = dev->hard_header(skb, dev, ntohs(skb->protocol), n->ha, NULL, skb->len);
		read_unlock(&n->lock);
		if (err < 0) {
			neigh_release(n);
			return -EINVAL;
		}
		/* Our reference moves into the cache; the old cached entry is released. */
		teql_neigh_release(xchg(&q->ncache, n));
		return 0;
	}
	neigh_release(n);
	return (skb_res == NULL) ? -EAGAIN : 1;
}
264
265static __inline__ int
266teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
267{
268	if (dev->hard_header == NULL ||
269	    skb->dst == NULL ||
270	    skb->dst->neighbour == NULL)
271		return 0;
272	return __teql_resolve(skb, skb_res, dev);
273}
274
/*
 * hard_start_xmit method of the master device (installed by
 * teql_master_init()).
 *
 * Walks the circular slave list once, starting at master->slaves, and
 * hands skb to the first slave that resolves its header and accepts it.
 * If some slave failed to resolve, the walk is repeated once with
 * skb_res == skb.
 *
 * Returns 0 when a slave transmitted the skb, when teql_resolve() returned
 * 1 for it, or when it was dropped and freed here.  Returns 1 after
 * stopping the master's queue because a slave was busy.
 */
static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = (void*)dev->priv;
	struct Qdisc *start, *q;
	int busy;
	int nores;
	/* Saved up front: skb must not be touched after a successful hand-off. */
	int len = skb->len;
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	if ((q = start) == NULL)
		goto drop;

	do {
		struct net_device *slave = q->dev;

		/* `continue` in a do/while jumps to the loop condition, so each
		   skipped slave still advances q to the next one. */
		if (slave->qdisc_sleeping != q)
			continue;
		if (netif_queue_stopped(slave) || ! netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave)) {
		case 0:
			/* Header is ready.  Only try the lock; a contended slave is
			   passed over rather than waited for. */
			if (spin_trylock(&slave->xmit_lock)) {
				slave->xmit_lock_owner = smp_processor_id();
				if (!netif_queue_stopped(slave) &&
				    slave->hard_start_xmit(skb, slave) == 0) {
					slave->xmit_lock_owner = -1;
					spin_unlock(&slave->xmit_lock);
					/* Sent: the next packet starts at the following slave. */
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->stats.tx_packets++;
					master->stats.tx_bytes += len;
					return 0;
				}
				slave->xmit_lock_owner = -1;
				spin_unlock(&slave->xmit_lock);
			}
			/* NOTE(review): this tests the master device (dev), not the
			   slave — confirm that is intended. */
			if (netif_queue_stopped(dev))
				busy = 1;
			break;
		case 1:
			/* The skb is neither sent nor freed here; presumably
			   neigh_event_send() took it over — TODO confirm. */
			master->slaves = NEXT_SLAVE(q);
			return 0;
		default:
			nores = 1;
			break;
		}
		/* Strip whatever now precedes skb->nh.raw (presumably the header
		   pushed for this slave) before trying the next one. */
		__skb_pull(skb, skb->nh.raw - skb->data);
	} while ((q = NEXT_SLAVE(q)) != start);

	/* First pass ran with skb_res == NULL; if resolution failed somewhere,
	   do one more pass passing the skb itself as skb_res. */
	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return 1;
	}
	master->stats.tx_errors++;

drop:
	master->stats.tx_dropped++;
	dev_kfree_skb(skb);
	return 0;
}
349
350static int teql_master_open(struct net_device *dev)
351{
352	struct Qdisc * q;
353	struct teql_master *m = (void*)dev->priv;
354	int mtu = 0xFFFE;
355	unsigned flags = IFF_NOARP|IFF_MULTICAST;
356
357	if (m->slaves == NULL)
358		return -EUNATCH;
359
360	flags = FMASK;
361
362	q = m->slaves;
363	do {
364		struct net_device *slave = q->dev;
365
366		if (slave == NULL)
367			return -EUNATCH;
368
369		if (slave->mtu < mtu)
370			mtu = slave->mtu;
371		if (slave->hard_header_len > LL_MAX_HEADER)
372			return -EINVAL;
373
374		/* If all the slaves are BROADCAST, master is BROADCAST
375		   If all the slaves are PtP, master is PtP
376		   Otherwise, master is NBMA.
377		 */
378		if (!(slave->flags&IFF_POINTOPOINT))
379			flags &= ~IFF_POINTOPOINT;
380		if (!(slave->flags&IFF_BROADCAST))
381			flags &= ~IFF_BROADCAST;
382		if (!(slave->flags&IFF_MULTICAST))
383			flags &= ~IFF_MULTICAST;
384	} while ((q = NEXT_SLAVE(q)) != m->slaves);
385
386	m->dev.mtu = mtu;
387	m->dev.flags = (m->dev.flags&~FMASK) | flags;
388	netif_start_queue(&m->dev);
389	MOD_INC_USE_COUNT;
390	return 0;
391}
392
393static int teql_master_close(struct net_device *dev)
394{
395	netif_stop_queue(dev);
396	MOD_DEC_USE_COUNT;
397	return 0;
398}
399
400static struct net_device_stats *teql_master_stats(struct net_device *dev)
401{
402	struct teql_master *m = (void*)dev->priv;
403	return &m->stats;
404}
405
406static int teql_master_mtu(struct net_device *dev, int new_mtu)
407{
408	struct teql_master *m = (void*)dev->priv;
409	struct Qdisc *q;
410
411	if (new_mtu < 68)
412		return -EINVAL;
413
414	q = m->slaves;
415	if (q) {
416		do {
417			if (new_mtu > q->dev->mtu)
418				return -EINVAL;
419		} while ((q=NEXT_SLAVE(q)) != m->slaves);
420	}
421
422	dev->mtu = new_mtu;
423	return 0;
424}
425
426static int teql_master_init(struct net_device *dev)
427{
428	dev->open		= teql_master_open;
429	dev->hard_start_xmit	= teql_master_xmit;
430	dev->stop		= teql_master_close;
431	dev->get_stats		= teql_master_stats;
432	dev->change_mtu		= teql_master_mtu;
433	dev->type		= ARPHRD_VOID;
434	dev->mtu		= 1500;
435	dev->tx_queue_len	= 100;
436	dev->flags		= IFF_NOARP;
437	dev->hard_header_len	= LL_MAX_HEADER;
438	return 0;
439}
440
/*
 * The single equalizer instance.  Only the leading qops member is
 * initialized here, positionally; dev, slaves and stats start out zeroed.
 * The slot meanings noted below are inferred from the values and depend on
 * the declaration order of struct Qdisc_ops, which is not visible in this
 * file — TODO confirm against its definition.
 */
static struct teql_master the_master = {
{
	NULL,
	NULL,
	"",	/* qdisc id; the module init code copies the device name here */
	sizeof(struct teql_sched_data),	/* size of the per-qdisc private area */

	teql_enqueue,
	teql_dequeue,
	teql_requeue,
	NULL,

	teql_qdisc_init,
	teql_reset,
	teql_destroy,
	NULL,
},};
458
459
460#ifdef MODULE
461int init_module(void)
462#else
463int __init teql_init(void)
464#endif
465{
466	int err;
467
468	rtnl_lock();
469
470	the_master.dev.priv = (void*)&the_master;
471	err = dev_alloc_name(&the_master.dev, "teql%d");
472	if (err < 0)
473		return err;
474	memcpy(the_master.qops.id, the_master.dev.name, IFNAMSIZ);
475	the_master.dev.init = teql_master_init;
476
477	err = register_netdevice(&the_master.dev);
478	if (err == 0) {
479		err = register_qdisc(&the_master.qops);
480		if (err)
481			unregister_netdevice(&the_master.dev);
482	}
483	rtnl_unlock();
484	return err;
485}
486
#ifdef MODULE
/*
 * Module unload: under the RTNL lock, unregister the qdisc first and then
 * the master device.
 */
void cleanup_module(void)
{
	rtnl_lock();
	unregister_qdisc(&the_master.qops);
	unregister_netdevice(&the_master.dev);
	rtnl_unlock();
}
#endif
MODULE_LICENSE("GPL");
497