1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IPv4 FIB: lookup engine and maintenance routines.
7 *
8 * Version:	$Id: fib_hash.c,v 1.1.1.1 2008/10/15 03:27:33 james26_jang Exp $
9 *
10 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 *		This program is free software; you can redistribute it and/or
13 *		modify it under the terms of the GNU General Public License
14 *		as published by the Free Software Foundation; either version
15 *		2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/config.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <asm/bitops.h>
22#include <linux/types.h>
23#include <linux/kernel.h>
24#include <linux/sched.h>
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
32#include <linux/netdevice.h>
33#include <linux/if_arp.h>
34#include <linux/proc_fs.h>
35#include <linux/skbuff.h>
36#include <linux/netlink.h>
37#include <linux/init.h>
38
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
44#include <net/ip_fib.h>
45
46#define FTprint(a...)
47/*
48   printk(KERN_DEBUG a)
49 */
50
51static kmem_cache_t * fn_hash_kmem;
52
53/*
54   These bizarre types are just to force strict type checking.
55   When I reversed order of bytes and changed to natural mask lengths,
56   I forgot to make fixes in several places. Now I am lazy to return
57   it back.
58 */
59
60typedef struct {
61	u32	datum;
62} fn_key_t;
63
64typedef struct {
65	u32	datum;
66} fn_hash_idx_t;
67
68struct fib_node
69{
70	struct fib_node		*fn_next;
71	struct fib_info		*fn_info;
72#define FIB_INFO(f)	((f)->fn_info)
73	fn_key_t		fn_key;
74	u8			fn_tos;
75	u8			fn_type;
76	u8			fn_scope;
77	u8			fn_state;
78};
79
80#define FN_S_ZOMBIE	1
81#define FN_S_ACCESSED	2
82
83static int fib_hash_zombies;
84
85struct fn_zone
86{
87	struct fn_zone	*fz_next;	/* Next not empty zone	*/
88	struct fib_node	**fz_hash;	/* Hash table pointer	*/
89	int		fz_nent;	/* Number of entries	*/
90
91	int		fz_divisor;	/* Hash divisor		*/
92	u32		fz_hashmask;	/* (1<<fz_divisor) - 1	*/
93#define FZ_HASHMASK(fz)	((fz)->fz_hashmask)
94
95	int		fz_order;	/* Zone order		*/
96	u32		fz_mask;
97#define FZ_MASK(fz)	((fz)->fz_mask)
98};
99
100/* NOTE. On fast computers evaluation of fz_hashmask and fz_mask
101   can be cheaper than memory lookup, so that FZ_* macros are used.
102 */
103
/* Private data of one table (reached through fib_table tb_data). */
struct fn_hash {
	struct fn_zone	*fn_zones[33];	/* indexed by prefix length, 0..32 */
	struct fn_zone	*fn_zone_list;	/* zones chained via fz_next, walked by lookup */
};
109
110static __inline__ fn_hash_idx_t fn_hash(fn_key_t key, struct fn_zone *fz)
111{
112	u32 h = ntohl(key.datum)>>(32 - fz->fz_order);
113	h ^= (h>>20);
114	h ^= (h>>10);
115	h ^= (h>>5);
116	h &= FZ_HASHMASK(fz);
117	return *(fn_hash_idx_t*)&h;
118}
119
/* Reset a key to the all-zero value. */
#define fz_key_0(key)		((key).datum = 0)
/* Raw prefix value stored in a key; the zone argument is not used. */
#define fz_prefix(key,fz)	((key).datum)
122
123static __inline__ fn_key_t fz_key(u32 dst, struct fn_zone *fz)
124{
125	fn_key_t k;
126	k.datum = dst & FZ_MASK(fz);
127	return k;
128}
129
130static __inline__ struct fib_node ** fz_chain_p(fn_key_t key, struct fn_zone *fz)
131{
132	return &fz->fz_hash[fn_hash(key, fz).datum];
133}
134
135static __inline__ struct fib_node * fz_chain(fn_key_t key, struct fn_zone *fz)
136{
137	return fz->fz_hash[fn_hash(key, fz).datum];
138}
139
140extern __inline__ int fn_key_eq(fn_key_t a, fn_key_t b)
141{
142	return a.datum == b.datum;
143}
144
145extern __inline__ int fn_key_leq(fn_key_t a, fn_key_t b)
146{
147	return a.datum <= b.datum;
148}
149
150static rwlock_t fib_hash_lock = RW_LOCK_UNLOCKED;
151
152#define FZ_MAX_DIVISOR 1024
153
154#ifdef CONFIG_IP_ROUTE_LARGE_TABLES
155
156/* The fib hash lock must be held when this is called. */
157static __inline__ void fn_rebuild_zone(struct fn_zone *fz,
158				       struct fib_node **old_ht,
159				       int old_divisor)
160{
161	int i;
162	struct fib_node *f, **fp, *next;
163
164	for (i=0; i<old_divisor; i++) {
165		for (f=old_ht[i]; f; f=next) {
166			next = f->fn_next;
167			for (fp = fz_chain_p(f->fn_key, fz);
168			     *fp && fn_key_leq((*fp)->fn_key, f->fn_key);
169			     fp = &(*fp)->fn_next)
170				/* NONE */;
171			f->fn_next = *fp;
172			*fp = f;
173		}
174	}
175}
176
177static void fn_rehash_zone(struct fn_zone *fz)
178{
179	struct fib_node **ht, **old_ht;
180	int old_divisor, new_divisor;
181	u32 new_hashmask;
182
183	old_divisor = fz->fz_divisor;
184
185	switch (old_divisor) {
186	case 16:
187		new_divisor = 256;
188		new_hashmask = 0xFF;
189		break;
190	case 256:
191		new_divisor = 1024;
192		new_hashmask = 0x3FF;
193		break;
194	default:
195		printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor);
196		return;
197	}
198#if RT_CACHE_DEBUG >= 2
199	printk("fn_rehash_zone: hash for zone %d grows from %d\n", fz->fz_order, old_divisor);
200#endif
201
202	ht = kmalloc(new_divisor*sizeof(struct fib_node*), GFP_KERNEL);
203
204	if (ht)	{
205		memset(ht, 0, new_divisor*sizeof(struct fib_node*));
206		write_lock_bh(&fib_hash_lock);
207		old_ht = fz->fz_hash;
208		fz->fz_hash = ht;
209		fz->fz_hashmask = new_hashmask;
210		fz->fz_divisor = new_divisor;
211		fn_rebuild_zone(fz, old_ht, old_divisor);
212		write_unlock_bh(&fib_hash_lock);
213		kfree(old_ht);
214	}
215}
216#endif /* CONFIG_IP_ROUTE_LARGE_TABLES */
217
218static void fn_free_node(struct fib_node * f)
219{
220	fib_release_info(FIB_INFO(f));
221	kmem_cache_free(fn_hash_kmem, f);
222}
223
224
225static struct fn_zone *
226fn_new_zone(struct fn_hash *table, int z)
227{
228	int i;
229	struct fn_zone *fz = kmalloc(sizeof(struct fn_zone), GFP_KERNEL);
230	if (!fz)
231		return NULL;
232
233	memset(fz, 0, sizeof(struct fn_zone));
234	if (z) {
235		fz->fz_divisor = 16;
236		fz->fz_hashmask = 0xF;
237	} else {
238		fz->fz_divisor = 1;
239		fz->fz_hashmask = 0;
240	}
241	fz->fz_hash = kmalloc(fz->fz_divisor*sizeof(struct fib_node*), GFP_KERNEL);
242	if (!fz->fz_hash) {
243		kfree(fz);
244		return NULL;
245	}
246	memset(fz->fz_hash, 0, fz->fz_divisor*sizeof(struct fib_node*));
247	fz->fz_order = z;
248	fz->fz_mask = inet_make_mask(z);
249
250	/* Find the first not empty zone with more specific mask */
251	for (i=z+1; i<=32; i++)
252		if (table->fn_zones[i])
253			break;
254	write_lock_bh(&fib_hash_lock);
255	if (i>32) {
256		/* No more specific masks, we are the first. */
257		fz->fz_next = table->fn_zone_list;
258		table->fn_zone_list = fz;
259	} else {
260		fz->fz_next = table->fn_zones[i]->fz_next;
261		table->fn_zones[i]->fz_next = fz;
262	}
263	table->fn_zones[z] = fz;
264	write_unlock_bh(&fib_hash_lock);
265	return fz;
266}
267
268static int
269fn_hash_lookup(struct fib_table *tb, const struct rt_key *key, struct fib_result *res)
270{
271	int err;
272	struct fn_zone *fz;
273	struct fn_hash *t = (struct fn_hash*)tb->tb_data;
274
275	read_lock(&fib_hash_lock);
276	for (fz = t->fn_zone_list; fz; fz = fz->fz_next) {
277		struct fib_node *f;
278		fn_key_t k = fz_key(key->dst, fz);
279
280		for (f = fz_chain(k, fz); f; f = f->fn_next) {
281			if (!fn_key_eq(k, f->fn_key)) {
282				if (fn_key_leq(k, f->fn_key))
283					break;
284				else
285					continue;
286			}
287#ifdef CONFIG_IP_ROUTE_TOS
288			if (f->fn_tos && f->fn_tos != key->tos)
289				continue;
290#endif
291			f->fn_state |= FN_S_ACCESSED;
292
293			if (f->fn_state&FN_S_ZOMBIE)
294				continue;
295			if (f->fn_scope < key->scope)
296				continue;
297
298			err = fib_semantic_match(f->fn_type, FIB_INFO(f), key, res);
299			if (err == 0) {
300				res->type = f->fn_type;
301				res->scope = f->fn_scope;
302				res->prefixlen = fz->fz_order;
303				goto out;
304			}
305			if (err < 0)
306				goto out;
307		}
308	}
309	err = 1;
310out:
311	read_unlock(&fib_hash_lock);
312	return err;
313}
314
315static int fn_hash_last_dflt=-1;
316
317static int fib_detect_death(struct fib_info *fi, int order,
318			    struct fib_info **last_resort, int *last_idx)
319{
320	struct neighbour *n;
321	int state = NUD_NONE;
322
323	n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
324	if (n) {
325		state = n->nud_state;
326		neigh_release(n);
327	}
328	if (state==NUD_REACHABLE)
329		return 0;
330	if ((state&NUD_VALID) && order != fn_hash_last_dflt)
331		return 0;
332	if ((state&NUD_VALID) ||
333	    (*last_idx<0 && order > fn_hash_last_dflt)) {
334		*last_resort = fi;
335		*last_idx = order;
336	}
337	return 1;
338}
339
340static void
341fn_hash_select_default(struct fib_table *tb, const struct rt_key *key, struct fib_result *res)
342{
343	int order, last_idx;
344	struct fib_node *f;
345	struct fib_info *fi = NULL;
346	struct fib_info *last_resort;
347	struct fn_hash *t = (struct fn_hash*)tb->tb_data;
348	struct fn_zone *fz = t->fn_zones[0];
349
350	if (fz == NULL)
351		return;
352
353	last_idx = -1;
354	last_resort = NULL;
355	order = -1;
356
357	read_lock(&fib_hash_lock);
358	for (f = fz->fz_hash[0]; f; f = f->fn_next) {
359		struct fib_info *next_fi = FIB_INFO(f);
360
361		if ((f->fn_state&FN_S_ZOMBIE) ||
362		    f->fn_scope != res->scope ||
363		    f->fn_type != RTN_UNICAST)
364			continue;
365
366		if (next_fi->fib_priority > res->fi->fib_priority)
367			break;
368		if (!next_fi->fib_nh[0].nh_gw || next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
369			continue;
370		f->fn_state |= FN_S_ACCESSED;
371
372		if (fi == NULL) {
373			if (next_fi != res->fi)
374				break;
375		} else if (!fib_detect_death(fi, order, &last_resort, &last_idx)) {
376			if (res->fi)
377				fib_info_put(res->fi);
378			res->fi = fi;
379			atomic_inc(&fi->fib_clntref);
380			fn_hash_last_dflt = order;
381			goto out;
382		}
383		fi = next_fi;
384		order++;
385	}
386
387	if (order<=0 || fi==NULL) {
388		fn_hash_last_dflt = -1;
389		goto out;
390	}
391
392	if (!fib_detect_death(fi, order, &last_resort, &last_idx)) {
393		if (res->fi)
394			fib_info_put(res->fi);
395		res->fi = fi;
396		atomic_inc(&fi->fib_clntref);
397		fn_hash_last_dflt = order;
398		goto out;
399	}
400
401	if (last_idx >= 0) {
402		if (res->fi)
403			fib_info_put(res->fi);
404		res->fi = last_resort;
405		if (last_resort)
406			atomic_inc(&last_resort->fib_clntref);
407	}
408	fn_hash_last_dflt = last_idx;
409out:
410	read_unlock(&fib_hash_lock);
411}
412
/*
 * Chain iteration helpers.  None of them has an initialiser: each continues
 * from the link 'fp' currently points at, and whenever the loop is left
 * 'f' equals *fp.  Several scans can therefore be run back to back to
 * narrow down a position in a sorted chain.
 */

/* Visit every remaining node of the chain. */
#define FIB_SCAN(f, fp) \
for ( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next)

/* Visit nodes only for as long as their key equals 'key'. */
#define FIB_SCAN_KEY(f, fp, key) \
for ( ; ((f) = *(fp)) != NULL && fn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next)

/*
 * Visit nodes for as long as both key and TOS match.  Without
 * CONFIG_IP_ROUTE_TOS the 'tos' argument is dropped and only the key counts.
 */
#ifdef CONFIG_IP_ROUTE_TOS
#define FIB_SCAN_TOS(f, fp, key, tos) \
for ( ; ((f) = *(fp)) != NULL && fn_key_eq((f)->fn_key, (key)) && \
     (f)->fn_tos == (tos) ; (fp) = &(f)->fn_next)
#else
#define FIB_SCAN_TOS(f, fp, key, tos) FIB_SCAN_KEY(f, fp, key)
#endif
426
427
/* Forward declaration of the route change notifier defined later in this file. */
static void rtmsg_fib(int event, struct fib_node *f, int z, int tb_id,
		      struct nlmsghdr *n, struct netlink_skb_parms *req);
431
432static int
433fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
434		struct nlmsghdr *n, struct netlink_skb_parms *req)
435{
436	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
437	struct fib_node *new_f, *f, **fp, **del_fp;
438	struct fn_zone *fz;
439	struct fib_info *fi;
440
441	int z = r->rtm_dst_len;
442	int type = r->rtm_type;
443#ifdef CONFIG_IP_ROUTE_TOS
444	u8 tos = r->rtm_tos;
445#endif
446	fn_key_t key;
447	int err;
448
449FTprint("tb(%d)_insert: %d %08x/%d %d %08x\n", tb->tb_id, r->rtm_type, rta->rta_dst ?
450*(u32*)rta->rta_dst : 0, z, rta->rta_oif ? *rta->rta_oif : -1,
451rta->rta_prefsrc ? *(u32*)rta->rta_prefsrc : 0);
452	if (z > 32)
453		return -EINVAL;
454	fz = table->fn_zones[z];
455	if (!fz && !(fz = fn_new_zone(table, z)))
456		return -ENOBUFS;
457
458	fz_key_0(key);
459	if (rta->rta_dst) {
460		u32 dst;
461		memcpy(&dst, rta->rta_dst, 4);
462		if (dst & ~FZ_MASK(fz))
463			return -EINVAL;
464		key = fz_key(dst, fz);
465	}
466
467	if  ((fi = fib_create_info(r, rta, n, &err)) == NULL)
468		return err;
469
470#ifdef CONFIG_IP_ROUTE_LARGE_TABLES
471	if (fz->fz_nent > (fz->fz_divisor<<2) &&
472	    fz->fz_divisor < FZ_MAX_DIVISOR &&
473	    (z==32 || (1<<z) > fz->fz_divisor))
474		fn_rehash_zone(fz);
475#endif
476
477	fp = fz_chain_p(key, fz);
478
479
480	/*
481	 * Scan list to find the first route with the same destination
482	 */
483	FIB_SCAN(f, fp) {
484		if (fn_key_leq(key,f->fn_key))
485			break;
486	}
487
488#ifdef CONFIG_IP_ROUTE_TOS
489	/*
490	 * Find route with the same destination and tos.
491	 */
492	FIB_SCAN_KEY(f, fp, key) {
493		if (f->fn_tos <= tos)
494			break;
495	}
496#endif
497
498	del_fp = NULL;
499
500	if (f && (f->fn_state&FN_S_ZOMBIE) &&
501#ifdef CONFIG_IP_ROUTE_TOS
502	    f->fn_tos == tos &&
503#endif
504	    fn_key_eq(f->fn_key, key)) {
505		del_fp = fp;
506		fp = &f->fn_next;
507		f = *fp;
508		goto create;
509	}
510
511	FIB_SCAN_TOS(f, fp, key, tos) {
512		if (fi->fib_priority <= FIB_INFO(f)->fib_priority)
513			break;
514	}
515
516	/* Now f==*fp points to the first node with the same
517	   keys [prefix,tos,priority], if such key already
518	   exists or to the node, before which we will insert new one.
519	 */
520
521	if (f &&
522#ifdef CONFIG_IP_ROUTE_TOS
523	    f->fn_tos == tos &&
524#endif
525	    fn_key_eq(f->fn_key, key) &&
526	    fi->fib_priority == FIB_INFO(f)->fib_priority) {
527		struct fib_node **ins_fp;
528
529		err = -EEXIST;
530		if (n->nlmsg_flags&NLM_F_EXCL)
531			goto out;
532
533		if (n->nlmsg_flags&NLM_F_REPLACE) {
534			del_fp = fp;
535			fp = &f->fn_next;
536			f = *fp;
537			goto replace;
538		}
539
540		ins_fp = fp;
541		err = -EEXIST;
542
543		FIB_SCAN_TOS(f, fp, key, tos) {
544			if (fi->fib_priority != FIB_INFO(f)->fib_priority)
545				break;
546			if (f->fn_type == type && f->fn_scope == r->rtm_scope
547			    && FIB_INFO(f) == fi)
548				goto out;
549		}
550
551		if (!(n->nlmsg_flags&NLM_F_APPEND)) {
552			fp = ins_fp;
553			f = *fp;
554		}
555	}
556
557create:
558	err = -ENOENT;
559	if (!(n->nlmsg_flags&NLM_F_CREATE))
560		goto out;
561
562replace:
563	err = -ENOBUFS;
564	new_f = kmem_cache_alloc(fn_hash_kmem, SLAB_KERNEL);
565	if (new_f == NULL)
566		goto out;
567
568	memset(new_f, 0, sizeof(struct fib_node));
569
570	new_f->fn_key = key;
571#ifdef CONFIG_IP_ROUTE_TOS
572	new_f->fn_tos = tos;
573#endif
574	new_f->fn_type = type;
575	new_f->fn_scope = r->rtm_scope;
576	FIB_INFO(new_f) = fi;
577
578	/*
579	 * Insert new entry to the list.
580	 */
581
582	new_f->fn_next = f;
583	write_lock_bh(&fib_hash_lock);
584	*fp = new_f;
585	write_unlock_bh(&fib_hash_lock);
586	fz->fz_nent++;
587
588	if (del_fp) {
589		f = *del_fp;
590		/* Unlink replaced node */
591		write_lock_bh(&fib_hash_lock);
592		*del_fp = f->fn_next;
593		write_unlock_bh(&fib_hash_lock);
594
595		if (!(f->fn_state&FN_S_ZOMBIE))
596			rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req);
597		if (f->fn_state&FN_S_ACCESSED)
598			rt_cache_flush(-1);
599		fn_free_node(f);
600		fz->fz_nent--;
601	} else {
602		rt_cache_flush(-1);
603	}
604	rtmsg_fib(RTM_NEWROUTE, new_f, z, tb->tb_id, n, req);
605	return 0;
606
607out:
608	fib_release_info(fi);
609	return err;
610}
611
612
613static int
614fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
615		struct nlmsghdr *n, struct netlink_skb_parms *req)
616{
617	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
618	struct fib_node **fp, **del_fp, *f;
619	int z = r->rtm_dst_len;
620	struct fn_zone *fz;
621	fn_key_t key;
622	int matched;
623#ifdef CONFIG_IP_ROUTE_TOS
624	u8 tos = r->rtm_tos;
625#endif
626
627FTprint("tb(%d)_delete: %d %08x/%d %d\n", tb->tb_id, r->rtm_type, rta->rta_dst ?
628       *(u32*)rta->rta_dst : 0, z, rta->rta_oif ? *rta->rta_oif : -1);
629	if (z > 32)
630		return -EINVAL;
631	if ((fz  = table->fn_zones[z]) == NULL)
632		return -ESRCH;
633
634	fz_key_0(key);
635	if (rta->rta_dst) {
636		u32 dst;
637		memcpy(&dst, rta->rta_dst, 4);
638		if (dst & ~FZ_MASK(fz))
639			return -EINVAL;
640		key = fz_key(dst, fz);
641	}
642
643	fp = fz_chain_p(key, fz);
644
645
646	FIB_SCAN(f, fp) {
647		if (fn_key_eq(f->fn_key, key))
648			break;
649		if (fn_key_leq(key, f->fn_key)) {
650			return -ESRCH;
651		}
652	}
653#ifdef CONFIG_IP_ROUTE_TOS
654	FIB_SCAN_KEY(f, fp, key) {
655		if (f->fn_tos == tos)
656			break;
657	}
658#endif
659
660	matched = 0;
661	del_fp = NULL;
662	FIB_SCAN_TOS(f, fp, key, tos) {
663		struct fib_info * fi = FIB_INFO(f);
664
665		if (f->fn_state&FN_S_ZOMBIE) {
666			return -ESRCH;
667		}
668		matched++;
669
670		if (del_fp == NULL &&
671		    (!r->rtm_type || f->fn_type == r->rtm_type) &&
672		    (r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) &&
673		    (!r->rtm_protocol || fi->fib_protocol == r->rtm_protocol) &&
674		    fib_nh_match(r, n, rta, fi) == 0)
675			del_fp = fp;
676	}
677
678	if (del_fp) {
679		f = *del_fp;
680		rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req);
681
682		if (matched != 1) {
683			write_lock_bh(&fib_hash_lock);
684			*del_fp = f->fn_next;
685			write_unlock_bh(&fib_hash_lock);
686
687			if (f->fn_state&FN_S_ACCESSED)
688				rt_cache_flush(-1);
689			fn_free_node(f);
690			fz->fz_nent--;
691		} else {
692			f->fn_state |= FN_S_ZOMBIE;
693			if (f->fn_state&FN_S_ACCESSED) {
694				f->fn_state &= ~FN_S_ACCESSED;
695				rt_cache_flush(-1);
696			}
697			if (++fib_hash_zombies > 128)
698				fib_flush();
699		}
700
701		return 0;
702	}
703	return -ESRCH;
704}
705
706extern __inline__ int
707fn_flush_list(struct fib_node ** fp, int z, struct fn_hash *table)
708{
709	int found = 0;
710	struct fib_node *f;
711
712	while ((f = *fp) != NULL) {
713		struct fib_info *fi = FIB_INFO(f);
714
715		if (fi && ((f->fn_state&FN_S_ZOMBIE) || (fi->fib_flags&RTNH_F_DEAD))) {
716			write_lock_bh(&fib_hash_lock);
717			*fp = f->fn_next;
718			write_unlock_bh(&fib_hash_lock);
719
720			fn_free_node(f);
721			found++;
722			continue;
723		}
724		fp = &f->fn_next;
725	}
726	return found;
727}
728
729static int fn_hash_flush(struct fib_table *tb)
730{
731	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
732	struct fn_zone *fz;
733	int found = 0;
734
735	fib_hash_zombies = 0;
736	for (fz = table->fn_zone_list; fz; fz = fz->fz_next) {
737		int i;
738		int tmp = 0;
739		for (i=fz->fz_divisor-1; i>=0; i--)
740			tmp += fn_flush_list(&fz->fz_hash[i], fz->fz_order, table);
741		fz->fz_nent -= tmp;
742		found += tmp;
743	}
744	return found;
745}
746
747
748#ifdef CONFIG_PROC_FS
749
750static int fn_hash_get_info(struct fib_table *tb, char *buffer, int first, int count)
751{
752	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
753	struct fn_zone *fz;
754	int pos = 0;
755	int n = 0;
756
757	read_lock(&fib_hash_lock);
758	for (fz=table->fn_zone_list; fz; fz = fz->fz_next) {
759		int i;
760		struct fib_node *f;
761		int maxslot = fz->fz_divisor;
762		struct fib_node **fp = fz->fz_hash;
763
764		if (fz->fz_nent == 0)
765			continue;
766
767		if (pos + fz->fz_nent <= first) {
768			pos += fz->fz_nent;
769			continue;
770		}
771
772		for (i=0; i < maxslot; i++, fp++) {
773			for (f = *fp; f; f = f->fn_next) {
774				if (++pos <= first)
775					continue;
776				fib_node_get_info(f->fn_type,
777						  f->fn_state&FN_S_ZOMBIE,
778						  FIB_INFO(f),
779						  fz_prefix(f->fn_key, fz),
780						  FZ_MASK(fz), buffer);
781				buffer += 128;
782				if (++n >= count)
783					goto out;
784			}
785		}
786	}
787out:
788	read_unlock(&fib_hash_lock);
789  	return n;
790}
791#endif
792
793
794static __inline__ int
795fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
796		     struct fib_table *tb,
797		     struct fn_zone *fz,
798		     struct fib_node *f)
799{
800	int i, s_i;
801
802	s_i = cb->args[3];
803	for (i=0; f; i++, f=f->fn_next) {
804		if (i < s_i) continue;
805		if (f->fn_state&FN_S_ZOMBIE) continue;
806		if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
807				  RTM_NEWROUTE,
808				  tb->tb_id, (f->fn_state&FN_S_ZOMBIE) ? 0 : f->fn_type, f->fn_scope,
809				  &f->fn_key, fz->fz_order, f->fn_tos,
810				  f->fn_info) < 0) {
811			cb->args[3] = i;
812			return -1;
813		}
814	}
815	cb->args[3] = i;
816	return skb->len;
817}
818
819static __inline__ int
820fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
821		   struct fib_table *tb,
822		   struct fn_zone *fz)
823{
824	int h, s_h;
825
826	s_h = cb->args[2];
827	for (h=0; h < fz->fz_divisor; h++) {
828		if (h < s_h) continue;
829		if (h > s_h)
830			memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0]));
831		if (fz->fz_hash == NULL || fz->fz_hash[h] == NULL)
832			continue;
833		if (fn_hash_dump_bucket(skb, cb, tb, fz, fz->fz_hash[h]) < 0) {
834			cb->args[2] = h;
835			return -1;
836		}
837	}
838	cb->args[2] = h;
839	return skb->len;
840}
841
842static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
843{
844	int m, s_m;
845	struct fn_zone *fz;
846	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
847
848	s_m = cb->args[1];
849	read_lock(&fib_hash_lock);
850	for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) {
851		if (m < s_m) continue;
852		if (m > s_m)
853			memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(cb->args[0]));
854		if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
855			cb->args[1] = m;
856			read_unlock(&fib_hash_lock);
857			return -1;
858		}
859	}
860	read_unlock(&fib_hash_lock);
861	cb->args[1] = m;
862	return skb->len;
863}
864
865static void rtmsg_fib(int event, struct fib_node* f, int z, int tb_id,
866		      struct nlmsghdr *n, struct netlink_skb_parms *req)
867{
868	struct sk_buff *skb;
869	u32 pid = req ? req->pid : 0;
870	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
871
872	skb = alloc_skb(size, GFP_KERNEL);
873	if (!skb)
874		return;
875
876	if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
877			  f->fn_type, f->fn_scope, &f->fn_key, z, f->fn_tos,
878			  FIB_INFO(f)) < 0) {
879		kfree_skb(skb);
880		return;
881	}
882	NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE;
883	if (n->nlmsg_flags&NLM_F_ECHO)
884		atomic_inc(&skb->users);
885	netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL);
886	if (n->nlmsg_flags&NLM_F_ECHO)
887		netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
888}
889
890#ifdef CONFIG_IP_MULTIPLE_TABLES
891struct fib_table * fib_hash_init(int id)
892#else
893struct fib_table * __init fib_hash_init(int id)
894#endif
895{
896	struct fib_table *tb;
897
898	if (fn_hash_kmem == NULL)
899		fn_hash_kmem = kmem_cache_create("ip_fib_hash",
900						 sizeof(struct fib_node),
901						 0, SLAB_HWCACHE_ALIGN,
902						 NULL, NULL);
903
904	tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash), GFP_KERNEL);
905	if (tb == NULL)
906		return NULL;
907
908	tb->tb_id = id;
909	tb->tb_lookup = fn_hash_lookup;
910	tb->tb_insert = fn_hash_insert;
911	tb->tb_delete = fn_hash_delete;
912	tb->tb_flush = fn_hash_flush;
913	tb->tb_select_default = fn_hash_select_default;
914	tb->tb_dump = fn_hash_dump;
915#ifdef CONFIG_PROC_FS
916	tb->tb_get_info = fn_hash_get_info;
917#endif
918	memset(tb->tb_data, 0, sizeof(struct fn_hash));
919	return tb;
920}
921