1/*
2 * Copyright (c) 1988, 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)radix.c	8.5 (Berkeley) 5/19/95
30 * $FreeBSD: head/sys/net/radix.c 128433 2004-04-19 17:28:39Z luigi $
31 */
32
33/*
34 * Routines to build and maintain radix trees for routing lookups.
35 */
36#ifndef _RADIX_H_
37#include <sys/param.h>
38#ifdef	_KERNEL
39#include <sys/lock.h>
40#include <sys/mutex.h>
41#include <sys/systm.h>
42#include <sys/malloc.h>
43#include <sys/domain.h>
44#else
45#include <stdlib.h>
46#endif
47#include <sys/syslog.h>
48#include <net/radix.h>
49#endif
50
51static int	rn_walktree_from(struct radix_node_head *h, void *a, void *m,
52		    walktree_f_t *f, void *w);
53static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *);
54static struct radix_node
55	 *rn_insert(void *, struct radix_node_head *, int *,
56	     struct radix_node [2]),
57	 *rn_newpair(void *, int, struct radix_node[2]),
58	 *rn_search(void *, struct radix_node *),
59	 *rn_search_m(void *, struct radix_node *, void *);
60
61static int	max_keylen;
62static struct radix_mask *rn_mkfreelist;
63static struct radix_node_head *mask_rnhead;
64/*
65 * Work area -- the following pointers point to 3 buffers of size max_keylen,
66 * allocated in this order in a block of memory malloc'ed by rn_init.
67 */
68static char *rn_zeros, *rn_ones, *addmask_key;
69
70#define MKGet(m) {						\
71	if (rn_mkfreelist) {					\
72		m = rn_mkfreelist;				\
73		rn_mkfreelist = (m)->rm_mklist;			\
74	} else							\
75		R_Malloc(m, struct radix_mask *, sizeof (struct radix_mask)); }
76
77#define MKFree(m) { (m)->rm_mklist = rn_mkfreelist; rn_mkfreelist = (m);}
78
79#define rn_masktop (mask_rnhead->rnh_treetop)
80
81static int	rn_lexobetter(void *m_arg, void *n_arg);
82static struct radix_mask *
83		rn_new_radix_mask(struct radix_node *tt,
84		    struct radix_mask *next);
85static int	rn_satisfies_leaf(char *trial, struct radix_node *leaf,
86		    int skip);
87
88/*
89 * The data structure for the keys is a radix tree with one-way
90 * branching removed.  The index rn_bit at an internal node n represents a bit
91 * position to be tested.  The tree is arranged so that all descendants
92 * of a node n have keys whose bits all agree up to position rn_bit - 1.
93 * (We say the index of n is rn_bit.)
94 *
95 * There is at least one descendant which has a one bit at position rn_bit,
96 * and at least one with a zero there.
97 *
98 * A route is determined by a pair of key and mask.  We require that the
99 * bit-wise logical and of the key and mask be the key.
100 * We define the index of a route associated with a mask to be
101 * the first bit number in the mask where 0 occurs (with bit number 0
102 * representing the highest order bit).
103 *
104 * We say a mask is normal if every bit past the index of the mask is 0.
105 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit,
106 * and m is a normal mask, then the route applies to every descendant of n.
107 * If index(m) < rn_bit, this implies that the last few bits of k
108 * before bit rn_bit are all 0 (and hence the same holds for every descendant
109 * of n), so the route applies to all descendants of the node as well.
110 *
111 * Similar logic shows that a non-normal mask m such that
112 * index(m) <= index(n) could potentially apply to many children of n.
113 * Thus, for each non-host route, we attach its mask to a list at an internal
114 * node as high in the tree as we can go.
115 *
116 * The present version of the code makes use of normal routes in short-
117 * circuiting an explicit mask and compare operation when testing whether
118 * a key satisfies a normal route, and also in remembering the unique leaf
119 * that governs a subtree.
120 */
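
/*
 * Illustrative example (the 4-address-byte layout is an assumption, not
 * something this file defines): the mask 0xff 0xff 0x00 0x00 has its
 * first zero at bit 16, counting bit 0 as the high-order bit of the
 * first byte shown, so its index is 16; it is normal because every bit
 * past 16 is zero.  A mask such as 0xff 0x00 0xff 0x00 also has index 8
 * but is not normal, since bits past position 8 are still set.
 */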
121
122static struct radix_node *
123rn_search(v_arg, head)
124	void *v_arg;
125	struct radix_node *head;
126{
127	register struct radix_node *x;
128	register caddr_t v;
129
130	for (x = head, v = v_arg; x->rn_bit >= 0;) {
131		if (x->rn_bmask & v[x->rn_offset])
132			x = x->rn_right;
133		else
134			x = x->rn_left;
135	}
136	return (x);
137}
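
/*
 * Illustrative note: an internal node that tests bit 18 of the key has
 * rn_offset = 18 >> 3 = 2 and rn_bmask = 0x80 >> (18 & 7) = 0x20 (see
 * rn_newpair() below), so the test above examines the third-highest bit
 * of byte 2 of the search key.
 */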
138
139static struct radix_node *
140rn_search_m(v_arg, head, m_arg)
141	struct radix_node *head;
142	void *v_arg, *m_arg;
143{
144	register struct radix_node *x;
145	register caddr_t v = v_arg, m = m_arg;
146
147	for (x = head; x->rn_bit >= 0;) {
148		if ((x->rn_bmask & m[x->rn_offset]) &&
149		    (x->rn_bmask & v[x->rn_offset]))
150			x = x->rn_right;
151		else
152			x = x->rn_left;
153	}
154	return x;
155}
156
157int
158rn_refines(m_arg, n_arg)
159	void *m_arg, *n_arg;
160{
161	register caddr_t m = m_arg, n = n_arg;
162	register caddr_t lim, lim2 = lim = n + *(u_char *)n;
163	int longer = (*(u_char *)n++) - (int)(*(u_char *)m++);
164	int masks_are_equal = 1;
165
166	if (longer > 0)
167		lim -= longer;
168	while (n < lim) {
169		if (*n & ~(*m))
170			return 0;
171		if (*n++ != *m++)
172			masks_are_equal = 0;
173	}
174	while (n < lim2)
175		if (*n++)
176			return 0;
177	if (masks_are_equal && (longer < 0))
178		for (lim2 = m - longer; m < lim2; )
179			if (*m++)
180				return 1;
181	return (!masks_are_equal);
182}
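
#if 0
/*
 * Illustrative sketch, not part of the build: how rn_refines() orders two
 * contiguous masks.  It returns non-zero when the first mask covers every
 * bit of the second and is not equal to it.  Assumes sockaddr_in-style
 * length-prefixed masks; the byte layout below is hypothetical.
 */
static void
rn_refines_example(void)
{
	static u_char m24[] = { 8, 0, 0, 0, 0xff, 0xff, 0xff, 0x00 };
	static u_char m16[] = { 8, 0, 0, 0, 0xff, 0xff, 0x00, 0x00 };

	rn_refines(m24, m16);	/* != 0: /24 is strictly more specific */
	rn_refines(m16, m24);	/* == 0: /16 does not refine /24 */
	rn_refines(m16, m16);	/* == 0: equal masks never refine each other */
}
#endif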
183
184struct radix_node *
185rn_lookup(v_arg, m_arg, head)
186	void *v_arg, *m_arg;
187	struct radix_node_head *head;
188{
189	register struct radix_node *x;
190	caddr_t netmask = 0;
191
192	if (m_arg) {
193		x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_offset);
194		if (x == 0)
195			return (0);
196		netmask = x->rn_key;
197	}
198	x = rn_match(v_arg, head);
199	if (x && netmask) {
200		while (x && x->rn_mask != netmask)
201			x = x->rn_dupedkey;
202	}
203	return x;
204}
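
/*
 * Clarifying note: rn_lookup() is the exact-lookup counterpart of
 * rn_match().  With a mask argument it returns the leaf that v_arg
 * matches (via rn_match()) and whose mask is the one interned for m_arg,
 * or NULL if the matched duped-key chain carries no such mask; with a
 * null mask it behaves exactly like rn_match().
 */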
205
206static int
207rn_satisfies_leaf(trial, leaf, skip)
208	char *trial;
209	register struct radix_node *leaf;
210	int skip;
211{
212	register char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask;
213	char *cplim;
214	int length = min(*(u_char *)cp, *(u_char *)cp2);
215
216	if (cp3 == 0)
217		cp3 = rn_ones;
218	else
219		length = min(length, *(u_char *)cp3);
220	cplim = cp + length; cp3 += skip; cp2 += skip;
221	for (cp += skip; cp < cplim; cp++, cp2++, cp3++)
222		if ((*cp ^ *cp2) & *cp3)
223			return 0;
224	return 1;
225}
226
227struct radix_node *
228rn_match(v_arg, head)
229	void *v_arg;
230	struct radix_node_head *head;
231{
232	caddr_t v = v_arg;
233	register struct radix_node *t = head->rnh_treetop, *x;
234	register caddr_t cp = v, cp2;
235	caddr_t cplim;
236	struct radix_node *saved_t, *top = t;
237	int off = t->rn_offset, vlen = *(u_char *)cp, matched_off;
238	register int test, b, rn_bit;
239
240	/*
241	 * Open code rn_search(v, top) to avoid overhead of extra
242	 * subroutine call.
243	 */
244	for (; t->rn_bit >= 0; ) {
245		if (t->rn_bmask & cp[t->rn_offset])
246			t = t->rn_right;
247		else
248			t = t->rn_left;
249	}
250	/*
251	 * See if we match exactly as a host destination
252	 * or at least learn how many bits match, for normal mask finesse.
253	 *
254	 * It doesn't hurt to limit how many bytes we check
255	 * to the length of the mask, since if those bytes match we have a
256	 * genuine match and the leaf we found is the most specific one anyway;
257	 * if the key didn't match at the shorter length it would also fail
258	 * at a longer one.  This wins big for class B and C netmasks, which
259	 * are probably the most common case...
260	 */
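	/*
	 * Illustrative note: if the leaf's (trimmed) mask covers only the
	 * first two address bytes (a /16), vlen shrinks so that just those
	 * bytes are compared; a key that agrees with the leaf on them
	 * satisfies the route no matter what its remaining bytes hold.
	 */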
261	if (t->rn_mask)
262		vlen = *(u_char *)t->rn_mask;
263	cp += off; cp2 = t->rn_key + off; cplim = v + vlen;
264	for (; cp < cplim; cp++, cp2++)
265		if (*cp != *cp2)
266			goto on1;
267	/*
268	 * This extra grot is in case we are explicitly asked
269	 * to look up the default.  Ugh!
270	 *
271	 * Never return the root node itself, it seems to cause a
272	 * lot of confusion.
273	 */
274	if (t->rn_flags & RNF_ROOT)
275		t = t->rn_dupedkey;
276	return t;
277on1:
278	test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
279	for (b = 7; (test >>= 1) > 0;)
280		b--;
281	matched_off = cp - v;
282	b += matched_off << 3;
283	rn_bit = -1 - b;
284	/*
285	 * If there is a host route in a duped-key chain, it will be first.
286	 */
287	if ((saved_t = t)->rn_mask == 0)
288		t = t->rn_dupedkey;
289	for (; t; t = t->rn_dupedkey)
290		/*
291		 * Even if we don't match exactly as a host,
292		 * we may match if the leaf we wound up at is
293		 * a route to a net.
294		 */
295		if (t->rn_flags & RNF_NORMAL) {
296			if (rn_bit <= t->rn_bit)
297				return t;
298		} else if (rn_satisfies_leaf(v, t, matched_off))
299				return t;
300	t = saved_t;
301	/* start searching up the tree */
302	do {
303		register struct radix_mask *m;
304		t = t->rn_parent;
305		m = t->rn_mklist;
306		/*
307		 * If non-contiguous masks ever become important
308		 * we can restore the masking and open coding of
309		 * the search and satisfaction test and put the
310		 * calculation of "off" back before the "do".
311		 */
312		while (m) {
313			if (m->rm_flags & RNF_NORMAL) {
314				if (rn_bit <= m->rm_bit)
315					return (m->rm_leaf);
316			} else {
317				off = min(t->rn_offset, matched_off);
318				x = rn_search_m(v, t, m->rm_mask);
319				while (x && x->rn_mask != m->rm_mask)
320					x = x->rn_dupedkey;
321				if (x && rn_satisfies_leaf(v, x, off))
322					return x;
323			}
324			m = m->rm_mklist;
325		}
326	} while (t != top);
327	return 0;
328}
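
/*
 * Illustrative example: rn_match() implements longest-prefix matching.
 * If routes exist for 10.1.0.0/16 and 10.1.2.0/24, a lookup of 10.1.2.5
 * returns the /24 leaf, a lookup of 10.1.9.9 falls back to the /16 leaf,
 * and a lookup of 192.168.1.1 matches neither (unless a default route is
 * installed).
 */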
329
330#ifdef RN_DEBUG
331int	rn_nodenum;
332struct	radix_node *rn_clist;
333int	rn_saveinfo;
334int	rn_debug =  1;
335#endif
336
337static struct radix_node *
338rn_newpair(v, b, nodes)
339	void *v;
340	int b;
341	struct radix_node nodes[2];
342{
343	register struct radix_node *tt = nodes, *t = tt + 1;
344	t->rn_bit = b;
345	t->rn_bmask = 0x80 >> (b & 7);
346	t->rn_left = tt;
347	t->rn_offset = b >> 3;
348	tt->rn_bit = -1;
349	tt->rn_key = (caddr_t)v;
350	tt->rn_parent = t;
351	tt->rn_flags = t->rn_flags = RNF_ACTIVE;
352	tt->rn_mklist = t->rn_mklist = 0;
353#ifdef RN_DEBUG
354	tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
355	tt->rn_twin = t;
356	tt->rn_ybro = rn_clist;
357	rn_clist = tt;
358#endif
359	return t;
360}
361
362static struct radix_node *
363rn_insert(v_arg, head, dupentry, nodes)
364	void *v_arg;
365	struct radix_node_head *head;
366	int *dupentry;
367	struct radix_node nodes[2];
368{
369	caddr_t v = v_arg;
370	struct radix_node *top = head->rnh_treetop;
371	int head_off = top->rn_offset, vlen = (int)*((u_char *)v);
372	register struct radix_node *t = rn_search(v_arg, top);
373	register caddr_t cp = v + head_off;
374	register int b;
375	struct radix_node *tt;
376    	/*
377	 * Find first bit at which v and t->rn_key differ
378	 */
379    {
380	register caddr_t cp2 = t->rn_key + head_off;
381	register int cmp_res;
382	caddr_t cplim = v + vlen;
383
384	while (cp < cplim)
385		if (*cp2++ != *cp++)
386			goto on1;
387	*dupentry = 1;
388	return t;
389on1:
390	*dupentry = 0;
391	cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
392	for (b = (cp - v) << 3; cmp_res; b--)
393		cmp_res >>= 1;
394    }
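	/*
	 * Illustrative example: if the first differing byte is byte 2 of
	 * the key and the XOR of that byte pair is 0x20, the loop above
	 * leaves b = 2*8 + 2 = 18, the bit position at which v and
	 * t->rn_key first disagree.
	 */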
395    {
396	register struct radix_node *p, *x = top;
397	cp = v;
398	do {
399		p = x;
400		if (cp[x->rn_offset] & x->rn_bmask)
401			x = x->rn_right;
402		else
403			x = x->rn_left;
404	} while (b > (unsigned) x->rn_bit);
405				/* x->rn_bit < b && x->rn_bit >= 0 */
406#ifdef RN_DEBUG
407	if (rn_debug)
408		log(LOG_DEBUG, "rn_insert: Going In:\n"), traverse(p);
409#endif
410	t = rn_newpair(v_arg, b, nodes);
411	tt = t->rn_left;
412	if ((cp[p->rn_offset] & p->rn_bmask) == 0)
413		p->rn_left = t;
414	else
415		p->rn_right = t;
416	x->rn_parent = t;
417	t->rn_parent = p; /* frees x, p as temp vars below */
418	if ((cp[t->rn_offset] & t->rn_bmask) == 0) {
419		t->rn_right = x;
420	} else {
421		t->rn_right = tt;
422		t->rn_left = x;
423	}
424#ifdef RN_DEBUG
425	if (rn_debug)
426		log(LOG_DEBUG, "rn_insert: Coming Out:\n"), traverse(p);
427#endif
428    }
429	return (tt);
430}
431
432struct radix_node *
433rn_addmask(n_arg, search, skip)
434	int search, skip;
435	void *n_arg;
436{
437	caddr_t netmask = (caddr_t)n_arg;
438	register struct radix_node *x;
439	register caddr_t cp, cplim;
440	register int b = 0, mlen, j;
441	int maskduplicated, m0, isnormal;
442	struct radix_node *saved_x;
443	static int last_zeroed = 0;
444
445	if ((mlen = *(u_char *)netmask) > max_keylen)
446		mlen = max_keylen;
447	if (skip == 0)
448		skip = 1;
449	if (mlen <= skip)
450		return (mask_rnhead->rnh_nodes);
451	if (skip > 1)
452		bcopy(rn_ones + 1, addmask_key + 1, skip - 1);
453	if ((m0 = mlen) > skip)
454		bcopy(netmask + skip, addmask_key + skip, mlen - skip);
455	/*
456	 * Trim trailing zeroes.
457	 */
458	for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;)
459		cp--;
460	mlen = cp - addmask_key;
461	if (mlen <= skip) {
462		if (m0 >= last_zeroed)
463			last_zeroed = mlen;
464		return (mask_rnhead->rnh_nodes);
465	}
466	if (m0 < last_zeroed)
467		bzero(addmask_key + m0, last_zeroed - m0);
468	*addmask_key = last_zeroed = mlen;
469	x = rn_search(addmask_key, rn_masktop);
470	if (bcmp(addmask_key, x->rn_key, mlen) != 0)
471		x = 0;
472	if (x || search)
473		return (x);
474	R_Zalloc(x, struct radix_node *, max_keylen + 2 * sizeof (*x));
475	if ((saved_x = x) == 0)
476		return (0);
477	netmask = cp = (caddr_t)(x + 2);
478	bcopy(addmask_key, cp, mlen);
479	x = rn_insert(cp, mask_rnhead, &maskduplicated, x);
480	if (maskduplicated) {
481		log(LOG_ERR, "rn_addmask: mask impossibly already in tree");
482		Free(saved_x);
483		return (x);
484	}
485	/*
486	 * Calculate index of mask, and check for normalcy.
487	 * First find the first byte with a 0 bit, then if there are
488	 * more bits left (remember we already trimmed the trailing 0's),
489	 * the pattern must be one of those in normal_chars[], or we have
490	 * a non-contiguous mask.
491	 */
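	/*
	 * Illustrative example: with the trailing zeroes already trimmed, a
	 * contiguous mask has at most one byte that is neither 0xff nor
	 * 0x00, and it must be the final byte.  A final byte of 0xe0
	 * matches normal_chars[3] and contributes 3 to b; a byte such as
	 * 0xd0 (binary 1101 0000) matches no entry, so the mask is left
	 * without RNF_NORMAL and is treated as non-contiguous.
	 */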
492	cplim = netmask + mlen;
493	isnormal = 1;
494	for (cp = netmask + skip; (cp < cplim) && *(u_char *)cp == 0xff;)
495		cp++;
496	if (cp != cplim) {
497		static char normal_chars[] = {
498			0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff};
499
500		for (j = 0x80; (j & *cp) != 0; j >>= 1)
501			b++;
502		if (*cp != normal_chars[b] || cp != (cplim - 1))
503			isnormal = 0;
504	}
505	b += (cp - netmask) << 3;
506	x->rn_bit = -1 - b;
507	if (isnormal)
508		x->rn_flags |= RNF_NORMAL;
509	return (x);
510}
511
512static int	/* XXX: arbitrary ordering for non-contiguous masks */
513rn_lexobetter(m_arg, n_arg)
514	void *m_arg, *n_arg;
515{
516	register u_char *mp = m_arg, *np = n_arg, *lim;
517
518	if (*mp > *np)
519		return 1;  /* not really, but need to check longer one first */
520	if (*mp == *np)
521		for (lim = mp + *mp; mp < lim;)
522			if (*mp++ > *np++)
523				return 1;
524	return 0;
525}
526
527static struct radix_mask *
528rn_new_radix_mask(tt, next)
529	register struct radix_node *tt;
530	register struct radix_mask *next;
531{
532	register struct radix_mask *m;
533
534	MKGet(m);
535	if (m == 0) {
536		log(LOG_ERR, "Mask for route not entered\n");
537		return (0);
538	}
539	bzero(m, sizeof *m);
540	m->rm_bit = tt->rn_bit;
541	m->rm_flags = tt->rn_flags;
542	if (tt->rn_flags & RNF_NORMAL)
543		m->rm_leaf = tt;
544	else
545		m->rm_mask = tt->rn_mask;
546	m->rm_mklist = next;
547	tt->rn_mklist = m;
548	return m;
549}
550
551struct radix_node *
552rn_addroute(v_arg, n_arg, head, treenodes)
553	void *v_arg, *n_arg;
554	struct radix_node_head *head;
555	struct radix_node treenodes[2];
556{
557	caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg;
558	register struct radix_node *t, *x = 0, *tt;
559	struct radix_node *saved_tt, *top = head->rnh_treetop;
560	short b = 0, b_leaf = 0;
561	int keyduplicated;
562	caddr_t mmask;
563	struct radix_mask *m, **mp;
564
565	/*
566	 * In dealing with non-contiguous masks, there may be
567	 * many different routes which have the same mask.
568	 * We will find it useful to have a unique pointer to
569	 * the mask to speed avoiding duplicate references at
570	 * nodes and possibly save time in calculating indices.
571	 */
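	/*
	 * Clarifying note: because rn_addmask() interns each mask in its
	 * own tree and we take netmask = x->rn_key below, routes with equal
	 * masks share a single mask string, which is what makes pointer
	 * comparisons such as "tt->rn_mask == netmask" sufficient.
	 */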
572	if (netmask)  {
573		if ((x = rn_addmask(netmask, 0, top->rn_offset)) == 0)
574			return (0);
575		b_leaf = x->rn_bit;
576		b = -1 - x->rn_bit;
577		netmask = x->rn_key;
578	}
579	/*
580	 * Deal with duplicated keys: attach node to previous instance
581	 */
582	saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
583	if (keyduplicated) {
584		for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) {
585			if (tt->rn_mask == netmask)
586				return (0);
587			if (netmask == 0 ||
588			    (tt->rn_mask &&
589			     ((b_leaf < tt->rn_bit) /* index(netmask) > node */
590			      || rn_refines(netmask, tt->rn_mask)
591			      || rn_lexobetter(netmask, tt->rn_mask))))
592				break;
593		}
594		/*
595		 * If the mask is not duplicated, we wouldn't
596		 * find it among possible duplicate key entries
597		 * anyway, so the above test doesn't hurt.
598		 *
599		 * We sort the masks for a duplicated key the same way as
600		 * in a masklist -- most specific to least specific.
601		 * This may require the unfortunate nuisance of relocating
602		 * the head of the list.
603		 *
604		 * We also reverse, or doubly link the list through the
605		 * parent pointer.
606		 */
607		if (tt == saved_tt) {
608			struct	radix_node *xx = x;
609			/* link in at head of list */
610			(tt = treenodes)->rn_dupedkey = t;
611			tt->rn_flags = t->rn_flags;
612			tt->rn_parent = x = t->rn_parent;
613			t->rn_parent = tt;	 		/* parent */
614			if (x->rn_left == t)
615				x->rn_left = tt;
616			else
617				x->rn_right = tt;
618			saved_tt = tt; x = xx;
619		} else {
620			(tt = treenodes)->rn_dupedkey = t->rn_dupedkey;
621			t->rn_dupedkey = tt;
622			tt->rn_parent = t;			/* parent */
623			if (tt->rn_dupedkey)			/* parent */
624				tt->rn_dupedkey->rn_parent = tt; /* parent */
625		}
626#ifdef RN_DEBUG
627		t=tt+1; tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
628		tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt;
629#endif
630		tt->rn_key = (caddr_t) v;
631		tt->rn_bit = -1;
632		tt->rn_flags = RNF_ACTIVE;
633	}
634	/*
635	 * Put mask in tree.
636	 */
637	if (netmask) {
638		tt->rn_mask = netmask;
639		tt->rn_bit = x->rn_bit;
640		tt->rn_flags |= x->rn_flags & RNF_NORMAL;
641	}
642	t = saved_tt->rn_parent;
643	if (keyduplicated)
644		goto on2;
645	b_leaf = -1 - t->rn_bit;
646	if (t->rn_right == saved_tt)
647		x = t->rn_left;
648	else
649		x = t->rn_right;
650	/* Promote general routes from below */
651	if (x->rn_bit < 0) {
652	    for (mp = &t->rn_mklist; x; x = x->rn_dupedkey)
653		if (x->rn_mask && (x->rn_bit >= b_leaf) && x->rn_mklist == 0) {
654			*mp = m = rn_new_radix_mask(x, 0);
655			if (m)
656				mp = &m->rm_mklist;
657		}
658	} else if (x->rn_mklist) {
659		/*
660		 * Skip over masks whose index is > that of new node
661		 */
662		for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
663			if (m->rm_bit >= b_leaf)
664				break;
665		t->rn_mklist = m; *mp = 0;
666	}
667on2:
668	/* Add new route to highest possible ancestor's list */
669	if ((netmask == 0) || (b > t->rn_bit ))
670		return tt; /* can't lift at all */
671	b_leaf = tt->rn_bit;
672	do {
673		x = t;
674		t = t->rn_parent;
675	} while (b <= t->rn_bit && x != top);
676	/*
677	 * Search through routes associated with node to
678	 * insert new route according to index.
679	 * Need same criteria as when sorting dupedkeys to avoid
680	 * double loop on deletion.
681	 */
682	for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) {
683		if (m->rm_bit < b_leaf)
684			continue;
685		if (m->rm_bit > b_leaf)
686			break;
687		if (m->rm_flags & RNF_NORMAL) {
688			mmask = m->rm_leaf->rn_mask;
689			if (tt->rn_flags & RNF_NORMAL) {
690			    log(LOG_ERR,
691			        "Non-unique normal route, mask not entered\n");
692				return tt;
693			}
694		} else
695			mmask = m->rm_mask;
696		if (mmask == netmask) {
697			m->rm_refs++;
698			tt->rn_mklist = m;
699			return tt;
700		}
701		if (rn_refines(netmask, mmask)
702		    || rn_lexobetter(netmask, mmask))
703			break;
704	}
705	*mp = rn_new_radix_mask(tt, *mp);
706	return tt;
707}
708
709struct radix_node *
710rn_delete(v_arg, netmask_arg, head)
711	void *v_arg, *netmask_arg;
712	struct radix_node_head *head;
713{
714	register struct radix_node *t, *p, *x, *tt;
715	struct radix_mask *m, *saved_m, **mp;
716	struct radix_node *dupedkey, *saved_tt, *top;
717	caddr_t v, netmask;
718	int b, head_off, vlen;
719
720	v = v_arg;
721	netmask = netmask_arg;
722	x = head->rnh_treetop;
723	tt = rn_search(v, x);
724	head_off = x->rn_offset;
725	vlen =  *(u_char *)v;
726	saved_tt = tt;
727	top = x;
728	if (tt == 0 ||
729	    bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off))
730		return (0);
731	/*
732	 * Delete our route from mask lists.
733	 */
734	if (netmask) {
735		if ((x = rn_addmask(netmask, 1, head_off)) == 0)
736			return (0);
737		netmask = x->rn_key;
738		while (tt->rn_mask != netmask)
739			if ((tt = tt->rn_dupedkey) == 0)
740				return (0);
741	}
742	if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0)
743		goto on1;
744	if (tt->rn_flags & RNF_NORMAL) {
745		if (m->rm_leaf != tt || m->rm_refs > 0) {
746			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
747			return 0;  /* dangling ref could cause disaster */
748		}
749	} else {
750		if (m->rm_mask != tt->rn_mask) {
751			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
752			goto on1;
753		}
754		if (--m->rm_refs >= 0)
755			goto on1;
756	}
757	b = -1 - tt->rn_bit;
758	t = saved_tt->rn_parent;
759	if (b > t->rn_bit)
760		goto on1; /* Wasn't lifted at all */
761	do {
762		x = t;
763		t = t->rn_parent;
764	} while (b <= t->rn_bit && x != top);
765	for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
766		if (m == saved_m) {
767			*mp = m->rm_mklist;
768			MKFree(m);
769			break;
770		}
771	if (m == 0) {
772		log(LOG_ERR, "rn_delete: couldn't find our annotation\n");
773		if (tt->rn_flags & RNF_NORMAL)
774			return (0); /* Dangling ref to us */
775	}
776on1:
777	/*
778	 * Eliminate us from tree
779	 */
780	if (tt->rn_flags & RNF_ROOT)
781		return (0);
782#ifdef RN_DEBUG
783	/* Get us out of the creation list */
784	for (t = rn_clist; t && t->rn_ybro != tt; t = t->rn_ybro) {}
785	if (t) t->rn_ybro = tt->rn_ybro;
786#endif
787	t = tt->rn_parent;
788	dupedkey = saved_tt->rn_dupedkey;
789	if (dupedkey) {
790		/*
791		 * Here, tt is the deletion target and
792		 * saved_tt is the head of the dupekey chain.
793		 */
794		if (tt == saved_tt) {
795			/* remove from head of chain */
796			x = dupedkey; x->rn_parent = t;
797			if (t->rn_left == tt)
798				t->rn_left = x;
799			else
800				t->rn_right = x;
801		} else {
802			/* find node in front of tt on the chain */
803			for (x = p = saved_tt; p && p->rn_dupedkey != tt;)
804				p = p->rn_dupedkey;
805			if (p) {
806				p->rn_dupedkey = tt->rn_dupedkey;
807				if (tt->rn_dupedkey)		/* parent */
808					tt->rn_dupedkey->rn_parent = p;
809								/* parent */
810			} else log(LOG_ERR, "rn_delete: couldn't find us\n");
811		}
812		t = tt + 1;
813		if  (t->rn_flags & RNF_ACTIVE) {
814#ifndef RN_DEBUG
815			*++x = *t;
816			p = t->rn_parent;
817#else
818			b = t->rn_info;
819			*++x = *t;
820			t->rn_info = b;
821			p = t->rn_parent;
822#endif
823			if (p->rn_left == t)
824				p->rn_left = x;
825			else
826				p->rn_right = x;
827			x->rn_left->rn_parent = x;
828			x->rn_right->rn_parent = x;
829		}
830		goto out;
831	}
832	if (t->rn_left == tt)
833		x = t->rn_right;
834	else
835		x = t->rn_left;
836	p = t->rn_parent;
837	if (p->rn_right == t)
838		p->rn_right = x;
839	else
840		p->rn_left = x;
841	x->rn_parent = p;
842	/*
843	 * Demote routes attached to us.
844	 */
845	if (t->rn_mklist) {
846		if (x->rn_bit >= 0) {
847			for (mp = &x->rn_mklist; (m = *mp);)
848				mp = &m->rm_mklist;
849			*mp = t->rn_mklist;
850		} else {
851			/* If there are any key,mask pairs in a sibling
852			   duped-key chain, some subset will appear sorted
853			   in the same order attached to our mklist */
854			for (m = t->rn_mklist; m && x; x = x->rn_dupedkey)
855				if (m == x->rn_mklist) {
856					struct radix_mask *mm = m->rm_mklist;
857					x->rn_mklist = 0;
858					if (--(m->rm_refs) < 0)
859						MKFree(m);
860					m = mm;
861				}
862			if (m)
863				log(LOG_ERR,
864				    "rn_delete: Orphaned Mask %p at %p\n",
865				    (void *)m, (void *)x);
866		}
867	}
868	/*
869	 * We may be holding an active internal node in the tree.
870	 */
871	x = tt + 1;
872	if (t != x) {
873#ifndef RN_DEBUG
874		*t = *x;
875#else
876		b = t->rn_info;
877		*t = *x;
878		t->rn_info = b;
879#endif
880		t->rn_left->rn_parent = t;
881		t->rn_right->rn_parent = t;
882		p = x->rn_parent;
883		if (p->rn_left == x)
884			p->rn_left = t;
885		else
886			p->rn_right = t;
887	}
888out:
889	tt->rn_flags &= ~RNF_ACTIVE;
890	tt[1].rn_flags &= ~RNF_ACTIVE;
891	return (tt);
892}
893
894/*
895 * This is the same as rn_walktree() except for the parameters and the
896 * exit.
897 */
898static int
899rn_walktree_from(h, a, m, f, w)
900	struct radix_node_head *h;
901	void *a, *m;
902	walktree_f_t *f;
903	void *w;
904{
905	int error;
906	struct radix_node *base, *next;
907	u_char *xa = (u_char *)a;
908	u_char *xm = (u_char *)m;
909	register struct radix_node *rn, *last = 0 /* shut up gcc */;
910	int stopping = 0;
911	int lastb;
912
913	/*
914	 * rn_search_m is sort-of-open-coded here.
915	 */
916	/* printf("about to search\n"); */
917	for (rn = h->rnh_treetop; rn->rn_bit >= 0; ) {
918		last = rn;
919		/* printf("rn_bit %d, rn_bmask %x, xm[rn_offset] %x\n",
920		       rn->rn_bit, rn->rn_bmask, xm[rn->rn_offset]); */
921		if (!(rn->rn_bmask & xm[rn->rn_offset])) {
922			break;
923		}
924		if (rn->rn_bmask & xa[rn->rn_offset]) {
925			rn = rn->rn_right;
926		} else {
927			rn = rn->rn_left;
928		}
929	}
930	/* printf("done searching\n"); */
931
932	/*
933	 * Two cases: either we stepped off the end of our mask,
934	 * in which case last == rn, or we reached a leaf, in which
935	 * case we want to start from the last node we looked at.
936	 * Either way, last is the node we want to start from.
937	 */
938	rn = last;
939	lastb = rn->rn_bit;
940
941	/* printf("rn %p, lastb %d\n", rn, lastb);*/
942
943	/*
944	 * This gets complicated because we may delete the node
945	 * while applying the function f to it, so we need to calculate
946	 * the successor node in advance.
947	 */
948	while (rn->rn_bit >= 0)
949		rn = rn->rn_left;
950
951	while (!stopping) {
952		/* printf("node %p (%d)\n", rn, rn->rn_bit); */
953		base = rn;
954		/* If at right child go back up, otherwise, go right */
955		while (rn->rn_parent->rn_right == rn
956		       && !(rn->rn_flags & RNF_ROOT)) {
957			rn = rn->rn_parent;
958
959			/* if went up beyond last, stop */
960			if (rn->rn_bit < lastb) {
961				stopping = 1;
962				/* printf("up too far\n"); */
963			}
964		}
965
966		/* Find the next *leaf* since next node might vanish, too */
967		for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;)
968			rn = rn->rn_left;
969		next = rn;
970		/* Process leaves */
971		while ((rn = base) != 0) {
972			base = rn->rn_dupedkey;
973			/* printf("leaf %p\n", rn); */
974			if (!(rn->rn_flags & RNF_ROOT)
975			    && (error = (*f)(rn, w)))
976				return (error);
977		}
978		rn = next;
979
980		if (rn->rn_flags & RNF_ROOT) {
981			/* printf("root, stopping"); */
982			stopping = 1;
983		}
984
985	}
986	return 0;
987}
988
989static int
990rn_walktree(h, f, w)
991	struct radix_node_head *h;
992	walktree_f_t *f;
993	void *w;
994{
995	int error;
996	struct radix_node *base, *next;
997	register struct radix_node *rn = h->rnh_treetop;
998	/*
999	 * This gets complicated because we may delete the node
1000	 * while applying the function f to it, so we need to calculate
1001	 * the successor node in advance.
1002	 */
1003	/* First time through node, go left */
1004	while (rn->rn_bit >= 0)
1005		rn = rn->rn_left;
1006	for (;;) {
1007		base = rn;
1008		/* If at right child go back up, otherwise, go right */
1009		while (rn->rn_parent->rn_right == rn
1010		       && (rn->rn_flags & RNF_ROOT) == 0)
1011			rn = rn->rn_parent;
1012		/* Find the next *leaf* since next node might vanish, too */
1013		for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;)
1014			rn = rn->rn_left;
1015		next = rn;
1016		/* Process leaves */
1017		while ((rn = base)) {
1018			base = rn->rn_dupedkey;
1019			if (!(rn->rn_flags & RNF_ROOT)
1020			    && (error = (*f)(rn, w)))
1021				return (error);
1022		}
1023		rn = next;
1024		if (rn->rn_flags & RNF_ROOT)
1025			return (0);
1026	}
1027	/* NOTREACHED */
1028}
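
#if 0
/*
 * Illustrative sketch, not part of the build: a walker callback has the
 * walktree_f_t signature from radix.h.  Returning non-zero stops the walk;
 * the callback may delete the leaf it is handed, which is why the loops
 * above always compute `next' before processing `base'.
 */
static int
rn_count_leaf(struct radix_node *rn, void *arg)
{
	(*(int *)arg)++;
	return (0);
}

static int
rn_count_tree(struct radix_node_head *rnh)
{
	int n = 0;

	rnh->rnh_walktree(rnh, rn_count_leaf, &n);
	return (n);
}
#endif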
1029
1030int
1031rn_inithead(head, off)
1032	void **head;
1033	int off;
1034{
1035	register struct radix_node_head *rnh;
1036	register struct radix_node *t, *tt, *ttt;
1037	if (*head)
1038		return (1);
1039	R_Zalloc(rnh, struct radix_node_head *, sizeof (*rnh));
1040	if (rnh == 0)
1041		return (0);
1042#ifdef _KERNEL
1043	RADIX_NODE_HEAD_LOCK_INIT(rnh);
1044#endif
1045	*head = rnh;
1046	t = rn_newpair(rn_zeros, off, rnh->rnh_nodes);
1047	ttt = rnh->rnh_nodes + 2;
1048	t->rn_right = ttt;
1049	t->rn_parent = t;
1050	tt = t->rn_left;
1051	tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
1052	tt->rn_bit = -1 - off;
1053	*ttt = *tt;
1054	ttt->rn_key = rn_ones;
1055	rnh->rnh_addaddr = rn_addroute;
1056	rnh->rnh_deladdr = rn_delete;
1057	rnh->rnh_matchaddr = rn_match;
1058	rnh->rnh_lookup = rn_lookup;
1059	rnh->rnh_walktree = rn_walktree;
1060	rnh->rnh_walktree_from = rn_walktree_from;
1061	rnh->rnh_treetop = t;
1062	return (1);
1063}
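
#if 0
/*
 * Illustrative sketch, not part of the build: minimal use of a tree head.
 * Assumes rn_init() has already run with max_keylen large enough for
 * these keys, and uses a hypothetical sockaddr_in-style length-prefixed
 * key layout; neither is defined by this file.
 */
static void
rn_usage_example(void)
{
	struct radix_node_head *rnh = NULL;
	static struct radix_node nodes[2];
	static u_char key[]  = { 8, 0, 0, 0,   10,    1, 0, 0 };	/* 10.1.0.0 */
	static u_char mask[] = { 8, 0, 0, 0, 0xff, 0xff, 0, 0 };	/* /16 */

	/* The second argument is a bit offset: skip the 4 header bytes. */
	if (rn_inithead((void **)&rnh, 4 * 8) == 0)
		return;
	rnh->rnh_addaddr(key, mask, rnh, nodes);	/* install 10.1/16 */
	/*
	 * rnh->rnh_matchaddr(k, rnh) now returns this leaf for any key k
	 * whose first two address bytes are 10.1.
	 */
}
#endif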
1064
1065void
1066rn_init()
1067{
1068	char *cp, *cplim;
1069#ifdef _KERNEL
1070	struct domain *dom;
1071
1072	for (dom = domains; dom; dom = dom->dom_next)
1073		if (dom->dom_maxrtkey > max_keylen)
1074			max_keylen = dom->dom_maxrtkey;
1075#endif
1076	if (max_keylen == 0) {
1077		log(LOG_ERR,
1078		    "rn_init: radix functions require max_keylen be set\n");
1079		return;
1080	}
1081	R_Malloc(rn_zeros, char *, 3 * max_keylen);
1082	if (rn_zeros == NULL)
1083		panic("rn_init");
1084	bzero(rn_zeros, 3 * max_keylen);
1085	rn_ones = cp = rn_zeros + max_keylen;
1086	addmask_key = cplim = rn_ones + max_keylen;
1087	while (cp < cplim)
1088		*cp++ = -1;
1089	if (rn_inithead((void **)(void *)&mask_rnhead, 0) == 0)
1090		panic("rn_init 2");
1091}
1092