1/* 2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Copyright (c) 1988, 1989, 1993 30 * The Regents of the University of California. All rights reserved. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. All advertising materials mentioning features or use of this software 41 * must display the following acknowledgement: 42 * This product includes software developed by the University of 43 * California, Berkeley and its contributors. 44 * 4. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 * 60 * @(#)radix.c 8.4 (Berkeley) 11/2/94 61 * $FreeBSD: src/sys/net/radix.c,v 1.20.2.2 2001/03/06 00:56:50 obrien Exp $ 62 */ 63 64/* 65 * Routines to build and maintain radix trees for routing lookups. 66 */ 67#ifndef _RADIX_H_ 68#include <sys/param.h> 69#ifdef KERNEL 70#include <sys/systm.h> 71#include <sys/malloc.h> 72#define M_DONTWAIT M_NOWAIT 73#include <sys/domain.h> 74#else 75#include <stdlib.h> 76#endif 77#include <sys/syslog.h> 78#include <net/radix.h> 79#include <sys/socket.h> 80#include <sys/socketvar.h> 81#include <kern/locks.h> 82#endif 83 84static int rn_walktree_from(struct radix_node_head *h, void *a, 85 void *m, walktree_f_t *f, void *w); 86static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *); 87static struct radix_node 88 *rn_insert(void *, struct radix_node_head *, int *, 89 struct radix_node [2]), 90 *rn_newpair(void *, int, struct radix_node[2]), 91 *rn_search(void *, struct radix_node *), 92 *rn_search_m(void *, struct radix_node *, void *); 93 94static int max_keylen; 95static struct radix_mask *rn_mkfreelist; 96static struct radix_node_head *mask_rnhead; 97static char *addmask_key; 98static char normal_chars[] = {0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, -1}; 99static char *rn_zeros, *rn_ones; 100 101 102extern lck_grp_t *domain_proto_mtx_grp; 103extern lck_attr_t *domain_proto_mtx_attr; 104 105#define rn_masktop (mask_rnhead->rnh_treetop) 106#undef Bcmp 107#define Bcmp(a, b, l) \ 108 (l == 0 ? 0 : bcmp((caddr_t)(a), (caddr_t)(b), (uint32_t)l)) 109 110static int rn_lexobetter(void *m_arg, void *n_arg); 111static struct radix_mask * 112 rn_new_radix_mask(struct radix_node *tt, 113 struct radix_mask *next); 114static int rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip, 115 rn_matchf_t *f, void *w); 116 117#define RN_MATCHF(rn, f, arg) (f == NULL || (*f)((rn), arg)) 118 119/* 120 * The data structure for the keys is a radix tree with one way 121 * branching removed. The index rn_bit at an internal node n represents a bit 122 * position to be tested. The tree is arranged so that all descendants 123 * of a node n have keys whose bits all agree up to position rn_bit - 1. 124 * (We say the index of n is rn_bit.) 125 * 126 * There is at least one descendant which has a one bit at position rn_bit, 127 * and at least one with a zero there. 128 * 129 * A route is determined by a pair of key and mask. We require that the 130 * bit-wise logical and of the key and mask to be the key. 131 * We define the index of a route to associated with the mask to be 132 * the first bit number in the mask where 0 occurs (with bit number 0 133 * representing the highest order bit). 134 * 135 * We say a mask is normal if every bit is 0, past the index of the mask. 136 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit, 137 * and m is a normal mask, then the route applies to every descendant of n. 138 * If the index(m) < rn_bit, this implies the trailing last few bits of k 139 * before bit b are all 0, (and hence consequently true of every descendant 140 * of n), so the route applies to all descendants of the node as well. 141 * 142 * Similar logic shows that a non-normal mask m such that 143 * index(m) <= index(n) could potentially apply to many children of n. 144 * Thus, for each non-host route, we attach its mask to a list at an internal 145 * node as high in the tree as we can go. 146 * 147 * The present version of the code makes use of normal routes in short- 148 * circuiting an explict mask and compare operation when testing whether 149 * a key satisfies a normal route, and also in remembering the unique leaf 150 * that governs a subtree. 151 */ 152 153static struct radix_node * 154rn_search(void *v_arg, struct radix_node *head) 155{ 156 struct radix_node *x; 157 caddr_t v; 158 159 for (x = head, v = v_arg; x->rn_bit >= 0;) { 160 if (x->rn_bmask & v[x->rn_offset]) 161 x = x->rn_right; 162 else 163 x = x->rn_left; 164 } 165 return (x); 166} 167 168static struct radix_node * 169rn_search_m(void *v_arg, struct radix_node *head, void *m_arg) 170{ 171 struct radix_node *x; 172 caddr_t v = v_arg, m = m_arg; 173 174 for (x = head; x->rn_bit >= 0;) { 175 if ((x->rn_bmask & m[x->rn_offset]) && 176 (x->rn_bmask & v[x->rn_offset])) 177 x = x->rn_right; 178 else 179 x = x->rn_left; 180 } 181 return x; 182} 183 184int 185rn_refines(void *m_arg, void *n_arg) 186{ 187 caddr_t m = m_arg, n = n_arg; 188 caddr_t lim, lim2 = lim = n + *(u_char *)n; 189 int longer = (*(u_char *)n++) - (int)(*(u_char *)m++); 190 int masks_are_equal = 1; 191 192 if (longer > 0) 193 lim -= longer; 194 while (n < lim) { 195 if (*n & ~(*m)) 196 return 0; 197 if (*n++ != *m++) 198 masks_are_equal = 0; 199 } 200 while (n < lim2) 201 if (*n++) 202 return 0; 203 if (masks_are_equal && (longer < 0)) 204 for (lim2 = m - longer; m < lim2; ) 205 if (*m++) 206 return 1; 207 return (!masks_are_equal); 208} 209 210struct radix_node * 211rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head) 212{ 213 return (rn_lookup_args(v_arg, m_arg, head, NULL, NULL)); 214} 215 216struct radix_node * 217rn_lookup_args(void *v_arg, void *m_arg, struct radix_node_head *head, 218 rn_matchf_t *f, void *w) 219{ 220 struct radix_node *x; 221 caddr_t netmask = NULL; 222 223 if (m_arg) { 224 x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_offset); 225 if (x == 0) 226 return (NULL); 227 netmask = x->rn_key; 228 } 229 x = rn_match_args(v_arg, head, f, w); 230 if (x && netmask) { 231 while (x && x->rn_mask != netmask) 232 x = x->rn_dupedkey; 233 } 234 return x; 235} 236 237/* 238 * Returns true if address 'trial' has no bits differing from the 239 * leaf's key when compared under the leaf's mask. In other words, 240 * returns true when 'trial' matches leaf. If a leaf-matching 241 * routine is passed in, it is also used to find a match on the 242 * conditions defined by the caller of rn_match. 243 */ 244static int 245rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip, 246 rn_matchf_t *f, void *w) 247{ 248 char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask; 249 char *cplim; 250 int length = min(*(u_char *)cp, *(u_char *)cp2); 251 252 if (cp3 == 0) 253 cp3 = rn_ones; 254 else 255 length = min(length, *(u_char *)cp3); 256 cplim = cp + length; cp3 += skip; cp2 += skip; 257 for (cp += skip; cp < cplim; cp++, cp2++, cp3++) 258 if ((*cp ^ *cp2) & *cp3) 259 return 0; 260 261 return (RN_MATCHF(leaf, f, w)); 262} 263 264struct radix_node * 265rn_match(void *v_arg, struct radix_node_head *head) 266{ 267 return (rn_match_args(v_arg, head, NULL, NULL)); 268} 269 270struct radix_node * 271rn_match_args(void *v_arg, struct radix_node_head *head, 272 rn_matchf_t *f, void *w) 273{ 274 caddr_t v = v_arg; 275 struct radix_node *t = head->rnh_treetop, *x; 276 caddr_t cp = v, cp2; 277 caddr_t cplim; 278 struct radix_node *saved_t, *top = t; 279 int off = t->rn_offset, vlen = *(u_char *)cp, matched_off; 280 int test, b, rn_bit; 281 282 /* 283 * Open code rn_search(v, top) to avoid overhead of extra 284 * subroutine call. 285 */ 286 for (; t->rn_bit >= 0; ) { 287 if (t->rn_bmask & cp[t->rn_offset]) 288 t = t->rn_right; 289 else 290 t = t->rn_left; 291 } 292 /* 293 * See if we match exactly as a host destination 294 * or at least learn how many bits match, for normal mask finesse. 295 * 296 * It doesn't hurt us to limit how many bytes to check 297 * to the length of the mask, since if it matches we had a genuine 298 * match and the leaf we have is the most specific one anyway; 299 * if it didn't match with a shorter length it would fail 300 * with a long one. This wins big for class B&C netmasks which 301 * are probably the most common case... 302 */ 303 if (t->rn_mask) 304 vlen = *(u_char *)t->rn_mask; 305 cp += off; cp2 = t->rn_key + off; cplim = v + vlen; 306 for (; cp < cplim; cp++, cp2++) 307 if (*cp != *cp2) 308 goto on1; 309 /* 310 * This extra grot is in case we are explicitly asked 311 * to look up the default. Ugh! 312 * 313 * Never return the root node itself, it seems to cause a 314 * lot of confusion. 315 */ 316 if (t->rn_flags & RNF_ROOT) 317 t = t->rn_dupedkey; 318 if (t == NULL || RN_MATCHF(t, f, w)) { 319 return (t); 320 } else { 321 /* 322 * Although we found an exact match on the key, 323 * f() is looking for some other criteria as well. 324 * Continue looking as if the exact match failed. 325 */ 326 if (t->rn_parent->rn_flags & RNF_ROOT) { 327 /* Hit the top; have to give up */ 328 return (NULL); 329 } 330 b = 0; 331 goto keeplooking; 332 } 333on1: 334 test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */ 335 for (b = 7; (test >>= 1) > 0;) 336 b--; 337keeplooking: 338 matched_off = cp - v; 339 b += matched_off << 3; 340 rn_bit = -1 - b; 341 /* 342 * If there is a host route in a duped-key chain, it will be first. 343 */ 344 if ((saved_t = t)->rn_mask == 0) 345 t = t->rn_dupedkey; 346 for (; t; t = t->rn_dupedkey) { 347 /* 348 * Even if we don't match exactly as a host, 349 * we may match if the leaf we wound up at is 350 * a route to a net. 351 */ 352 if (t->rn_flags & RNF_NORMAL) { 353 if ((rn_bit <= t->rn_bit) && RN_MATCHF(t, f, w)) 354 return (t); 355 } else if (rn_satisfies_leaf(v, t, matched_off, f, w)) { 356 return (t); 357 } 358 } 359 t = saved_t; 360 /* start searching up the tree */ 361 do { 362 struct radix_mask *m; 363 t = t->rn_parent; 364 m = t->rn_mklist; 365 /* 366 * If non-contiguous masks ever become important 367 * we can restore the masking and open coding of 368 * the search and satisfaction test and put the 369 * calculation of "off" back before the "do". 370 */ 371 while (m) { 372 if (m->rm_flags & RNF_NORMAL) { 373 if ((rn_bit <= m->rm_bit) && 374 RN_MATCHF(m->rm_leaf, f, w)) 375 return (m->rm_leaf); 376 } else { 377 off = min(t->rn_offset, matched_off); 378 x = rn_search_m(v, t, m->rm_mask); 379 while (x && x->rn_mask != m->rm_mask) 380 x = x->rn_dupedkey; 381 if (x && rn_satisfies_leaf(v, x, off, f, w)) 382 return (x); 383 } 384 m = m->rm_mklist; 385 } 386 } while (t != top); 387 return (NULL); 388} 389 390#ifdef RN_DEBUG 391int rn_nodenum; 392struct radix_node *rn_clist; 393int rn_saveinfo; 394int rn_debug = 1; 395#endif 396 397static struct radix_node * 398rn_newpair(void *v, int b, struct radix_node nodes[2]) 399{ 400 struct radix_node *tt = nodes, *t = tt + 1; 401 t->rn_bit = b; 402 t->rn_bmask = 0x80 >> (b & 7); 403 t->rn_left = tt; 404 t->rn_offset = b >> 3; 405 tt->rn_bit = -1; 406 tt->rn_key = (caddr_t)v; 407 tt->rn_parent = t; 408 tt->rn_flags = t->rn_flags = RNF_ACTIVE; 409 tt->rn_mklist = t->rn_mklist = NULL; 410#ifdef RN_DEBUG 411 tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++; 412 tt->rn_twin = t; 413 tt->rn_ybro = rn_clist; 414 rn_clist = tt; 415#endif 416 return t; 417} 418 419static struct radix_node * 420rn_insert(void *v_arg, struct radix_node_head *head, int *dupentry, 421 struct radix_node nodes[2]) 422{ 423 caddr_t v = v_arg; 424 struct radix_node *top = head->rnh_treetop; 425 int head_off = top->rn_offset, vlen = (int)*((u_char *)v); 426 struct radix_node *t = rn_search(v_arg, top); 427 caddr_t cp = v + head_off; 428 int b; 429 struct radix_node *tt; 430 /* 431 * Find first bit at which v and t->rn_key differ 432 */ 433 { 434 caddr_t cp2 = t->rn_key + head_off; 435 int cmp_res; 436 caddr_t cplim = v + vlen; 437 438 while (cp < cplim) 439 if (*cp2++ != *cp++) 440 goto on1; 441 *dupentry = 1; 442 return t; 443on1: 444 *dupentry = 0; 445 cmp_res = (cp[-1] ^ cp2[-1]) & 0xff; 446 for (b = (cp - v) << 3; cmp_res; b--) 447 cmp_res >>= 1; 448 } 449 { 450 struct radix_node *p, *x = top; 451 cp = v; 452 do { 453 p = x; 454 if (cp[x->rn_offset] & x->rn_bmask) 455 x = x->rn_right; 456 else 457 x = x->rn_left; 458 } while (b > (unsigned) x->rn_bit); 459 /* x->rn_bit < b && x->rn_bit >= 0 */ 460#ifdef RN_DEBUG 461 if (rn_debug) 462 log(LOG_DEBUG, "rn_insert: Going In:\n"), traverse(p); 463#endif 464 t = rn_newpair(v_arg, b, nodes); 465 tt = t->rn_left; 466 if ((cp[p->rn_offset] & p->rn_bmask) == 0) 467 p->rn_left = t; 468 else 469 p->rn_right = t; 470 x->rn_parent = t; 471 t->rn_parent = p; /* frees x, p as temp vars below */ 472 if ((cp[t->rn_offset] & t->rn_bmask) == 0) { 473 t->rn_right = x; 474 } else { 475 t->rn_right = tt; 476 t->rn_left = x; 477 } 478#ifdef RN_DEBUG 479 if (rn_debug) 480 log(LOG_DEBUG, "rn_insert: Coming Out:\n"), traverse(p); 481#endif 482 } 483 return (tt); 484} 485 486struct radix_node * 487rn_addmask(void *n_arg, int search, int skip) 488{ 489 caddr_t netmask = (caddr_t)n_arg; 490 struct radix_node *x; 491 caddr_t cp, cplim; 492 int b = 0, mlen, j; 493 int maskduplicated, m0, isnormal; 494 struct radix_node *saved_x; 495 static int last_zeroed = 0; 496 497 if ((mlen = *(u_char *)netmask) > max_keylen) 498 mlen = max_keylen; 499 if (skip == 0) 500 skip = 1; 501 if (mlen <= skip) 502 return (mask_rnhead->rnh_nodes); 503 if (skip > 1) 504 Bcopy(rn_ones + 1, addmask_key + 1, skip - 1); 505 if ((m0 = mlen) > skip) 506 Bcopy(netmask + skip, addmask_key + skip, mlen - skip); 507 /* 508 * Trim trailing zeroes. 509 */ 510 for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;) 511 cp--; 512 mlen = cp - addmask_key; 513 if (mlen <= skip) { 514 if (m0 >= last_zeroed) 515 last_zeroed = mlen; 516 return (mask_rnhead->rnh_nodes); 517 } 518 if (m0 < last_zeroed) 519 Bzero(addmask_key + m0, last_zeroed - m0); 520 *addmask_key = last_zeroed = mlen; 521 x = rn_search(addmask_key, rn_masktop); 522 if (Bcmp(addmask_key, x->rn_key, mlen) != 0) 523 x = NULL; 524 if (x || search) 525 return (x); 526 R_Malloc(x, struct radix_node *, max_keylen + 2 * sizeof (*x)); 527 if ((saved_x = x) == 0) 528 return (NULL); 529 Bzero(x, max_keylen + 2 * sizeof (*x)); 530 netmask = cp = (caddr_t)(x + 2); 531 Bcopy(addmask_key, cp, mlen); 532 x = rn_insert(cp, mask_rnhead, &maskduplicated, x); 533 if (maskduplicated) { 534 log(LOG_ERR, "rn_addmask: mask impossibly already in tree"); 535 R_Free(saved_x); 536 return (x); 537 } 538 mask_rnhead->rnh_cnt++; 539 /* 540 * Calculate index of mask, and check for normalcy. 541 */ 542 cplim = netmask + mlen; isnormal = 1; 543 for (cp = netmask + skip; (cp < cplim) && *(u_char *)cp == 0xff;) 544 cp++; 545 if (cp != cplim) { 546 for (j = 0x80; (j & *cp) != 0; j >>= 1) 547 b++; 548 if (*cp != normal_chars[b] || cp != (cplim - 1)) 549 isnormal = 0; 550 } 551 b += (cp - netmask) << 3; 552 x->rn_bit = -1 - b; 553 if (isnormal) 554 x->rn_flags |= RNF_NORMAL; 555 return (x); 556} 557 558static int /* XXX: arbitrary ordering for non-contiguous masks */ 559rn_lexobetter(void *m_arg, void *n_arg) 560{ 561 u_char *mp = m_arg, *np = n_arg, *lim; 562 563 if (*mp > *np) 564 return 1; /* not really, but need to check longer one first */ 565 if (*mp == *np) 566 for (lim = mp + *mp; mp < lim;) 567 if (*mp++ > *np++) 568 return 1; 569 return 0; 570} 571 572static struct radix_mask * 573rn_new_radix_mask(struct radix_node *tt, struct radix_mask *next) 574{ 575 struct radix_mask *m; 576 577 MKGet(m); 578 if (m == 0) { 579 log(LOG_ERR, "Mask for route not entered\n"); 580 return (NULL); 581 } 582 Bzero(m, sizeof *m); 583 m->rm_bit = tt->rn_bit; 584 m->rm_flags = tt->rn_flags; 585 if (tt->rn_flags & RNF_NORMAL) 586 m->rm_leaf = tt; 587 else 588 m->rm_mask = tt->rn_mask; 589 m->rm_mklist = next; 590 tt->rn_mklist = m; 591 return m; 592} 593 594struct radix_node * 595rn_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, 596 struct radix_node treenodes[2]) 597{ 598 caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg; 599 struct radix_node *t, *x = NULL, *tt; 600 struct radix_node *saved_tt, *top = head->rnh_treetop; 601 short b = 0, b_leaf = 0; 602 int keyduplicated; 603 caddr_t mmask; 604 struct radix_mask *m, **mp; 605 606 /* 607 * In dealing with non-contiguous masks, there may be 608 * many different routes which have the same mask. 609 * We will find it useful to have a unique pointer to 610 * the mask to speed avoiding duplicate references at 611 * nodes and possibly save time in calculating indices. 612 */ 613 if (netmask) { 614 if ((x = rn_addmask(netmask, 0, top->rn_offset)) == 0) 615 return (NULL); 616 b_leaf = x->rn_bit; 617 b = -1 - x->rn_bit; 618 netmask = x->rn_key; 619 } 620 /* 621 * Deal with duplicated keys: attach node to previous instance 622 */ 623 saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes); 624 if (keyduplicated) { 625 for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) { 626 if (tt->rn_mask == netmask) 627 return (NULL); 628 if (netmask == 0 || 629 (tt->rn_mask && 630 ((b_leaf < tt->rn_bit) /* index(netmask) > node */ 631 || rn_refines(netmask, tt->rn_mask) 632 || rn_lexobetter(netmask, tt->rn_mask)))) 633 break; 634 } 635 /* 636 * If the mask is not duplicated, we wouldn't 637 * find it among possible duplicate key entries 638 * anyway, so the above test doesn't hurt. 639 * 640 * We sort the masks for a duplicated key the same way as 641 * in a masklist -- most specific to least specific. 642 * This may require the unfortunate nuisance of relocating 643 * the head of the list. 644 */ 645 if (tt == saved_tt) { 646 struct radix_node *xx = x; 647 /* link in at head of list */ 648 (tt = treenodes)->rn_dupedkey = t; 649 tt->rn_flags = t->rn_flags; 650 tt->rn_parent = x = t->rn_parent; 651 t->rn_parent = tt; /* parent */ 652 if (x->rn_left == t) 653 x->rn_left = tt; 654 else 655 x->rn_right = tt; 656 saved_tt = tt; x = xx; 657 } else { 658 (tt = treenodes)->rn_dupedkey = t->rn_dupedkey; 659 t->rn_dupedkey = tt; 660 tt->rn_parent = t; /* parent */ 661 if (tt->rn_dupedkey) /* parent */ 662 tt->rn_dupedkey->rn_parent = tt; /* parent */ 663 } 664#ifdef RN_DEBUG 665 t=tt+1; tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++; 666 tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt; 667#endif 668 tt->rn_key = (caddr_t) v; 669 tt->rn_bit = -1; 670 tt->rn_flags = RNF_ACTIVE; 671 } 672 head->rnh_cnt++; 673 /* 674 * Put mask in tree. 675 */ 676 if (netmask) { 677 tt->rn_mask = netmask; 678 tt->rn_bit = x->rn_bit; 679 tt->rn_flags |= x->rn_flags & RNF_NORMAL; 680 } 681 t = saved_tt->rn_parent; 682 if (keyduplicated) 683 goto on2; 684 b_leaf = -1 - t->rn_bit; 685 if (t->rn_right == saved_tt) 686 x = t->rn_left; 687 else 688 x = t->rn_right; 689 /* Promote general routes from below */ 690 if (x->rn_bit < 0) { 691 for (mp = &t->rn_mklist; x; x = x->rn_dupedkey) 692 if (x->rn_mask && (x->rn_bit >= b_leaf) && x->rn_mklist == 0) { 693 *mp = m = rn_new_radix_mask(x, NULL); 694 if (m) 695 mp = &m->rm_mklist; 696 } 697 } else if (x->rn_mklist) { 698 /* 699 * Skip over masks whose index is > that of new node 700 */ 701 for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) 702 if (m->rm_bit >= b_leaf) 703 break; 704 t->rn_mklist = m; *mp = NULL; 705 } 706on2: 707 /* Add new route to highest possible ancestor's list */ 708 if ((netmask == 0) || (b > t->rn_bit )) 709 return tt; /* can't lift at all */ 710 b_leaf = tt->rn_bit; 711 do { 712 x = t; 713 t = t->rn_parent; 714 } while (b <= t->rn_bit && x != top); 715 /* 716 * Search through routes associated with node to 717 * insert new route according to index. 718 * Need same criteria as when sorting dupedkeys to avoid 719 * double loop on deletion. 720 */ 721 for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) { 722 if (m->rm_bit < b_leaf) 723 continue; 724 if (m->rm_bit > b_leaf) 725 break; 726 if (m->rm_flags & RNF_NORMAL) { 727 mmask = m->rm_leaf->rn_mask; 728 if (tt->rn_flags & RNF_NORMAL) { 729 log(LOG_ERR, 730 "Non-unique normal route, mask not entered"); 731 return tt; 732 } 733 } else 734 mmask = m->rm_mask; 735 if (mmask == netmask) { 736 m->rm_refs++; 737 tt->rn_mklist = m; 738 return tt; 739 } 740 if (rn_refines(netmask, mmask) 741 || rn_lexobetter(netmask, mmask)) 742 break; 743 } 744 *mp = rn_new_radix_mask(tt, *mp); 745 return tt; 746} 747 748struct radix_node * 749rn_delete(void *v_arg, void *netmask_arg, struct radix_node_head *head) 750{ 751 struct radix_node *t, *p, *x, *tt; 752 struct radix_mask *m, *saved_m, **mp; 753 struct radix_node *dupedkey, *saved_tt, *top; 754 caddr_t v, netmask; 755 int b, head_off, vlen; 756 757 v = v_arg; 758 netmask = netmask_arg; 759 x = head->rnh_treetop; 760 tt = rn_search(v, x); 761 head_off = x->rn_offset; 762 vlen = *(u_char *)v; 763 saved_tt = tt; 764 top = x; 765 if (tt == 0 || 766 Bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off)) 767 return (NULL); 768 /* 769 * Delete our route from mask lists. 770 */ 771 if (netmask) { 772 if ((x = rn_addmask(netmask, 1, head_off)) == 0) 773 return (NULL); 774 netmask = x->rn_key; 775 while (tt->rn_mask != netmask) 776 if ((tt = tt->rn_dupedkey) == 0) 777 return (NULL); 778 } 779 if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0) 780 goto on1; 781 if (tt->rn_flags & RNF_NORMAL) { 782 if (m->rm_leaf != tt || m->rm_refs > 0) { 783 log(LOG_ERR, "rn_delete: inconsistent annotation\n"); 784 return NULL; /* dangling ref could cause disaster */ 785 } 786 } else { 787 if (m->rm_mask != tt->rn_mask) { 788 log(LOG_ERR, "rn_delete: inconsistent annotation\n"); 789 goto on1; 790 } 791 if (--m->rm_refs >= 0) 792 goto on1; 793 } 794 b = -1 - tt->rn_bit; 795 t = saved_tt->rn_parent; 796 if (b > t->rn_bit) 797 goto on1; /* Wasn't lifted at all */ 798 do { 799 x = t; 800 t = t->rn_parent; 801 } while (b <= t->rn_bit && x != top); 802 for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) 803 if (m == saved_m) { 804 *mp = m->rm_mklist; 805 MKFree(m); 806 break; 807 } 808 if (m == 0) { 809 log(LOG_ERR, "rn_delete: couldn't find our annotation\n"); 810 if (tt->rn_flags & RNF_NORMAL) 811 return (NULL); /* Dangling ref to us */ 812 } 813on1: 814 /* 815 * Eliminate us from tree 816 */ 817 if (tt->rn_flags & RNF_ROOT) 818 return (NULL); 819 head->rnh_cnt--; 820#ifdef RN_DEBUG 821 /* Get us out of the creation list */ 822 for (t = rn_clist; t && t->rn_ybro != tt; t = t->rn_ybro) {} 823 if (t) t->rn_ybro = tt->rn_ybro; 824#endif 825 t = tt->rn_parent; 826 dupedkey = saved_tt->rn_dupedkey; 827 if (dupedkey) { 828 /* 829 * at this point, tt is the deletion target and saved_tt 830 * is the head of the dupekey chain 831 */ 832 if (tt == saved_tt) { 833 /* remove from head of chain */ 834 x = dupedkey; x->rn_parent = t; 835 if (t->rn_left == tt) 836 t->rn_left = x; 837 else 838 t->rn_right = x; 839 } else { 840 /* find node in front of tt on the chain */ 841 for (x = p = saved_tt; p && p->rn_dupedkey != tt;) 842 p = p->rn_dupedkey; 843 if (p) { 844 p->rn_dupedkey = tt->rn_dupedkey; 845 if (tt->rn_dupedkey) /* parent */ 846 tt->rn_dupedkey->rn_parent = p; 847 /* parent */ 848 } else log(LOG_ERR, "rn_delete: couldn't find us\n"); 849 } 850 t = tt + 1; 851 if (t->rn_flags & RNF_ACTIVE) { 852#ifndef RN_DEBUG 853 *++x = *t; 854 p = t->rn_parent; 855#else 856 b = t->rn_info; 857 *++x = *t; 858 t->rn_info = b; 859 p = t->rn_parent; 860#endif 861 if (p->rn_left == t) 862 p->rn_left = x; 863 else 864 p->rn_right = x; 865 x->rn_left->rn_parent = x; 866 x->rn_right->rn_parent = x; 867 } 868 goto out; 869 } 870 if (t->rn_left == tt) 871 x = t->rn_right; 872 else 873 x = t->rn_left; 874 p = t->rn_parent; 875 if (p->rn_right == t) 876 p->rn_right = x; 877 else 878 p->rn_left = x; 879 x->rn_parent = p; 880 /* 881 * Demote routes attached to us. 882 */ 883 if (t->rn_mklist) { 884 if (x->rn_bit >= 0) { 885 for (mp = &x->rn_mklist; (m = *mp);) 886 mp = &m->rm_mklist; 887 *mp = t->rn_mklist; 888 } else { 889 /* If there are any key,mask pairs in a sibling 890 duped-key chain, some subset will appear sorted 891 in the same order attached to our mklist */ 892 for (m = t->rn_mklist; m && x; x = x->rn_dupedkey) 893 if (m == x->rn_mklist) { 894 struct radix_mask *mm = m->rm_mklist; 895 x->rn_mklist = NULL; 896 if (--(m->rm_refs) < 0) 897 MKFree(m); 898 m = mm; 899 } 900 if (m) 901 log(LOG_ERR, 902 "rn_delete: Orphaned Mask %p at %p\n", 903 (void *)m, (void *)x); 904 } 905 } 906 /* 907 * We may be holding an active internal node in the tree. 908 */ 909 x = tt + 1; 910 if (t != x) { 911#ifndef RN_DEBUG 912 *t = *x; 913#else 914 b = t->rn_info; 915 *t = *x; 916 t->rn_info = b; 917#endif 918 t->rn_left->rn_parent = t; 919 t->rn_right->rn_parent = t; 920 p = x->rn_parent; 921 if (p->rn_left == x) 922 p->rn_left = t; 923 else 924 p->rn_right = t; 925 } 926out: 927 tt->rn_flags &= ~RNF_ACTIVE; 928 tt[1].rn_flags &= ~RNF_ACTIVE; 929 return (tt); 930} 931 932/* 933 * This is the same as rn_walktree() except for the parameters and the 934 * exit. 935 */ 936static int 937rn_walktree_from(struct radix_node_head *h, void *a, void *m, walktree_f_t *f, 938 void *w) 939{ 940 int error; 941 struct radix_node *base, *next; 942 u_char *xa = (u_char *)a; 943 u_char *xm = (u_char *)m; 944 struct radix_node *rn, *last; 945 int stopping; 946 int lastb; 947 int rnh_cnt; 948 949 /* 950 * This gets complicated because we may delete the node while 951 * applying the function f to it; we cannot simply use the next 952 * leaf as the successor node in advance, because that leaf may 953 * be removed as well during deletion when it is a clone of the 954 * current node. When that happens, we would end up referring 955 * to an already-freed radix node as the successor node. To get 956 * around this issue, if we detect that the radix tree has changed 957 * in dimension (smaller than before), we simply restart the walk 958 * from the top of tree. 959 */ 960restart: 961 last = NULL; 962 stopping = 0; 963 rnh_cnt = h->rnh_cnt; 964 965 /* 966 * rn_search_m is sort-of-open-coded here. 967 */ 968 for (rn = h->rnh_treetop; rn->rn_bit >= 0; ) { 969 last = rn; 970 if (!(rn->rn_bmask & xm[rn->rn_offset])) 971 break; 972 973 if (rn->rn_bmask & xa[rn->rn_offset]) 974 rn = rn->rn_right; 975 else 976 rn = rn->rn_left; 977 } 978 979 /* 980 * Two cases: either we stepped off the end of our mask, 981 * in which case last == rn, or we reached a leaf, in which 982 * case we want to start from the last node we looked at. 983 * Either way, last is the node we want to start from. 984 */ 985 rn = last; 986 lastb = rn->rn_bit; 987 988 /* First time through node, go left */ 989 while (rn->rn_bit >= 0) 990 rn = rn->rn_left; 991 992 while (!stopping) { 993 base = rn; 994 /* If at right child go back up, otherwise, go right */ 995 while (rn->rn_parent->rn_right == rn 996 && !(rn->rn_flags & RNF_ROOT)) { 997 rn = rn->rn_parent; 998 999 /* if went up beyond last, stop */ 1000 if (rn->rn_bit <= lastb) { 1001 stopping = 1; 1002 /* 1003 * XXX we should jump to the 'Process leaves' 1004 * part, because the values of 'rn' and 'next' 1005 * we compute will not be used. Not a big deal 1006 * because this loop will terminate, but it is 1007 * inefficient and hard to understand! 1008 */ 1009 } 1010 } 1011 1012 /* 1013 * The following code (bug fix) inherited from FreeBSD is 1014 * currently disabled, because our implementation uses the 1015 * RTF_PRCLONING scheme that has been abandoned in current 1016 * FreeBSD release. The scheme involves setting such a flag 1017 * for the default route entry, and therefore all off-link 1018 * destinations would become clones of that entry. Enabling 1019 * the following code would be problematic at this point, 1020 * because the removal of default route would cause only 1021 * the left-half of the tree to be traversed, leaving the 1022 * right-half untouched. If there are clones of the entry 1023 * that reside in that right-half, they would not be deleted 1024 * and would linger around until they expire or explicitly 1025 * deleted, which is a very bad thing. 1026 * 1027 * This code should be uncommented only after we get rid 1028 * of the RTF_PRCLONING scheme. 1029 */ 1030#if 0 1031 /* 1032 * At the top of the tree, no need to traverse the right 1033 * half, prevent the traversal of the entire tree in the 1034 * case of default route. 1035 */ 1036 if (rn->rn_parent->rn_flags & RNF_ROOT) 1037 stopping = 1; 1038#endif 1039 1040 /* Find the next *leaf* to start from */ 1041 for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;) 1042 rn = rn->rn_left; 1043 next = rn; 1044 /* Process leaves */ 1045 while ((rn = base) != 0) { 1046 base = rn->rn_dupedkey; 1047 if (!(rn->rn_flags & RNF_ROOT) 1048 && (error = (*f)(rn, w))) 1049 return (error); 1050 } 1051 /* If one or more nodes got deleted, restart from top */ 1052 if (h->rnh_cnt < rnh_cnt) 1053 goto restart; 1054 rn = next; 1055 if (rn->rn_flags & RNF_ROOT) 1056 stopping = 1; 1057 } 1058 return 0; 1059} 1060 1061static int 1062rn_walktree(struct radix_node_head *h, walktree_f_t *f, void *w) 1063{ 1064 int error; 1065 struct radix_node *base, *next; 1066 struct radix_node *rn; 1067 int rnh_cnt; 1068 1069 /* 1070 * This gets complicated because we may delete the node while 1071 * applying the function f to it; we cannot simply use the next 1072 * leaf as the successor node in advance, because that leaf may 1073 * be removed as well during deletion when it is a clone of the 1074 * current node. When that happens, we would end up referring 1075 * to an already-freed radix node as the successor node. To get 1076 * around this issue, if we detect that the radix tree has changed 1077 * in dimension (smaller than before), we simply restart the walk 1078 * from the top of tree. 1079 */ 1080restart: 1081 rn = h->rnh_treetop; 1082 rnh_cnt = h->rnh_cnt; 1083 1084 /* First time through node, go left */ 1085 while (rn->rn_bit >= 0) 1086 rn = rn->rn_left; 1087 for (;;) { 1088 base = rn; 1089 /* If at right child go back up, otherwise, go right */ 1090 while (rn->rn_parent->rn_right == rn && 1091 (rn->rn_flags & RNF_ROOT) == 0) 1092 rn = rn->rn_parent; 1093 /* Find the next *leaf* to start from */ 1094 for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;) 1095 rn = rn->rn_left; 1096 next = rn; 1097 /* Process leaves */ 1098 while ((rn = base) != NULL) { 1099 base = rn->rn_dupedkey; 1100 if (!(rn->rn_flags & RNF_ROOT) 1101 && (error = (*f)(rn, w))) 1102 return (error); 1103 } 1104 /* If one or more nodes got deleted, restart from top */ 1105 if (h->rnh_cnt < rnh_cnt) 1106 goto restart; 1107 rn = next; 1108 if (rn->rn_flags & RNF_ROOT) 1109 return (0); 1110 } 1111 /* NOTREACHED */ 1112} 1113 1114int 1115rn_inithead(void **head, int off) 1116{ 1117 struct radix_node_head *rnh; 1118 struct radix_node *t, *tt, *ttt; 1119 if (*head) 1120 return (1); 1121 R_Malloc(rnh, struct radix_node_head *, sizeof (*rnh)); 1122 if (rnh == 0) 1123 return (0); 1124 Bzero(rnh, sizeof (*rnh)); 1125 *head = rnh; 1126 t = rn_newpair(rn_zeros, off, rnh->rnh_nodes); 1127 ttt = rnh->rnh_nodes + 2; 1128 t->rn_right = ttt; 1129 t->rn_parent = t; 1130 tt = t->rn_left; 1131 tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE; 1132 tt->rn_bit = -1 - off; 1133 *ttt = *tt; 1134 ttt->rn_key = rn_ones; 1135 rnh->rnh_addaddr = rn_addroute; 1136 rnh->rnh_deladdr = rn_delete; 1137 rnh->rnh_matchaddr = rn_match; 1138 rnh->rnh_matchaddr_args = rn_match_args; 1139 rnh->rnh_lookup = rn_lookup; 1140 rnh->rnh_lookup_args = rn_lookup_args; 1141 rnh->rnh_walktree = rn_walktree; 1142 rnh->rnh_walktree_from = rn_walktree_from; 1143 rnh->rnh_treetop = t; 1144 rnh->rnh_cnt = 3; 1145 return (1); 1146} 1147 1148void 1149rn_init(void) 1150{ 1151 char *cp, *cplim; 1152#ifdef KERNEL 1153 struct domain *dom; 1154 1155 /* lock already held when rn_init is called */ 1156 for (dom = domains; dom; dom = dom->dom_next) 1157 if (dom->dom_maxrtkey > max_keylen) 1158 max_keylen = dom->dom_maxrtkey; 1159#endif 1160 if (max_keylen == 0) { 1161 log(LOG_ERR, 1162 "rn_init: radix functions require max_keylen be set\n"); 1163 return; 1164 } 1165 R_Malloc(rn_zeros, char *, 3 * max_keylen); 1166 if (rn_zeros == NULL) 1167 panic("rn_init"); 1168 Bzero(rn_zeros, 3 * max_keylen); 1169 rn_ones = cp = rn_zeros + max_keylen; 1170 addmask_key = cplim = rn_ones + max_keylen; 1171 while (cp < cplim) 1172 *cp++ = -1; 1173 if (rn_inithead((void **)&mask_rnhead, 0) == 0) 1174 panic("rn_init 2"); 1175} 1176