1178525Sjb/*
2178525Sjb * CDDL HEADER START
3178525Sjb *
4178525Sjb * The contents of this file are subject to the terms of the
5178525Sjb * Common Development and Distribution License (the "License").
6178525Sjb * You may not use this file except in compliance with the License.
7178525Sjb *
8178525Sjb * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9178525Sjb * or http://www.opensolaris.org/os/licensing.
10178525Sjb * See the License for the specific language governing permissions
11178525Sjb * and limitations under the License.
12178525Sjb *
13178525Sjb * When distributing Covered Code, include this CDDL HEADER in each
14178525Sjb * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15178525Sjb * If applicable, add the following below this CDDL HEADER, with the
16178525Sjb * fields enclosed by brackets "[]" replaced with your own identifying
17178525Sjb * information: Portions Copyright [yyyy] [name of copyright owner]
18178525Sjb *
19178525Sjb * CDDL HEADER END
20178525Sjb */
21178525Sjb/*
22210767Srpaulo * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23178525Sjb * Use is subject to license terms.
24178525Sjb */
25178525Sjb
26178525Sjb/*
27178525Sjb * AVL - generic AVL tree implementation for kernel use
28178525Sjb *
29178525Sjb * A complete description of AVL trees can be found in many CS textbooks.
30178525Sjb *
31178525Sjb * Here is a very brief overview. An AVL tree is a binary search tree that is
32178525Sjb * almost perfectly balanced. By "almost" perfectly balanced, we mean that at
33178525Sjb * any given node, the left and right subtrees are allowed to differ in height
34178525Sjb * by at most 1 level.
35178525Sjb *
36178525Sjb * This relaxation from a perfectly balanced binary tree allows doing
37178525Sjb * insertion and deletion relatively efficiently. Searching the tree is
38178525Sjb * still a fast operation, roughly O(log(N)).
39178525Sjb *
40178525Sjb * The key to insertion and deletion is a set of tree maniuplations called
41178525Sjb * rotations, which bring unbalanced subtrees back into the semi-balanced state.
42178525Sjb *
43178525Sjb * This implementation of AVL trees has the following peculiarities:
44178525Sjb *
45178525Sjb *	- The AVL specific data structures are physically embedded as fields
46178525Sjb *	  in the "using" data structures.  To maintain generality the code
47178525Sjb *	  must constantly translate between "avl_node_t *" and containing
48178525Sjb *	  data structure "void *"s by adding/subracting the avl_offset.
49178525Sjb *
50178525Sjb *	- Since the AVL data is always embedded in other structures, there is
51178525Sjb *	  no locking or memory allocation in the AVL routines. This must be
52178525Sjb *	  provided for by the enclosing data structure's semantics. Typically,
53178525Sjb *	  avl_insert()/_add()/_remove()/avl_insert_here() require some kind of
54178525Sjb *	  exclusive write lock. Other operations require a read lock.
55178525Sjb *
56178525Sjb *      - The implementation uses iteration instead of explicit recursion,
57178525Sjb *	  since it is intended to run on limited size kernel stacks. Since
58178525Sjb *	  there is no recursion stack present to move "up" in the tree,
59178525Sjb *	  there is an explicit "parent" link in the avl_node_t.
60178525Sjb *
61178525Sjb *      - The left/right children pointers of a node are in an array.
62178525Sjb *	  In the code, variables (instead of constants) are used to represent
63178525Sjb *	  left and right indices.  The implementation is written as if it only
64178525Sjb *	  dealt with left handed manipulations.  By changing the value assigned
65178525Sjb *	  to "left", the code also works for right handed trees.  The
66178525Sjb *	  following variables/terms are frequently used:
67178525Sjb *
68178525Sjb *		int left;	// 0 when dealing with left children,
69178525Sjb *				// 1 for dealing with right children
70178525Sjb *
71178525Sjb *		int left_heavy;	// -1 when left subtree is taller at some node,
72178525Sjb *				// +1 when right subtree is taller
73178525Sjb *
74178525Sjb *		int right;	// will be the opposite of left (0 or 1)
75178525Sjb *		int right_heavy;// will be the opposite of left_heavy (-1 or 1)
76178525Sjb *
77178525Sjb *		int direction;  // 0 for "<" (ie. left child); 1 for ">" (right)
78178525Sjb *
79178525Sjb *	  Though it is a little more confusing to read the code, the approach
80178525Sjb *	  allows using half as much code (and hence cache footprint) for tree
81178525Sjb *	  manipulations and eliminates many conditional branches.
82178525Sjb *
83178525Sjb *	- The avl_index_t is an opaque "cookie" used to find nodes at or
84178525Sjb *	  adjacent to where a new value would be inserted in the tree. The value
85178525Sjb *	  is a modified "avl_node_t *".  The bottom bit (normally 0 for a
86178525Sjb *	  pointer) is set to indicate if that the new node has a value greater
87178525Sjb *	  than the value of the indicated "avl_node_t *".
88178525Sjb */
89178525Sjb
90178525Sjb#include <sys/types.h>
91178525Sjb#include <sys/param.h>
92178525Sjb#include <sys/debug.h>
93178525Sjb#include <sys/avl.h>
94178525Sjb#include <sys/cmn_err.h>
95178525Sjb
96178525Sjb/*
97178525Sjb * Small arrays to translate between balance (or diff) values and child indeces.
98178525Sjb *
99178525Sjb * Code that deals with binary tree data structures will randomly use
100178525Sjb * left and right children when examining a tree.  C "if()" statements
101178525Sjb * which evaluate randomly suffer from very poor hardware branch prediction.
102178525Sjb * In this code we avoid some of the branch mispredictions by using the
103178525Sjb * following translation arrays. They replace random branches with an
104178525Sjb * additional memory reference. Since the translation arrays are both very
105178525Sjb * small the data should remain efficiently in cache.
106178525Sjb */
107178525Sjbstatic const int  avl_child2balance[2]	= {-1, 1};
108178525Sjbstatic const int  avl_balance2child[]	= {0, 0, 1};
109178525Sjb
110178525Sjb
111178525Sjb/*
112178525Sjb * Walk from one node to the previous valued node (ie. an infix walk
113178525Sjb * towards the left). At any given node we do one of 2 things:
114178525Sjb *
115178525Sjb * - If there is a left child, go to it, then to it's rightmost descendant.
116178525Sjb *
117178525Sjb * - otherwise we return thru parent nodes until we've come from a right child.
118178525Sjb *
119178525Sjb * Return Value:
120178525Sjb * NULL - if at the end of the nodes
121178525Sjb * otherwise next node
122178525Sjb */
123178525Sjbvoid *
124178525Sjbavl_walk(avl_tree_t *tree, void	*oldnode, int left)
125178525Sjb{
126178525Sjb	size_t off = tree->avl_offset;
127178525Sjb	avl_node_t *node = AVL_DATA2NODE(oldnode, off);
128178525Sjb	int right = 1 - left;
129178525Sjb	int was_child;
130178525Sjb
131178525Sjb
132178525Sjb	/*
133178525Sjb	 * nowhere to walk to if tree is empty
134178525Sjb	 */
135178525Sjb	if (node == NULL)
136178525Sjb		return (NULL);
137178525Sjb
138178525Sjb	/*
139178525Sjb	 * Visit the previous valued node. There are two possibilities:
140178525Sjb	 *
141178525Sjb	 * If this node has a left child, go down one left, then all
142178525Sjb	 * the way right.
143178525Sjb	 */
144178525Sjb	if (node->avl_child[left] != NULL) {
145178525Sjb		for (node = node->avl_child[left];
146178525Sjb		    node->avl_child[right] != NULL;
147178525Sjb		    node = node->avl_child[right])
148178525Sjb			;
149178525Sjb	/*
150178525Sjb	 * Otherwise, return thru left children as far as we can.
151178525Sjb	 */
152178525Sjb	} else {
153178525Sjb		for (;;) {
154178525Sjb			was_child = AVL_XCHILD(node);
155178525Sjb			node = AVL_XPARENT(node);
156178525Sjb			if (node == NULL)
157178525Sjb				return (NULL);
158178525Sjb			if (was_child == right)
159178525Sjb				break;
160178525Sjb		}
161178525Sjb	}
162178525Sjb
163178525Sjb	return (AVL_NODE2DATA(node, off));
164178525Sjb}
165178525Sjb
166178525Sjb/*
167178525Sjb * Return the lowest valued node in a tree or NULL.
168178525Sjb * (leftmost child from root of tree)
169178525Sjb */
170178525Sjbvoid *
171178525Sjbavl_first(avl_tree_t *tree)
172178525Sjb{
173178525Sjb	avl_node_t *node;
174178525Sjb	avl_node_t *prev = NULL;
175178525Sjb	size_t off = tree->avl_offset;
176178525Sjb
177178525Sjb	for (node = tree->avl_root; node != NULL; node = node->avl_child[0])
178178525Sjb		prev = node;
179178525Sjb
180178525Sjb	if (prev != NULL)
181178525Sjb		return (AVL_NODE2DATA(prev, off));
182178525Sjb	return (NULL);
183178525Sjb}
184178525Sjb
185178525Sjb/*
186178525Sjb * Return the highest valued node in a tree or NULL.
187178525Sjb * (rightmost child from root of tree)
188178525Sjb */
189178525Sjbvoid *
190178525Sjbavl_last(avl_tree_t *tree)
191178525Sjb{
192178525Sjb	avl_node_t *node;
193178525Sjb	avl_node_t *prev = NULL;
194178525Sjb	size_t off = tree->avl_offset;
195178525Sjb
196178525Sjb	for (node = tree->avl_root; node != NULL; node = node->avl_child[1])
197178525Sjb		prev = node;
198178525Sjb
199178525Sjb	if (prev != NULL)
200178525Sjb		return (AVL_NODE2DATA(prev, off));
201178525Sjb	return (NULL);
202178525Sjb}
203178525Sjb
204178525Sjb/*
205178525Sjb * Access the node immediately before or after an insertion point.
206178525Sjb *
207178525Sjb * "avl_index_t" is a (avl_node_t *) with the bottom bit indicating a child
208178525Sjb *
209178525Sjb * Return value:
210178525Sjb *	NULL: no node in the given direction
211178525Sjb *	"void *"  of the found tree node
212178525Sjb */
213178525Sjbvoid *
214178525Sjbavl_nearest(avl_tree_t *tree, avl_index_t where, int direction)
215178525Sjb{
216178525Sjb	int child = AVL_INDEX2CHILD(where);
217178525Sjb	avl_node_t *node = AVL_INDEX2NODE(where);
218178525Sjb	void *data;
219178525Sjb	size_t off = tree->avl_offset;
220178525Sjb
221178525Sjb	if (node == NULL) {
222178525Sjb		ASSERT(tree->avl_root == NULL);
223178525Sjb		return (NULL);
224178525Sjb	}
225178525Sjb	data = AVL_NODE2DATA(node, off);
226178525Sjb	if (child != direction)
227178525Sjb		return (data);
228178525Sjb
229178525Sjb	return (avl_walk(tree, data, direction));
230178525Sjb}
231178525Sjb
232178525Sjb
233178525Sjb/*
234178525Sjb * Search for the node which contains "value".  The algorithm is a
235178525Sjb * simple binary tree search.
236178525Sjb *
237178525Sjb * return value:
238178525Sjb *	NULL: the value is not in the AVL tree
239178525Sjb *		*where (if not NULL)  is set to indicate the insertion point
240178525Sjb *	"void *"  of the found tree node
241178525Sjb */
242178525Sjbvoid *
243210767Srpauloavl_find(avl_tree_t *tree, const void *value, avl_index_t *where)
244178525Sjb{
245178525Sjb	avl_node_t *node;
246178525Sjb	avl_node_t *prev = NULL;
247178525Sjb	int child = 0;
248178525Sjb	int diff;
249178525Sjb	size_t off = tree->avl_offset;
250178525Sjb
251178525Sjb	for (node = tree->avl_root; node != NULL;
252178525Sjb	    node = node->avl_child[child]) {
253178525Sjb
254178525Sjb		prev = node;
255178525Sjb
256178525Sjb		diff = tree->avl_compar(value, AVL_NODE2DATA(node, off));
257178525Sjb		ASSERT(-1 <= diff && diff <= 1);
258178525Sjb		if (diff == 0) {
259178525Sjb#ifdef DEBUG
260178525Sjb			if (where != NULL)
261178525Sjb				*where = 0;
262178525Sjb#endif
263178525Sjb			return (AVL_NODE2DATA(node, off));
264178525Sjb		}
265178525Sjb		child = avl_balance2child[1 + diff];
266178525Sjb
267178525Sjb	}
268178525Sjb
269178525Sjb	if (where != NULL)
270178525Sjb		*where = AVL_MKINDEX(prev, child);
271178525Sjb
272178525Sjb	return (NULL);
273178525Sjb}
274178525Sjb
275178525Sjb
276178525Sjb/*
277178525Sjb * Perform a rotation to restore balance at the subtree given by depth.
278178525Sjb *
279178525Sjb * This routine is used by both insertion and deletion. The return value
280178525Sjb * indicates:
281178525Sjb *	 0 : subtree did not change height
282178525Sjb *	!0 : subtree was reduced in height
283178525Sjb *
284178525Sjb * The code is written as if handling left rotations, right rotations are
285178525Sjb * symmetric and handled by swapping values of variables right/left[_heavy]
286178525Sjb *
287178525Sjb * On input balance is the "new" balance at "node". This value is either
288178525Sjb * -2 or +2.
289178525Sjb */
290178525Sjbstatic int
291178525Sjbavl_rotation(avl_tree_t *tree, avl_node_t *node, int balance)
292178525Sjb{
293178525Sjb	int left = !(balance < 0);	/* when balance = -2, left will be 0 */
294178525Sjb	int right = 1 - left;
295178525Sjb	int left_heavy = balance >> 1;
296178525Sjb	int right_heavy = -left_heavy;
297178525Sjb	avl_node_t *parent = AVL_XPARENT(node);
298178525Sjb	avl_node_t *child = node->avl_child[left];
299178525Sjb	avl_node_t *cright;
300178525Sjb	avl_node_t *gchild;
301178525Sjb	avl_node_t *gright;
302178525Sjb	avl_node_t *gleft;
303178525Sjb	int which_child = AVL_XCHILD(node);
304178525Sjb	int child_bal = AVL_XBALANCE(child);
305178525Sjb
306178525Sjb	/* BEGIN CSTYLED */
307178525Sjb	/*
308178525Sjb	 * case 1 : node is overly left heavy, the left child is balanced or
309178525Sjb	 * also left heavy. This requires the following rotation.
310178525Sjb	 *
311178525Sjb	 *                   (node bal:-2)
312178525Sjb	 *                    /           \
313178525Sjb	 *                   /             \
314178525Sjb	 *              (child bal:0 or -1)
315178525Sjb	 *              /    \
316178525Sjb	 *             /      \
317178525Sjb	 *                     cright
318178525Sjb	 *
319178525Sjb	 * becomes:
320178525Sjb	 *
321178525Sjb	 *              (child bal:1 or 0)
322178525Sjb	 *              /        \
323178525Sjb	 *             /          \
324178525Sjb	 *                        (node bal:-1 or 0)
325178525Sjb	 *                         /     \
326178525Sjb	 *                        /       \
327178525Sjb	 *                     cright
328178525Sjb	 *
329178525Sjb	 * we detect this situation by noting that child's balance is not
330178525Sjb	 * right_heavy.
331178525Sjb	 */
332178525Sjb	/* END CSTYLED */
333178525Sjb	if (child_bal != right_heavy) {
334178525Sjb
335178525Sjb		/*
336178525Sjb		 * compute new balance of nodes
337178525Sjb		 *
338178525Sjb		 * If child used to be left heavy (now balanced) we reduced
339178525Sjb		 * the height of this sub-tree -- used in "return...;" below
340178525Sjb		 */
341178525Sjb		child_bal += right_heavy; /* adjust towards right */
342178525Sjb
343178525Sjb		/*
344178525Sjb		 * move "cright" to be node's left child
345178525Sjb		 */
346178525Sjb		cright = child->avl_child[right];
347178525Sjb		node->avl_child[left] = cright;
348178525Sjb		if (cright != NULL) {
349178525Sjb			AVL_SETPARENT(cright, node);
350178525Sjb			AVL_SETCHILD(cright, left);
351178525Sjb		}
352178525Sjb
353178525Sjb		/*
354178525Sjb		 * move node to be child's right child
355178525Sjb		 */
356178525Sjb		child->avl_child[right] = node;
357178525Sjb		AVL_SETBALANCE(node, -child_bal);
358178525Sjb		AVL_SETCHILD(node, right);
359178525Sjb		AVL_SETPARENT(node, child);
360178525Sjb
361178525Sjb		/*
362178525Sjb		 * update the pointer into this subtree
363178525Sjb		 */
364178525Sjb		AVL_SETBALANCE(child, child_bal);
365178525Sjb		AVL_SETCHILD(child, which_child);
366178525Sjb		AVL_SETPARENT(child, parent);
367178525Sjb		if (parent != NULL)
368178525Sjb			parent->avl_child[which_child] = child;
369178525Sjb		else
370178525Sjb			tree->avl_root = child;
371178525Sjb
372178525Sjb		return (child_bal == 0);
373178525Sjb	}
374178525Sjb
375178525Sjb	/* BEGIN CSTYLED */
376178525Sjb	/*
377178525Sjb	 * case 2 : When node is left heavy, but child is right heavy we use
378178525Sjb	 * a different rotation.
379178525Sjb	 *
380178525Sjb	 *                   (node b:-2)
381178525Sjb	 *                    /   \
382178525Sjb	 *                   /     \
383178525Sjb	 *                  /       \
384178525Sjb	 *             (child b:+1)
385178525Sjb	 *              /     \
386178525Sjb	 *             /       \
387178525Sjb	 *                   (gchild b: != 0)
388178525Sjb	 *                     /  \
389178525Sjb	 *                    /    \
390178525Sjb	 *                 gleft   gright
391178525Sjb	 *
392178525Sjb	 * becomes:
393178525Sjb	 *
394178525Sjb	 *              (gchild b:0)
395178525Sjb	 *              /       \
396178525Sjb	 *             /         \
397178525Sjb	 *            /           \
398178525Sjb	 *        (child b:?)   (node b:?)
399178525Sjb	 *         /  \          /   \
400178525Sjb	 *        /    \        /     \
401178525Sjb	 *            gleft   gright
402178525Sjb	 *
403178525Sjb	 * computing the new balances is more complicated. As an example:
404178525Sjb	 *	 if gchild was right_heavy, then child is now left heavy
405178525Sjb	 *		else it is balanced
406178525Sjb	 */
407178525Sjb	/* END CSTYLED */
408178525Sjb	gchild = child->avl_child[right];
409178525Sjb	gleft = gchild->avl_child[left];
410178525Sjb	gright = gchild->avl_child[right];
411178525Sjb
412178525Sjb	/*
413178525Sjb	 * move gright to left child of node and
414178525Sjb	 *
415178525Sjb	 * move gleft to right child of node
416178525Sjb	 */
417178525Sjb	node->avl_child[left] = gright;
418178525Sjb	if (gright != NULL) {
419178525Sjb		AVL_SETPARENT(gright, node);
420178525Sjb		AVL_SETCHILD(gright, left);
421178525Sjb	}
422178525Sjb
423178525Sjb	child->avl_child[right] = gleft;
424178525Sjb	if (gleft != NULL) {
425178525Sjb		AVL_SETPARENT(gleft, child);
426178525Sjb		AVL_SETCHILD(gleft, right);
427178525Sjb	}
428178525Sjb
429178525Sjb	/*
430178525Sjb	 * move child to left child of gchild and
431178525Sjb	 *
432178525Sjb	 * move node to right child of gchild and
433178525Sjb	 *
434178525Sjb	 * fixup parent of all this to point to gchild
435178525Sjb	 */
436178525Sjb	balance = AVL_XBALANCE(gchild);
437178525Sjb	gchild->avl_child[left] = child;
438178525Sjb	AVL_SETBALANCE(child, (balance == right_heavy ? left_heavy : 0));
439178525Sjb	AVL_SETPARENT(child, gchild);
440178525Sjb	AVL_SETCHILD(child, left);
441178525Sjb
442178525Sjb	gchild->avl_child[right] = node;
443178525Sjb	AVL_SETBALANCE(node, (balance == left_heavy ? right_heavy : 0));
444178525Sjb	AVL_SETPARENT(node, gchild);
445178525Sjb	AVL_SETCHILD(node, right);
446178525Sjb
447178525Sjb	AVL_SETBALANCE(gchild, 0);
448178525Sjb	AVL_SETPARENT(gchild, parent);
449178525Sjb	AVL_SETCHILD(gchild, which_child);
450178525Sjb	if (parent != NULL)
451178525Sjb		parent->avl_child[which_child] = gchild;
452178525Sjb	else
453178525Sjb		tree->avl_root = gchild;
454178525Sjb
455178525Sjb	return (1);	/* the new tree is always shorter */
456178525Sjb}
457178525Sjb
458178525Sjb
459178525Sjb/*
460178525Sjb * Insert a new node into an AVL tree at the specified (from avl_find()) place.
461178525Sjb *
462178525Sjb * Newly inserted nodes are always leaf nodes in the tree, since avl_find()
463178525Sjb * searches out to the leaf positions.  The avl_index_t indicates the node
464178525Sjb * which will be the parent of the new node.
465178525Sjb *
466178525Sjb * After the node is inserted, a single rotation further up the tree may
467178525Sjb * be necessary to maintain an acceptable AVL balance.
468178525Sjb */
469178525Sjbvoid
470178525Sjbavl_insert(avl_tree_t *tree, void *new_data, avl_index_t where)
471178525Sjb{
472178525Sjb	avl_node_t *node;
473178525Sjb	avl_node_t *parent = AVL_INDEX2NODE(where);
474178525Sjb	int old_balance;
475178525Sjb	int new_balance;
476178525Sjb	int which_child = AVL_INDEX2CHILD(where);
477178525Sjb	size_t off = tree->avl_offset;
478178525Sjb
479178525Sjb	ASSERT(tree);
480178525Sjb#ifdef _LP64
481178525Sjb	ASSERT(((uintptr_t)new_data & 0x7) == 0);
482178525Sjb#endif
483178525Sjb
484178525Sjb	node = AVL_DATA2NODE(new_data, off);
485178525Sjb
486178525Sjb	/*
487178525Sjb	 * First, add the node to the tree at the indicated position.
488178525Sjb	 */
489178525Sjb	++tree->avl_numnodes;
490178525Sjb
491178525Sjb	node->avl_child[0] = NULL;
492178525Sjb	node->avl_child[1] = NULL;
493178525Sjb
494178525Sjb	AVL_SETCHILD(node, which_child);
495178525Sjb	AVL_SETBALANCE(node, 0);
496178525Sjb	AVL_SETPARENT(node, parent);
497178525Sjb	if (parent != NULL) {
498178525Sjb		ASSERT(parent->avl_child[which_child] == NULL);
499178525Sjb		parent->avl_child[which_child] = node;
500178525Sjb	} else {
501178525Sjb		ASSERT(tree->avl_root == NULL);
502178525Sjb		tree->avl_root = node;
503178525Sjb	}
504178525Sjb	/*
505178525Sjb	 * Now, back up the tree modifying the balance of all nodes above the
506178525Sjb	 * insertion point. If we get to a highly unbalanced ancestor, we
507178525Sjb	 * need to do a rotation.  If we back out of the tree we are done.
508178525Sjb	 * If we brought any subtree into perfect balance (0), we are also done.
509178525Sjb	 */
510178525Sjb	for (;;) {
511178525Sjb		node = parent;
512178525Sjb		if (node == NULL)
513178525Sjb			return;
514178525Sjb
515178525Sjb		/*
516178525Sjb		 * Compute the new balance
517178525Sjb		 */
518178525Sjb		old_balance = AVL_XBALANCE(node);
519178525Sjb		new_balance = old_balance + avl_child2balance[which_child];
520178525Sjb
521178525Sjb		/*
522178525Sjb		 * If we introduced equal balance, then we are done immediately
523178525Sjb		 */
524178525Sjb		if (new_balance == 0) {
525178525Sjb			AVL_SETBALANCE(node, 0);
526178525Sjb			return;
527178525Sjb		}
528178525Sjb
529178525Sjb		/*
530178525Sjb		 * If both old and new are not zero we went
531178525Sjb		 * from -1 to -2 balance, do a rotation.
532178525Sjb		 */
533178525Sjb		if (old_balance != 0)
534178525Sjb			break;
535178525Sjb
536178525Sjb		AVL_SETBALANCE(node, new_balance);
537178525Sjb		parent = AVL_XPARENT(node);
538178525Sjb		which_child = AVL_XCHILD(node);
539178525Sjb	}
540178525Sjb
541178525Sjb	/*
542178525Sjb	 * perform a rotation to fix the tree and return
543178525Sjb	 */
544178525Sjb	(void) avl_rotation(tree, node, new_balance);
545178525Sjb}
546178525Sjb
547178525Sjb/*
548178525Sjb * Insert "new_data" in "tree" in the given "direction" either after or
549178525Sjb * before (AVL_AFTER, AVL_BEFORE) the data "here".
550178525Sjb *
551178525Sjb * Insertions can only be done at empty leaf points in the tree, therefore
552178525Sjb * if the given child of the node is already present we move to either
553178525Sjb * the AVL_PREV or AVL_NEXT and reverse the insertion direction. Since
554178525Sjb * every other node in the tree is a leaf, this always works.
555178525Sjb *
556178525Sjb * To help developers using this interface, we assert that the new node
557178525Sjb * is correctly ordered at every step of the way in DEBUG kernels.
558178525Sjb */
559178525Sjbvoid
560178525Sjbavl_insert_here(
561178525Sjb	avl_tree_t *tree,
562178525Sjb	void *new_data,
563178525Sjb	void *here,
564178525Sjb	int direction)
565178525Sjb{
566178525Sjb	avl_node_t *node;
567178525Sjb	int child = direction;	/* rely on AVL_BEFORE == 0, AVL_AFTER == 1 */
568178525Sjb#ifdef DEBUG
569178525Sjb	int diff;
570178525Sjb#endif
571178525Sjb
572178525Sjb	ASSERT(tree != NULL);
573178525Sjb	ASSERT(new_data != NULL);
574178525Sjb	ASSERT(here != NULL);
575178525Sjb	ASSERT(direction == AVL_BEFORE || direction == AVL_AFTER);
576178525Sjb
577178525Sjb	/*
578178525Sjb	 * If corresponding child of node is not NULL, go to the neighboring
579178525Sjb	 * node and reverse the insertion direction.
580178525Sjb	 */
581178525Sjb	node = AVL_DATA2NODE(here, tree->avl_offset);
582178525Sjb
583178525Sjb#ifdef DEBUG
584178525Sjb	diff = tree->avl_compar(new_data, here);
585178525Sjb	ASSERT(-1 <= diff && diff <= 1);
586178525Sjb	ASSERT(diff != 0);
587178525Sjb	ASSERT(diff > 0 ? child == 1 : child == 0);
588178525Sjb#endif
589178525Sjb
590178525Sjb	if (node->avl_child[child] != NULL) {
591178525Sjb		node = node->avl_child[child];
592178525Sjb		child = 1 - child;
593178525Sjb		while (node->avl_child[child] != NULL) {
594178525Sjb#ifdef DEBUG
595178525Sjb			diff = tree->avl_compar(new_data,
596178525Sjb			    AVL_NODE2DATA(node, tree->avl_offset));
597178525Sjb			ASSERT(-1 <= diff && diff <= 1);
598178525Sjb			ASSERT(diff != 0);
599178525Sjb			ASSERT(diff > 0 ? child == 1 : child == 0);
600178525Sjb#endif
601178525Sjb			node = node->avl_child[child];
602178525Sjb		}
603178525Sjb#ifdef DEBUG
604178525Sjb		diff = tree->avl_compar(new_data,
605178525Sjb		    AVL_NODE2DATA(node, tree->avl_offset));
606178525Sjb		ASSERT(-1 <= diff && diff <= 1);
607178525Sjb		ASSERT(diff != 0);
608178525Sjb		ASSERT(diff > 0 ? child == 1 : child == 0);
609178525Sjb#endif
610178525Sjb	}
611178525Sjb	ASSERT(node->avl_child[child] == NULL);
612178525Sjb
613178525Sjb	avl_insert(tree, new_data, AVL_MKINDEX(node, child));
614178525Sjb}
615178525Sjb
616178525Sjb/*
617178525Sjb * Add a new node to an AVL tree.
618178525Sjb */
619178525Sjbvoid
620178525Sjbavl_add(avl_tree_t *tree, void *new_node)
621178525Sjb{
622178525Sjb	avl_index_t where;
623178525Sjb
624178525Sjb	/*
625178525Sjb	 * This is unfortunate.  We want to call panic() here, even for
626178525Sjb	 * non-DEBUG kernels.  In userland, however, we can't depend on anything
627178525Sjb	 * in libc or else the rtld build process gets confused.  So, all we can
628178525Sjb	 * do in userland is resort to a normal ASSERT().
629178525Sjb	 */
630178525Sjb	if (avl_find(tree, new_node, &where) != NULL)
631178525Sjb#ifdef _KERNEL
632178525Sjb		panic("avl_find() succeeded inside avl_add()");
633178525Sjb#else
634178525Sjb		ASSERT(0);
635178525Sjb#endif
636178525Sjb	avl_insert(tree, new_node, where);
637178525Sjb}
638178525Sjb
639178525Sjb/*
640178525Sjb * Delete a node from the AVL tree.  Deletion is similar to insertion, but
641178525Sjb * with 2 complications.
642178525Sjb *
643178525Sjb * First, we may be deleting an interior node. Consider the following subtree:
644178525Sjb *
645178525Sjb *     d           c            c
646178525Sjb *    / \         / \          / \
647178525Sjb *   b   e       b   e        b   e
648178525Sjb *  / \	        / \          /
649178525Sjb * a   c       a            a
650178525Sjb *
651178525Sjb * When we are deleting node (d), we find and bring up an adjacent valued leaf
652178525Sjb * node, say (c), to take the interior node's place. In the code this is
653178525Sjb * handled by temporarily swapping (d) and (c) in the tree and then using
654178525Sjb * common code to delete (d) from the leaf position.
655178525Sjb *
656178525Sjb * Secondly, an interior deletion from a deep tree may require more than one
657178525Sjb * rotation to fix the balance. This is handled by moving up the tree through
658178525Sjb * parents and applying rotations as needed. The return value from
659178525Sjb * avl_rotation() is used to detect when a subtree did not change overall
660178525Sjb * height due to a rotation.
661178525Sjb */
662178525Sjbvoid
663178525Sjbavl_remove(avl_tree_t *tree, void *data)
664178525Sjb{
665178525Sjb	avl_node_t *delete;
666178525Sjb	avl_node_t *parent;
667178525Sjb	avl_node_t *node;
668178525Sjb	avl_node_t tmp;
669178525Sjb	int old_balance;
670178525Sjb	int new_balance;
671178525Sjb	int left;
672178525Sjb	int right;
673178525Sjb	int which_child;
674178525Sjb	size_t off = tree->avl_offset;
675178525Sjb
676178525Sjb	ASSERT(tree);
677178525Sjb
678178525Sjb	delete = AVL_DATA2NODE(data, off);
679178525Sjb
680178525Sjb	/*
681178525Sjb	 * Deletion is easiest with a node that has at most 1 child.
682178525Sjb	 * We swap a node with 2 children with a sequentially valued
683178525Sjb	 * neighbor node. That node will have at most 1 child. Note this
684178525Sjb	 * has no effect on the ordering of the remaining nodes.
685178525Sjb	 *
686178525Sjb	 * As an optimization, we choose the greater neighbor if the tree
687178525Sjb	 * is right heavy, otherwise the left neighbor. This reduces the
688178525Sjb	 * number of rotations needed.
689178525Sjb	 */
690178525Sjb	if (delete->avl_child[0] != NULL && delete->avl_child[1] != NULL) {
691178525Sjb
692178525Sjb		/*
693178525Sjb		 * choose node to swap from whichever side is taller
694178525Sjb		 */
695178525Sjb		old_balance = AVL_XBALANCE(delete);
696178525Sjb		left = avl_balance2child[old_balance + 1];
697178525Sjb		right = 1 - left;
698178525Sjb
699178525Sjb		/*
700178525Sjb		 * get to the previous value'd node
701178525Sjb		 * (down 1 left, as far as possible right)
702178525Sjb		 */
703178525Sjb		for (node = delete->avl_child[left];
704178525Sjb		    node->avl_child[right] != NULL;
705178525Sjb		    node = node->avl_child[right])
706178525Sjb			;
707178525Sjb
708178525Sjb		/*
709178525Sjb		 * create a temp placeholder for 'node'
710178525Sjb		 * move 'node' to delete's spot in the tree
711178525Sjb		 */
712178525Sjb		tmp = *node;
713178525Sjb
714178525Sjb		*node = *delete;
715178525Sjb		if (node->avl_child[left] == node)
716178525Sjb			node->avl_child[left] = &tmp;
717178525Sjb
718178525Sjb		parent = AVL_XPARENT(node);
719178525Sjb		if (parent != NULL)
720178525Sjb			parent->avl_child[AVL_XCHILD(node)] = node;
721178525Sjb		else
722178525Sjb			tree->avl_root = node;
723178525Sjb		AVL_SETPARENT(node->avl_child[left], node);
724178525Sjb		AVL_SETPARENT(node->avl_child[right], node);
725178525Sjb
726178525Sjb		/*
727178525Sjb		 * Put tmp where node used to be (just temporary).
728178525Sjb		 * It always has a parent and at most 1 child.
729178525Sjb		 */
730178525Sjb		delete = &tmp;
731178525Sjb		parent = AVL_XPARENT(delete);
732178525Sjb		parent->avl_child[AVL_XCHILD(delete)] = delete;
733178525Sjb		which_child = (delete->avl_child[1] != 0);
734178525Sjb		if (delete->avl_child[which_child] != NULL)
735178525Sjb			AVL_SETPARENT(delete->avl_child[which_child], delete);
736178525Sjb	}
737178525Sjb
738178525Sjb
739178525Sjb	/*
740178525Sjb	 * Here we know "delete" is at least partially a leaf node. It can
741178525Sjb	 * be easily removed from the tree.
742178525Sjb	 */
743178525Sjb	ASSERT(tree->avl_numnodes > 0);
744178525Sjb	--tree->avl_numnodes;
745178525Sjb	parent = AVL_XPARENT(delete);
746178525Sjb	which_child = AVL_XCHILD(delete);
747178525Sjb	if (delete->avl_child[0] != NULL)
748178525Sjb		node = delete->avl_child[0];
749178525Sjb	else
750178525Sjb		node = delete->avl_child[1];
751178525Sjb
752178525Sjb	/*
753178525Sjb	 * Connect parent directly to node (leaving out delete).
754178525Sjb	 */
755178525Sjb	if (node != NULL) {
756178525Sjb		AVL_SETPARENT(node, parent);
757178525Sjb		AVL_SETCHILD(node, which_child);
758178525Sjb	}
759178525Sjb	if (parent == NULL) {
760178525Sjb		tree->avl_root = node;
761178525Sjb		return;
762178525Sjb	}
763178525Sjb	parent->avl_child[which_child] = node;
764178525Sjb
765178525Sjb
766178525Sjb	/*
767178525Sjb	 * Since the subtree is now shorter, begin adjusting parent balances
768178525Sjb	 * and performing any needed rotations.
769178525Sjb	 */
770178525Sjb	do {
771178525Sjb
772178525Sjb		/*
773178525Sjb		 * Move up the tree and adjust the balance
774178525Sjb		 *
775178525Sjb		 * Capture the parent and which_child values for the next
776178525Sjb		 * iteration before any rotations occur.
777178525Sjb		 */
778178525Sjb		node = parent;
779178525Sjb		old_balance = AVL_XBALANCE(node);
780178525Sjb		new_balance = old_balance - avl_child2balance[which_child];
781178525Sjb		parent = AVL_XPARENT(node);
782178525Sjb		which_child = AVL_XCHILD(node);
783178525Sjb
784178525Sjb		/*
785178525Sjb		 * If a node was in perfect balance but isn't anymore then
786178525Sjb		 * we can stop, since the height didn't change above this point
787178525Sjb		 * due to a deletion.
788178525Sjb		 */
789178525Sjb		if (old_balance == 0) {
790178525Sjb			AVL_SETBALANCE(node, new_balance);
791178525Sjb			break;
792178525Sjb		}
793178525Sjb
794178525Sjb		/*
795178525Sjb		 * If the new balance is zero, we don't need to rotate
796178525Sjb		 * else
797178525Sjb		 * need a rotation to fix the balance.
798178525Sjb		 * If the rotation doesn't change the height
799178525Sjb		 * of the sub-tree we have finished adjusting.
800178525Sjb		 */
801178525Sjb		if (new_balance == 0)
802178525Sjb			AVL_SETBALANCE(node, new_balance);
803178525Sjb		else if (!avl_rotation(tree, node, new_balance))
804178525Sjb			break;
805178525Sjb	} while (parent != NULL);
806178525Sjb}
807178525Sjb
808210767Srpaulo#define	AVL_REINSERT(tree, obj)		\
809210767Srpaulo	avl_remove((tree), (obj));	\
810210767Srpaulo	avl_add((tree), (obj))
811210767Srpaulo
812210767Srpauloboolean_t
813210767Srpauloavl_update_lt(avl_tree_t *t, void *obj)
814210767Srpaulo{
815210767Srpaulo	void *neighbor;
816210767Srpaulo
817210767Srpaulo	ASSERT(((neighbor = AVL_NEXT(t, obj)) == NULL) ||
818210767Srpaulo	    (t->avl_compar(obj, neighbor) <= 0));
819210767Srpaulo
820210767Srpaulo	neighbor = AVL_PREV(t, obj);
821210767Srpaulo	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
822210767Srpaulo		AVL_REINSERT(t, obj);
823210767Srpaulo		return (B_TRUE);
824210767Srpaulo	}
825210767Srpaulo
826210767Srpaulo	return (B_FALSE);
827210767Srpaulo}
828210767Srpaulo
829210767Srpauloboolean_t
830210767Srpauloavl_update_gt(avl_tree_t *t, void *obj)
831210767Srpaulo{
832210767Srpaulo	void *neighbor;
833210767Srpaulo
834210767Srpaulo	ASSERT(((neighbor = AVL_PREV(t, obj)) == NULL) ||
835210767Srpaulo	    (t->avl_compar(obj, neighbor) >= 0));
836210767Srpaulo
837210767Srpaulo	neighbor = AVL_NEXT(t, obj);
838210767Srpaulo	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
839210767Srpaulo		AVL_REINSERT(t, obj);
840210767Srpaulo		return (B_TRUE);
841210767Srpaulo	}
842210767Srpaulo
843210767Srpaulo	return (B_FALSE);
844210767Srpaulo}
845210767Srpaulo
846210767Srpauloboolean_t
847210767Srpauloavl_update(avl_tree_t *t, void *obj)
848210767Srpaulo{
849210767Srpaulo	void *neighbor;
850210767Srpaulo
851210767Srpaulo	neighbor = AVL_PREV(t, obj);
852210767Srpaulo	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
853210767Srpaulo		AVL_REINSERT(t, obj);
854210767Srpaulo		return (B_TRUE);
855210767Srpaulo	}
856210767Srpaulo
857210767Srpaulo	neighbor = AVL_NEXT(t, obj);
858210767Srpaulo	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
859210767Srpaulo		AVL_REINSERT(t, obj);
860210767Srpaulo		return (B_TRUE);
861210767Srpaulo	}
862210767Srpaulo
863210767Srpaulo	return (B_FALSE);
864210767Srpaulo}
865210767Srpaulo
866178525Sjb/*
867178525Sjb * initialize a new AVL tree
868178525Sjb */
869178525Sjbvoid
870178525Sjbavl_create(avl_tree_t *tree, int (*compar) (const void *, const void *),
871178525Sjb    size_t size, size_t offset)
872178525Sjb{
873178525Sjb	ASSERT(tree);
874178525Sjb	ASSERT(compar);
875178525Sjb	ASSERT(size > 0);
876178525Sjb	ASSERT(size >= offset + sizeof (avl_node_t));
877178525Sjb#ifdef _LP64
878178525Sjb	ASSERT((offset & 0x7) == 0);
879178525Sjb#endif
880178525Sjb
881178525Sjb	tree->avl_compar = compar;
882178525Sjb	tree->avl_root = NULL;
883178525Sjb	tree->avl_numnodes = 0;
884178525Sjb	tree->avl_size = size;
885178525Sjb	tree->avl_offset = offset;
886178525Sjb}
887178525Sjb
888178525Sjb/*
889178525Sjb * Delete a tree.
890178525Sjb */
891178525Sjb/* ARGSUSED */
892178525Sjbvoid
893178525Sjbavl_destroy(avl_tree_t *tree)
894178525Sjb{
895178525Sjb	ASSERT(tree);
896178525Sjb	ASSERT(tree->avl_numnodes == 0);
897178525Sjb	ASSERT(tree->avl_root == NULL);
898178525Sjb}
899178525Sjb
900178525Sjb
901178525Sjb/*
902178525Sjb * Return the number of nodes in an AVL tree.
903178525Sjb */
904178525Sjbulong_t
905178525Sjbavl_numnodes(avl_tree_t *tree)
906178525Sjb{
907178525Sjb	ASSERT(tree);
908178525Sjb	return (tree->avl_numnodes);
909178525Sjb}
910178525Sjb
911210767Srpauloboolean_t
912210767Srpauloavl_is_empty(avl_tree_t *tree)
913210767Srpaulo{
914210767Srpaulo	ASSERT(tree);
915210767Srpaulo	return (tree->avl_numnodes == 0);
916210767Srpaulo}
917178525Sjb
918178525Sjb#define	CHILDBIT	(1L)
919178525Sjb
920178525Sjb/*
921178525Sjb * Post-order tree walk used to visit all tree nodes and destroy the tree
922178525Sjb * in post order. This is used for destroying a tree w/o paying any cost
923178525Sjb * for rebalancing it.
924178525Sjb *
925178525Sjb * example:
926178525Sjb *
927178525Sjb *	void *cookie = NULL;
928178525Sjb *	my_data_t *node;
929178525Sjb *
930178525Sjb *	while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
931178525Sjb *		free(node);
932178525Sjb *	avl_destroy(tree);
933178525Sjb *
934178525Sjb * The cookie is really an avl_node_t to the current node's parent and
935178525Sjb * an indication of which child you looked at last.
936178525Sjb *
937178525Sjb * On input, a cookie value of CHILDBIT indicates the tree is done.
938178525Sjb */
939178525Sjbvoid *
940178525Sjbavl_destroy_nodes(avl_tree_t *tree, void **cookie)
941178525Sjb{
942178525Sjb	avl_node_t	*node;
943178525Sjb	avl_node_t	*parent;
944178525Sjb	int		child;
945178525Sjb	void		*first;
946178525Sjb	size_t		off = tree->avl_offset;
947178525Sjb
948178525Sjb	/*
949178525Sjb	 * Initial calls go to the first node or it's right descendant.
950178525Sjb	 */
951178525Sjb	if (*cookie == NULL) {
952178525Sjb		first = avl_first(tree);
953178525Sjb
954178525Sjb		/*
955178525Sjb		 * deal with an empty tree
956178525Sjb		 */
957178525Sjb		if (first == NULL) {
958178525Sjb			*cookie = (void *)CHILDBIT;
959178525Sjb			return (NULL);
960178525Sjb		}
961178525Sjb
962178525Sjb		node = AVL_DATA2NODE(first, off);
963178525Sjb		parent = AVL_XPARENT(node);
964178525Sjb		goto check_right_side;
965178525Sjb	}
966178525Sjb
967178525Sjb	/*
968178525Sjb	 * If there is no parent to return to we are done.
969178525Sjb	 */
970178525Sjb	parent = (avl_node_t *)((uintptr_t)(*cookie) & ~CHILDBIT);
971178525Sjb	if (parent == NULL) {
972178525Sjb		if (tree->avl_root != NULL) {
973178525Sjb			ASSERT(tree->avl_numnodes == 1);
974178525Sjb			tree->avl_root = NULL;
975178525Sjb			tree->avl_numnodes = 0;
976178525Sjb		}
977178525Sjb		return (NULL);
978178525Sjb	}
979178525Sjb
980178525Sjb	/*
981178525Sjb	 * Remove the child pointer we just visited from the parent and tree.
982178525Sjb	 */
983178525Sjb	child = (uintptr_t)(*cookie) & CHILDBIT;
984178525Sjb	parent->avl_child[child] = NULL;
985178525Sjb	ASSERT(tree->avl_numnodes > 1);
986178525Sjb	--tree->avl_numnodes;
987178525Sjb
988178525Sjb	/*
989178525Sjb	 * If we just did a right child or there isn't one, go up to parent.
990178525Sjb	 */
991178525Sjb	if (child == 1 || parent->avl_child[1] == NULL) {
992178525Sjb		node = parent;
993178525Sjb		parent = AVL_XPARENT(parent);
994178525Sjb		goto done;
995178525Sjb	}
996178525Sjb
997178525Sjb	/*
998178525Sjb	 * Do parent's right child, then leftmost descendent.
999178525Sjb	 */
1000178525Sjb	node = parent->avl_child[1];
1001178525Sjb	while (node->avl_child[0] != NULL) {
1002178525Sjb		parent = node;
1003178525Sjb		node = node->avl_child[0];
1004178525Sjb	}
1005178525Sjb
1006178525Sjb	/*
1007178525Sjb	 * If here, we moved to a left child. It may have one
1008178525Sjb	 * child on the right (when balance == +1).
1009178525Sjb	 */
1010178525Sjbcheck_right_side:
1011178525Sjb	if (node->avl_child[1] != NULL) {
1012178525Sjb		ASSERT(AVL_XBALANCE(node) == 1);
1013178525Sjb		parent = node;
1014178525Sjb		node = node->avl_child[1];
1015178525Sjb		ASSERT(node->avl_child[0] == NULL &&
1016178525Sjb		    node->avl_child[1] == NULL);
1017178525Sjb	} else {
1018178525Sjb		ASSERT(AVL_XBALANCE(node) <= 0);
1019178525Sjb	}
1020178525Sjb
1021178525Sjbdone:
1022178525Sjb	if (parent == NULL) {
1023178525Sjb		*cookie = (void *)CHILDBIT;
1024178525Sjb		ASSERT(node == tree->avl_root);
1025178525Sjb	} else {
1026178525Sjb		*cookie = (void *)((uintptr_t)parent | AVL_XCHILD(node));
1027178525Sjb	}
1028178525Sjb
1029178525Sjb	return (AVL_NODE2DATA(node, off));
1030178525Sjb}
1031