1178525Sjb/*
2178525Sjb * CDDL HEADER START
3178525Sjb *
4178525Sjb * The contents of this file are subject to the terms of the
5178525Sjb * Common Development and Distribution License (the "License").
6178525Sjb * You may not use this file except in compliance with the License.
7178525Sjb *
8178525Sjb * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9178525Sjb * or http://www.opensolaris.org/os/licensing.
10178525Sjb * See the License for the specific language governing permissions
11178525Sjb * and limitations under the License.
12178525Sjb *
13178525Sjb * When distributing Covered Code, include this CDDL HEADER in each
14178525Sjb * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15178525Sjb * If applicable, add the following below this CDDL HEADER, with the
16178525Sjb * fields enclosed by brackets "[]" replaced with your own identifying
17178525Sjb * information: Portions Copyright [yyyy] [name of copyright owner]
18178525Sjb *
19178525Sjb * CDDL HEADER END
20178525Sjb */
21178525Sjb/*
22210767Srpaulo * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23178525Sjb * Use is subject to license terms.
24178525Sjb */
25178525Sjb
26178525Sjb/*
27269845Sdelphij * Copyright (c) 2014 by Delphix. All rights reserved.
28269845Sdelphij */
29269845Sdelphij
30269845Sdelphij/*
31178525Sjb * AVL - generic AVL tree implementation for kernel use
32178525Sjb *
33178525Sjb * A complete description of AVL trees can be found in many CS textbooks.
34178525Sjb *
35178525Sjb * Here is a very brief overview. An AVL tree is a binary search tree that is
36178525Sjb * almost perfectly balanced. By "almost" perfectly balanced, we mean that at
37178525Sjb * any given node, the left and right subtrees are allowed to differ in height
38178525Sjb * by at most 1 level.
39178525Sjb *
40178525Sjb * This relaxation from a perfectly balanced binary tree allows doing
41178525Sjb * insertion and deletion relatively efficiently. Searching the tree is
42178525Sjb * still a fast operation, roughly O(log(N)).
43178525Sjb *
44269845Sdelphij * The key to insertion and deletion is a set of tree manipulations called
45178525Sjb * rotations, which bring unbalanced subtrees back into the semi-balanced state.
46178525Sjb *
47178525Sjb * This implementation of AVL trees has the following peculiarities:
48178525Sjb *
49178525Sjb *	- The AVL specific data structures are physically embedded as fields
50178525Sjb *	  in the "using" data structures.  To maintain generality the code
51178525Sjb *	  must constantly translate between "avl_node_t *" and containing
52269845Sdelphij *	  data structure "void *"s by adding/subtracting the avl_offset.
53178525Sjb *
54178525Sjb *	- Since the AVL data is always embedded in other structures, there is
55178525Sjb *	  no locking or memory allocation in the AVL routines. This must be
56178525Sjb *	  provided for by the enclosing data structure's semantics. Typically,
57178525Sjb *	  avl_insert()/_add()/_remove()/avl_insert_here() require some kind of
58178525Sjb *	  exclusive write lock. Other operations require a read lock.
59178525Sjb *
60178525Sjb *      - The implementation uses iteration instead of explicit recursion,
61178525Sjb *	  since it is intended to run on limited size kernel stacks. Since
62178525Sjb *	  there is no recursion stack present to move "up" in the tree,
63178525Sjb *	  there is an explicit "parent" link in the avl_node_t.
64178525Sjb *
65178525Sjb *      - The left/right children pointers of a node are in an array.
66178525Sjb *	  In the code, variables (instead of constants) are used to represent
67178525Sjb *	  left and right indices.  The implementation is written as if it only
68178525Sjb *	  dealt with left handed manipulations.  By changing the value assigned
69178525Sjb *	  to "left", the code also works for right handed trees.  The
70178525Sjb *	  following variables/terms are frequently used:
71178525Sjb *
72178525Sjb *		int left;	// 0 when dealing with left children,
73178525Sjb *				// 1 for dealing with right children
74178525Sjb *
75178525Sjb *		int left_heavy;	// -1 when left subtree is taller at some node,
76178525Sjb *				// +1 when right subtree is taller
77178525Sjb *
78178525Sjb *		int right;	// will be the opposite of left (0 or 1)
79178525Sjb *		int right_heavy;// will be the opposite of left_heavy (-1 or 1)
80178525Sjb *
81178525Sjb *		int direction;  // 0 for "<" (ie. left child); 1 for ">" (right)
82178525Sjb *
83178525Sjb *	  Though it is a little more confusing to read the code, the approach
84178525Sjb *	  allows using half as much code (and hence cache footprint) for tree
85178525Sjb *	  manipulations and eliminates many conditional branches.
86178525Sjb *
87178525Sjb *	- The avl_index_t is an opaque "cookie" used to find nodes at or
88178525Sjb *	  adjacent to where a new value would be inserted in the tree. The value
89178525Sjb *	  is a modified "avl_node_t *".  The bottom bit (normally 0 for a
90178525Sjb *	  pointer) is set to indicate if that the new node has a value greater
91178525Sjb *	  than the value of the indicated "avl_node_t *".
92269845Sdelphij *
93269845Sdelphij * Note - in addition to userland (e.g. libavl and libutil) and the kernel
94269845Sdelphij * (e.g. genunix), avl.c is compiled into ld.so and kmdb's genunix module,
95269845Sdelphij * which each have their own compilation environments and subsequent
96269845Sdelphij * requirements. Each of these environments must be considered when adding
97269845Sdelphij * dependencies from avl.c.
98178525Sjb */
99178525Sjb
100178525Sjb#include <sys/types.h>
101178525Sjb#include <sys/param.h>
102178525Sjb#include <sys/debug.h>
103178525Sjb#include <sys/avl.h>
104178525Sjb#include <sys/cmn_err.h>
105178525Sjb
106178525Sjb/*
107269845Sdelphij * Small arrays to translate between balance (or diff) values and child indices.
108178525Sjb *
109178525Sjb * Code that deals with binary tree data structures will randomly use
110178525Sjb * left and right children when examining a tree.  C "if()" statements
111178525Sjb * which evaluate randomly suffer from very poor hardware branch prediction.
112178525Sjb * In this code we avoid some of the branch mispredictions by using the
113178525Sjb * following translation arrays. They replace random branches with an
114178525Sjb * additional memory reference. Since the translation arrays are both very
115178525Sjb * small the data should remain efficiently in cache.
116178525Sjb */
117178525Sjbstatic const int  avl_child2balance[2]	= {-1, 1};
118178525Sjbstatic const int  avl_balance2child[]	= {0, 0, 1};
119178525Sjb
120178525Sjb
121178525Sjb/*
122178525Sjb * Walk from one node to the previous valued node (ie. an infix walk
123178525Sjb * towards the left). At any given node we do one of 2 things:
124178525Sjb *
125178525Sjb * - If there is a left child, go to it, then to it's rightmost descendant.
126178525Sjb *
127269845Sdelphij * - otherwise we return through parent nodes until we've come from a right
128269845Sdelphij *   child.
129178525Sjb *
130178525Sjb * Return Value:
131178525Sjb * NULL - if at the end of the nodes
132178525Sjb * otherwise next node
133178525Sjb */
134178525Sjbvoid *
135178525Sjbavl_walk(avl_tree_t *tree, void	*oldnode, int left)
136178525Sjb{
137178525Sjb	size_t off = tree->avl_offset;
138178525Sjb	avl_node_t *node = AVL_DATA2NODE(oldnode, off);
139178525Sjb	int right = 1 - left;
140178525Sjb	int was_child;
141178525Sjb
142178525Sjb
143178525Sjb	/*
144178525Sjb	 * nowhere to walk to if tree is empty
145178525Sjb	 */
146178525Sjb	if (node == NULL)
147178525Sjb		return (NULL);
148178525Sjb
149178525Sjb	/*
150178525Sjb	 * Visit the previous valued node. There are two possibilities:
151178525Sjb	 *
152178525Sjb	 * If this node has a left child, go down one left, then all
153178525Sjb	 * the way right.
154178525Sjb	 */
155178525Sjb	if (node->avl_child[left] != NULL) {
156178525Sjb		for (node = node->avl_child[left];
157178525Sjb		    node->avl_child[right] != NULL;
158178525Sjb		    node = node->avl_child[right])
159178525Sjb			;
160178525Sjb	/*
161178525Sjb	 * Otherwise, return thru left children as far as we can.
162178525Sjb	 */
163178525Sjb	} else {
164178525Sjb		for (;;) {
165178525Sjb			was_child = AVL_XCHILD(node);
166178525Sjb			node = AVL_XPARENT(node);
167178525Sjb			if (node == NULL)
168178525Sjb				return (NULL);
169178525Sjb			if (was_child == right)
170178525Sjb				break;
171178525Sjb		}
172178525Sjb	}
173178525Sjb
174178525Sjb	return (AVL_NODE2DATA(node, off));
175178525Sjb}
176178525Sjb
177178525Sjb/*
178178525Sjb * Return the lowest valued node in a tree or NULL.
179178525Sjb * (leftmost child from root of tree)
180178525Sjb */
181178525Sjbvoid *
182178525Sjbavl_first(avl_tree_t *tree)
183178525Sjb{
184178525Sjb	avl_node_t *node;
185178525Sjb	avl_node_t *prev = NULL;
186178525Sjb	size_t off = tree->avl_offset;
187178525Sjb
188178525Sjb	for (node = tree->avl_root; node != NULL; node = node->avl_child[0])
189178525Sjb		prev = node;
190178525Sjb
191178525Sjb	if (prev != NULL)
192178525Sjb		return (AVL_NODE2DATA(prev, off));
193178525Sjb	return (NULL);
194178525Sjb}
195178525Sjb
196178525Sjb/*
197178525Sjb * Return the highest valued node in a tree or NULL.
198178525Sjb * (rightmost child from root of tree)
199178525Sjb */
200178525Sjbvoid *
201178525Sjbavl_last(avl_tree_t *tree)
202178525Sjb{
203178525Sjb	avl_node_t *node;
204178525Sjb	avl_node_t *prev = NULL;
205178525Sjb	size_t off = tree->avl_offset;
206178525Sjb
207178525Sjb	for (node = tree->avl_root; node != NULL; node = node->avl_child[1])
208178525Sjb		prev = node;
209178525Sjb
210178525Sjb	if (prev != NULL)
211178525Sjb		return (AVL_NODE2DATA(prev, off));
212178525Sjb	return (NULL);
213178525Sjb}
214178525Sjb
215178525Sjb/*
216178525Sjb * Access the node immediately before or after an insertion point.
217178525Sjb *
218178525Sjb * "avl_index_t" is a (avl_node_t *) with the bottom bit indicating a child
219178525Sjb *
220178525Sjb * Return value:
221178525Sjb *	NULL: no node in the given direction
222178525Sjb *	"void *"  of the found tree node
223178525Sjb */
224178525Sjbvoid *
225178525Sjbavl_nearest(avl_tree_t *tree, avl_index_t where, int direction)
226178525Sjb{
227178525Sjb	int child = AVL_INDEX2CHILD(where);
228178525Sjb	avl_node_t *node = AVL_INDEX2NODE(where);
229178525Sjb	void *data;
230178525Sjb	size_t off = tree->avl_offset;
231178525Sjb
232178525Sjb	if (node == NULL) {
233178525Sjb		ASSERT(tree->avl_root == NULL);
234178525Sjb		return (NULL);
235178525Sjb	}
236178525Sjb	data = AVL_NODE2DATA(node, off);
237178525Sjb	if (child != direction)
238178525Sjb		return (data);
239178525Sjb
240178525Sjb	return (avl_walk(tree, data, direction));
241178525Sjb}
242178525Sjb
243178525Sjb
244178525Sjb/*
245178525Sjb * Search for the node which contains "value".  The algorithm is a
246178525Sjb * simple binary tree search.
247178525Sjb *
248178525Sjb * return value:
249178525Sjb *	NULL: the value is not in the AVL tree
250178525Sjb *		*where (if not NULL)  is set to indicate the insertion point
251178525Sjb *	"void *"  of the found tree node
252178525Sjb */
253178525Sjbvoid *
254210767Srpauloavl_find(avl_tree_t *tree, const void *value, avl_index_t *where)
255178525Sjb{
256178525Sjb	avl_node_t *node;
257178525Sjb	avl_node_t *prev = NULL;
258178525Sjb	int child = 0;
259178525Sjb	int diff;
260178525Sjb	size_t off = tree->avl_offset;
261178525Sjb
262178525Sjb	for (node = tree->avl_root; node != NULL;
263178525Sjb	    node = node->avl_child[child]) {
264178525Sjb
265178525Sjb		prev = node;
266178525Sjb
267178525Sjb		diff = tree->avl_compar(value, AVL_NODE2DATA(node, off));
268178525Sjb		ASSERT(-1 <= diff && diff <= 1);
269178525Sjb		if (diff == 0) {
270178525Sjb#ifdef DEBUG
271178525Sjb			if (where != NULL)
272178525Sjb				*where = 0;
273178525Sjb#endif
274178525Sjb			return (AVL_NODE2DATA(node, off));
275178525Sjb		}
276178525Sjb		child = avl_balance2child[1 + diff];
277178525Sjb
278178525Sjb	}
279178525Sjb
280178525Sjb	if (where != NULL)
281178525Sjb		*where = AVL_MKINDEX(prev, child);
282178525Sjb
283178525Sjb	return (NULL);
284178525Sjb}
285178525Sjb
286178525Sjb
287178525Sjb/*
288178525Sjb * Perform a rotation to restore balance at the subtree given by depth.
289178525Sjb *
290178525Sjb * This routine is used by both insertion and deletion. The return value
291178525Sjb * indicates:
292178525Sjb *	 0 : subtree did not change height
293178525Sjb *	!0 : subtree was reduced in height
294178525Sjb *
295178525Sjb * The code is written as if handling left rotations, right rotations are
296178525Sjb * symmetric and handled by swapping values of variables right/left[_heavy]
297178525Sjb *
298178525Sjb * On input balance is the "new" balance at "node". This value is either
299178525Sjb * -2 or +2.
300178525Sjb */
301178525Sjbstatic int
302178525Sjbavl_rotation(avl_tree_t *tree, avl_node_t *node, int balance)
303178525Sjb{
304178525Sjb	int left = !(balance < 0);	/* when balance = -2, left will be 0 */
305178525Sjb	int right = 1 - left;
306178525Sjb	int left_heavy = balance >> 1;
307178525Sjb	int right_heavy = -left_heavy;
308178525Sjb	avl_node_t *parent = AVL_XPARENT(node);
309178525Sjb	avl_node_t *child = node->avl_child[left];
310178525Sjb	avl_node_t *cright;
311178525Sjb	avl_node_t *gchild;
312178525Sjb	avl_node_t *gright;
313178525Sjb	avl_node_t *gleft;
314178525Sjb	int which_child = AVL_XCHILD(node);
315178525Sjb	int child_bal = AVL_XBALANCE(child);
316178525Sjb
317178525Sjb	/* BEGIN CSTYLED */
318178525Sjb	/*
319178525Sjb	 * case 1 : node is overly left heavy, the left child is balanced or
320178525Sjb	 * also left heavy. This requires the following rotation.
321178525Sjb	 *
322178525Sjb	 *                   (node bal:-2)
323178525Sjb	 *                    /           \
324178525Sjb	 *                   /             \
325178525Sjb	 *              (child bal:0 or -1)
326178525Sjb	 *              /    \
327178525Sjb	 *             /      \
328178525Sjb	 *                     cright
329178525Sjb	 *
330178525Sjb	 * becomes:
331178525Sjb	 *
332178525Sjb	 *              (child bal:1 or 0)
333178525Sjb	 *              /        \
334178525Sjb	 *             /          \
335178525Sjb	 *                        (node bal:-1 or 0)
336178525Sjb	 *                         /     \
337178525Sjb	 *                        /       \
338178525Sjb	 *                     cright
339178525Sjb	 *
340178525Sjb	 * we detect this situation by noting that child's balance is not
341178525Sjb	 * right_heavy.
342178525Sjb	 */
343178525Sjb	/* END CSTYLED */
344178525Sjb	if (child_bal != right_heavy) {
345178525Sjb
346178525Sjb		/*
347178525Sjb		 * compute new balance of nodes
348178525Sjb		 *
349178525Sjb		 * If child used to be left heavy (now balanced) we reduced
350178525Sjb		 * the height of this sub-tree -- used in "return...;" below
351178525Sjb		 */
352178525Sjb		child_bal += right_heavy; /* adjust towards right */
353178525Sjb
354178525Sjb		/*
355178525Sjb		 * move "cright" to be node's left child
356178525Sjb		 */
357178525Sjb		cright = child->avl_child[right];
358178525Sjb		node->avl_child[left] = cright;
359178525Sjb		if (cright != NULL) {
360178525Sjb			AVL_SETPARENT(cright, node);
361178525Sjb			AVL_SETCHILD(cright, left);
362178525Sjb		}
363178525Sjb
364178525Sjb		/*
365178525Sjb		 * move node to be child's right child
366178525Sjb		 */
367178525Sjb		child->avl_child[right] = node;
368178525Sjb		AVL_SETBALANCE(node, -child_bal);
369178525Sjb		AVL_SETCHILD(node, right);
370178525Sjb		AVL_SETPARENT(node, child);
371178525Sjb
372178525Sjb		/*
373178525Sjb		 * update the pointer into this subtree
374178525Sjb		 */
375178525Sjb		AVL_SETBALANCE(child, child_bal);
376178525Sjb		AVL_SETCHILD(child, which_child);
377178525Sjb		AVL_SETPARENT(child, parent);
378178525Sjb		if (parent != NULL)
379178525Sjb			parent->avl_child[which_child] = child;
380178525Sjb		else
381178525Sjb			tree->avl_root = child;
382178525Sjb
383178525Sjb		return (child_bal == 0);
384178525Sjb	}
385178525Sjb
386178525Sjb	/* BEGIN CSTYLED */
387178525Sjb	/*
388178525Sjb	 * case 2 : When node is left heavy, but child is right heavy we use
389178525Sjb	 * a different rotation.
390178525Sjb	 *
391178525Sjb	 *                   (node b:-2)
392178525Sjb	 *                    /   \
393178525Sjb	 *                   /     \
394178525Sjb	 *                  /       \
395178525Sjb	 *             (child b:+1)
396178525Sjb	 *              /     \
397178525Sjb	 *             /       \
398178525Sjb	 *                   (gchild b: != 0)
399178525Sjb	 *                     /  \
400178525Sjb	 *                    /    \
401178525Sjb	 *                 gleft   gright
402178525Sjb	 *
403178525Sjb	 * becomes:
404178525Sjb	 *
405178525Sjb	 *              (gchild b:0)
406178525Sjb	 *              /       \
407178525Sjb	 *             /         \
408178525Sjb	 *            /           \
409178525Sjb	 *        (child b:?)   (node b:?)
410178525Sjb	 *         /  \          /   \
411178525Sjb	 *        /    \        /     \
412178525Sjb	 *            gleft   gright
413178525Sjb	 *
414178525Sjb	 * computing the new balances is more complicated. As an example:
415178525Sjb	 *	 if gchild was right_heavy, then child is now left heavy
416178525Sjb	 *		else it is balanced
417178525Sjb	 */
418178525Sjb	/* END CSTYLED */
419178525Sjb	gchild = child->avl_child[right];
420178525Sjb	gleft = gchild->avl_child[left];
421178525Sjb	gright = gchild->avl_child[right];
422178525Sjb
423178525Sjb	/*
424178525Sjb	 * move gright to left child of node and
425178525Sjb	 *
426178525Sjb	 * move gleft to right child of node
427178525Sjb	 */
428178525Sjb	node->avl_child[left] = gright;
429178525Sjb	if (gright != NULL) {
430178525Sjb		AVL_SETPARENT(gright, node);
431178525Sjb		AVL_SETCHILD(gright, left);
432178525Sjb	}
433178525Sjb
434178525Sjb	child->avl_child[right] = gleft;
435178525Sjb	if (gleft != NULL) {
436178525Sjb		AVL_SETPARENT(gleft, child);
437178525Sjb		AVL_SETCHILD(gleft, right);
438178525Sjb	}
439178525Sjb
440178525Sjb	/*
441178525Sjb	 * move child to left child of gchild and
442178525Sjb	 *
443178525Sjb	 * move node to right child of gchild and
444178525Sjb	 *
445178525Sjb	 * fixup parent of all this to point to gchild
446178525Sjb	 */
447178525Sjb	balance = AVL_XBALANCE(gchild);
448178525Sjb	gchild->avl_child[left] = child;
449178525Sjb	AVL_SETBALANCE(child, (balance == right_heavy ? left_heavy : 0));
450178525Sjb	AVL_SETPARENT(child, gchild);
451178525Sjb	AVL_SETCHILD(child, left);
452178525Sjb
453178525Sjb	gchild->avl_child[right] = node;
454178525Sjb	AVL_SETBALANCE(node, (balance == left_heavy ? right_heavy : 0));
455178525Sjb	AVL_SETPARENT(node, gchild);
456178525Sjb	AVL_SETCHILD(node, right);
457178525Sjb
458178525Sjb	AVL_SETBALANCE(gchild, 0);
459178525Sjb	AVL_SETPARENT(gchild, parent);
460178525Sjb	AVL_SETCHILD(gchild, which_child);
461178525Sjb	if (parent != NULL)
462178525Sjb		parent->avl_child[which_child] = gchild;
463178525Sjb	else
464178525Sjb		tree->avl_root = gchild;
465178525Sjb
466178525Sjb	return (1);	/* the new tree is always shorter */
467178525Sjb}
468178525Sjb
469178525Sjb
470178525Sjb/*
471178525Sjb * Insert a new node into an AVL tree at the specified (from avl_find()) place.
472178525Sjb *
473178525Sjb * Newly inserted nodes are always leaf nodes in the tree, since avl_find()
474178525Sjb * searches out to the leaf positions.  The avl_index_t indicates the node
475178525Sjb * which will be the parent of the new node.
476178525Sjb *
477178525Sjb * After the node is inserted, a single rotation further up the tree may
478178525Sjb * be necessary to maintain an acceptable AVL balance.
479178525Sjb */
480178525Sjbvoid
481178525Sjbavl_insert(avl_tree_t *tree, void *new_data, avl_index_t where)
482178525Sjb{
483178525Sjb	avl_node_t *node;
484178525Sjb	avl_node_t *parent = AVL_INDEX2NODE(where);
485178525Sjb	int old_balance;
486178525Sjb	int new_balance;
487178525Sjb	int which_child = AVL_INDEX2CHILD(where);
488178525Sjb	size_t off = tree->avl_offset;
489178525Sjb
490178525Sjb	ASSERT(tree);
491178525Sjb#ifdef _LP64
492178525Sjb	ASSERT(((uintptr_t)new_data & 0x7) == 0);
493178525Sjb#endif
494178525Sjb
495178525Sjb	node = AVL_DATA2NODE(new_data, off);
496178525Sjb
497178525Sjb	/*
498178525Sjb	 * First, add the node to the tree at the indicated position.
499178525Sjb	 */
500178525Sjb	++tree->avl_numnodes;
501178525Sjb
502178525Sjb	node->avl_child[0] = NULL;
503178525Sjb	node->avl_child[1] = NULL;
504178525Sjb
505178525Sjb	AVL_SETCHILD(node, which_child);
506178525Sjb	AVL_SETBALANCE(node, 0);
507178525Sjb	AVL_SETPARENT(node, parent);
508178525Sjb	if (parent != NULL) {
509178525Sjb		ASSERT(parent->avl_child[which_child] == NULL);
510178525Sjb		parent->avl_child[which_child] = node;
511178525Sjb	} else {
512178525Sjb		ASSERT(tree->avl_root == NULL);
513178525Sjb		tree->avl_root = node;
514178525Sjb	}
515178525Sjb	/*
516178525Sjb	 * Now, back up the tree modifying the balance of all nodes above the
517178525Sjb	 * insertion point. If we get to a highly unbalanced ancestor, we
518178525Sjb	 * need to do a rotation.  If we back out of the tree we are done.
519178525Sjb	 * If we brought any subtree into perfect balance (0), we are also done.
520178525Sjb	 */
521178525Sjb	for (;;) {
522178525Sjb		node = parent;
523178525Sjb		if (node == NULL)
524178525Sjb			return;
525178525Sjb
526178525Sjb		/*
527178525Sjb		 * Compute the new balance
528178525Sjb		 */
529178525Sjb		old_balance = AVL_XBALANCE(node);
530178525Sjb		new_balance = old_balance + avl_child2balance[which_child];
531178525Sjb
532178525Sjb		/*
533178525Sjb		 * If we introduced equal balance, then we are done immediately
534178525Sjb		 */
535178525Sjb		if (new_balance == 0) {
536178525Sjb			AVL_SETBALANCE(node, 0);
537178525Sjb			return;
538178525Sjb		}
539178525Sjb
540178525Sjb		/*
541178525Sjb		 * If both old and new are not zero we went
542178525Sjb		 * from -1 to -2 balance, do a rotation.
543178525Sjb		 */
544178525Sjb		if (old_balance != 0)
545178525Sjb			break;
546178525Sjb
547178525Sjb		AVL_SETBALANCE(node, new_balance);
548178525Sjb		parent = AVL_XPARENT(node);
549178525Sjb		which_child = AVL_XCHILD(node);
550178525Sjb	}
551178525Sjb
552178525Sjb	/*
553178525Sjb	 * perform a rotation to fix the tree and return
554178525Sjb	 */
555178525Sjb	(void) avl_rotation(tree, node, new_balance);
556178525Sjb}
557178525Sjb
558178525Sjb/*
559178525Sjb * Insert "new_data" in "tree" in the given "direction" either after or
560178525Sjb * before (AVL_AFTER, AVL_BEFORE) the data "here".
561178525Sjb *
562178525Sjb * Insertions can only be done at empty leaf points in the tree, therefore
563178525Sjb * if the given child of the node is already present we move to either
564178525Sjb * the AVL_PREV or AVL_NEXT and reverse the insertion direction. Since
565178525Sjb * every other node in the tree is a leaf, this always works.
566178525Sjb *
567178525Sjb * To help developers using this interface, we assert that the new node
568178525Sjb * is correctly ordered at every step of the way in DEBUG kernels.
569178525Sjb */
570178525Sjbvoid
571178525Sjbavl_insert_here(
572178525Sjb	avl_tree_t *tree,
573178525Sjb	void *new_data,
574178525Sjb	void *here,
575178525Sjb	int direction)
576178525Sjb{
577178525Sjb	avl_node_t *node;
578178525Sjb	int child = direction;	/* rely on AVL_BEFORE == 0, AVL_AFTER == 1 */
579178525Sjb#ifdef DEBUG
580178525Sjb	int diff;
581178525Sjb#endif
582178525Sjb
583178525Sjb	ASSERT(tree != NULL);
584178525Sjb	ASSERT(new_data != NULL);
585178525Sjb	ASSERT(here != NULL);
586178525Sjb	ASSERT(direction == AVL_BEFORE || direction == AVL_AFTER);
587178525Sjb
588178525Sjb	/*
589178525Sjb	 * If corresponding child of node is not NULL, go to the neighboring
590178525Sjb	 * node and reverse the insertion direction.
591178525Sjb	 */
592178525Sjb	node = AVL_DATA2NODE(here, tree->avl_offset);
593178525Sjb
594178525Sjb#ifdef DEBUG
595178525Sjb	diff = tree->avl_compar(new_data, here);
596178525Sjb	ASSERT(-1 <= diff && diff <= 1);
597178525Sjb	ASSERT(diff != 0);
598178525Sjb	ASSERT(diff > 0 ? child == 1 : child == 0);
599178525Sjb#endif
600178525Sjb
601178525Sjb	if (node->avl_child[child] != NULL) {
602178525Sjb		node = node->avl_child[child];
603178525Sjb		child = 1 - child;
604178525Sjb		while (node->avl_child[child] != NULL) {
605178525Sjb#ifdef DEBUG
606178525Sjb			diff = tree->avl_compar(new_data,
607178525Sjb			    AVL_NODE2DATA(node, tree->avl_offset));
608178525Sjb			ASSERT(-1 <= diff && diff <= 1);
609178525Sjb			ASSERT(diff != 0);
610178525Sjb			ASSERT(diff > 0 ? child == 1 : child == 0);
611178525Sjb#endif
612178525Sjb			node = node->avl_child[child];
613178525Sjb		}
614178525Sjb#ifdef DEBUG
615178525Sjb		diff = tree->avl_compar(new_data,
616178525Sjb		    AVL_NODE2DATA(node, tree->avl_offset));
617178525Sjb		ASSERT(-1 <= diff && diff <= 1);
618178525Sjb		ASSERT(diff != 0);
619178525Sjb		ASSERT(diff > 0 ? child == 1 : child == 0);
620178525Sjb#endif
621178525Sjb	}
622178525Sjb	ASSERT(node->avl_child[child] == NULL);
623178525Sjb
624178525Sjb	avl_insert(tree, new_data, AVL_MKINDEX(node, child));
625178525Sjb}
626178525Sjb
627178525Sjb/*
628178525Sjb * Add a new node to an AVL tree.
629178525Sjb */
630178525Sjbvoid
631178525Sjbavl_add(avl_tree_t *tree, void *new_node)
632178525Sjb{
633178525Sjb	avl_index_t where;
634178525Sjb
635178525Sjb	/*
636178525Sjb	 * This is unfortunate.  We want to call panic() here, even for
637178525Sjb	 * non-DEBUG kernels.  In userland, however, we can't depend on anything
638178525Sjb	 * in libc or else the rtld build process gets confused.  So, all we can
639178525Sjb	 * do in userland is resort to a normal ASSERT().
640178525Sjb	 */
641178525Sjb	if (avl_find(tree, new_node, &where) != NULL)
642178525Sjb#ifdef _KERNEL
643178525Sjb		panic("avl_find() succeeded inside avl_add()");
644178525Sjb#else
645178525Sjb		ASSERT(0);
646178525Sjb#endif
647178525Sjb	avl_insert(tree, new_node, where);
648178525Sjb}
649178525Sjb
650178525Sjb/*
651178525Sjb * Delete a node from the AVL tree.  Deletion is similar to insertion, but
652178525Sjb * with 2 complications.
653178525Sjb *
654178525Sjb * First, we may be deleting an interior node. Consider the following subtree:
655178525Sjb *
656178525Sjb *     d           c            c
657178525Sjb *    / \         / \          / \
658178525Sjb *   b   e       b   e        b   e
659178525Sjb *  / \	        / \          /
660178525Sjb * a   c       a            a
661178525Sjb *
662178525Sjb * When we are deleting node (d), we find and bring up an adjacent valued leaf
663178525Sjb * node, say (c), to take the interior node's place. In the code this is
664178525Sjb * handled by temporarily swapping (d) and (c) in the tree and then using
665178525Sjb * common code to delete (d) from the leaf position.
666178525Sjb *
667178525Sjb * Secondly, an interior deletion from a deep tree may require more than one
668178525Sjb * rotation to fix the balance. This is handled by moving up the tree through
669178525Sjb * parents and applying rotations as needed. The return value from
670178525Sjb * avl_rotation() is used to detect when a subtree did not change overall
671178525Sjb * height due to a rotation.
672178525Sjb */
673178525Sjbvoid
674178525Sjbavl_remove(avl_tree_t *tree, void *data)
675178525Sjb{
676178525Sjb	avl_node_t *delete;
677178525Sjb	avl_node_t *parent;
678178525Sjb	avl_node_t *node;
679178525Sjb	avl_node_t tmp;
680178525Sjb	int old_balance;
681178525Sjb	int new_balance;
682178525Sjb	int left;
683178525Sjb	int right;
684178525Sjb	int which_child;
685178525Sjb	size_t off = tree->avl_offset;
686178525Sjb
687178525Sjb	ASSERT(tree);
688178525Sjb
689178525Sjb	delete = AVL_DATA2NODE(data, off);
690178525Sjb
691178525Sjb	/*
692178525Sjb	 * Deletion is easiest with a node that has at most 1 child.
693178525Sjb	 * We swap a node with 2 children with a sequentially valued
694178525Sjb	 * neighbor node. That node will have at most 1 child. Note this
695178525Sjb	 * has no effect on the ordering of the remaining nodes.
696178525Sjb	 *
697178525Sjb	 * As an optimization, we choose the greater neighbor if the tree
698178525Sjb	 * is right heavy, otherwise the left neighbor. This reduces the
699178525Sjb	 * number of rotations needed.
700178525Sjb	 */
701178525Sjb	if (delete->avl_child[0] != NULL && delete->avl_child[1] != NULL) {
702178525Sjb
703178525Sjb		/*
704178525Sjb		 * choose node to swap from whichever side is taller
705178525Sjb		 */
706178525Sjb		old_balance = AVL_XBALANCE(delete);
707178525Sjb		left = avl_balance2child[old_balance + 1];
708178525Sjb		right = 1 - left;
709178525Sjb
710178525Sjb		/*
711178525Sjb		 * get to the previous value'd node
712178525Sjb		 * (down 1 left, as far as possible right)
713178525Sjb		 */
714178525Sjb		for (node = delete->avl_child[left];
715178525Sjb		    node->avl_child[right] != NULL;
716178525Sjb		    node = node->avl_child[right])
717178525Sjb			;
718178525Sjb
719178525Sjb		/*
720178525Sjb		 * create a temp placeholder for 'node'
721178525Sjb		 * move 'node' to delete's spot in the tree
722178525Sjb		 */
723178525Sjb		tmp = *node;
724178525Sjb
725178525Sjb		*node = *delete;
726178525Sjb		if (node->avl_child[left] == node)
727178525Sjb			node->avl_child[left] = &tmp;
728178525Sjb
729178525Sjb		parent = AVL_XPARENT(node);
730178525Sjb		if (parent != NULL)
731178525Sjb			parent->avl_child[AVL_XCHILD(node)] = node;
732178525Sjb		else
733178525Sjb			tree->avl_root = node;
734178525Sjb		AVL_SETPARENT(node->avl_child[left], node);
735178525Sjb		AVL_SETPARENT(node->avl_child[right], node);
736178525Sjb
737178525Sjb		/*
738178525Sjb		 * Put tmp where node used to be (just temporary).
739178525Sjb		 * It always has a parent and at most 1 child.
740178525Sjb		 */
741178525Sjb		delete = &tmp;
742178525Sjb		parent = AVL_XPARENT(delete);
743178525Sjb		parent->avl_child[AVL_XCHILD(delete)] = delete;
744178525Sjb		which_child = (delete->avl_child[1] != 0);
745178525Sjb		if (delete->avl_child[which_child] != NULL)
746178525Sjb			AVL_SETPARENT(delete->avl_child[which_child], delete);
747178525Sjb	}
748178525Sjb
749178525Sjb
750178525Sjb	/*
751178525Sjb	 * Here we know "delete" is at least partially a leaf node. It can
752178525Sjb	 * be easily removed from the tree.
753178525Sjb	 */
754178525Sjb	ASSERT(tree->avl_numnodes > 0);
755178525Sjb	--tree->avl_numnodes;
756178525Sjb	parent = AVL_XPARENT(delete);
757178525Sjb	which_child = AVL_XCHILD(delete);
758178525Sjb	if (delete->avl_child[0] != NULL)
759178525Sjb		node = delete->avl_child[0];
760178525Sjb	else
761178525Sjb		node = delete->avl_child[1];
762178525Sjb
763178525Sjb	/*
764178525Sjb	 * Connect parent directly to node (leaving out delete).
765178525Sjb	 */
766178525Sjb	if (node != NULL) {
767178525Sjb		AVL_SETPARENT(node, parent);
768178525Sjb		AVL_SETCHILD(node, which_child);
769178525Sjb	}
770178525Sjb	if (parent == NULL) {
771178525Sjb		tree->avl_root = node;
772178525Sjb		return;
773178525Sjb	}
774178525Sjb	parent->avl_child[which_child] = node;
775178525Sjb
776178525Sjb
777178525Sjb	/*
778178525Sjb	 * Since the subtree is now shorter, begin adjusting parent balances
779178525Sjb	 * and performing any needed rotations.
780178525Sjb	 */
781178525Sjb	do {
782178525Sjb
783178525Sjb		/*
784178525Sjb		 * Move up the tree and adjust the balance
785178525Sjb		 *
786178525Sjb		 * Capture the parent and which_child values for the next
787178525Sjb		 * iteration before any rotations occur.
788178525Sjb		 */
789178525Sjb		node = parent;
790178525Sjb		old_balance = AVL_XBALANCE(node);
791178525Sjb		new_balance = old_balance - avl_child2balance[which_child];
792178525Sjb		parent = AVL_XPARENT(node);
793178525Sjb		which_child = AVL_XCHILD(node);
794178525Sjb
795178525Sjb		/*
796178525Sjb		 * If a node was in perfect balance but isn't anymore then
797178525Sjb		 * we can stop, since the height didn't change above this point
798178525Sjb		 * due to a deletion.
799178525Sjb		 */
800178525Sjb		if (old_balance == 0) {
801178525Sjb			AVL_SETBALANCE(node, new_balance);
802178525Sjb			break;
803178525Sjb		}
804178525Sjb
805178525Sjb		/*
806178525Sjb		 * If the new balance is zero, we don't need to rotate
807178525Sjb		 * else
808178525Sjb		 * need a rotation to fix the balance.
809178525Sjb		 * If the rotation doesn't change the height
810178525Sjb		 * of the sub-tree we have finished adjusting.
811178525Sjb		 */
812178525Sjb		if (new_balance == 0)
813178525Sjb			AVL_SETBALANCE(node, new_balance);
814178525Sjb		else if (!avl_rotation(tree, node, new_balance))
815178525Sjb			break;
816178525Sjb	} while (parent != NULL);
817178525Sjb}
818178525Sjb
819210767Srpaulo#define	AVL_REINSERT(tree, obj)		\
820210767Srpaulo	avl_remove((tree), (obj));	\
821210767Srpaulo	avl_add((tree), (obj))
822210767Srpaulo
823210767Srpauloboolean_t
824210767Srpauloavl_update_lt(avl_tree_t *t, void *obj)
825210767Srpaulo{
826210767Srpaulo	void *neighbor;
827210767Srpaulo
828210767Srpaulo	ASSERT(((neighbor = AVL_NEXT(t, obj)) == NULL) ||
829210767Srpaulo	    (t->avl_compar(obj, neighbor) <= 0));
830210767Srpaulo
831210767Srpaulo	neighbor = AVL_PREV(t, obj);
832210767Srpaulo	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
833210767Srpaulo		AVL_REINSERT(t, obj);
834210767Srpaulo		return (B_TRUE);
835210767Srpaulo	}
836210767Srpaulo
837210767Srpaulo	return (B_FALSE);
838210767Srpaulo}
839210767Srpaulo
840210767Srpauloboolean_t
841210767Srpauloavl_update_gt(avl_tree_t *t, void *obj)
842210767Srpaulo{
843210767Srpaulo	void *neighbor;
844210767Srpaulo
845210767Srpaulo	ASSERT(((neighbor = AVL_PREV(t, obj)) == NULL) ||
846210767Srpaulo	    (t->avl_compar(obj, neighbor) >= 0));
847210767Srpaulo
848210767Srpaulo	neighbor = AVL_NEXT(t, obj);
849210767Srpaulo	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
850210767Srpaulo		AVL_REINSERT(t, obj);
851210767Srpaulo		return (B_TRUE);
852210767Srpaulo	}
853210767Srpaulo
854210767Srpaulo	return (B_FALSE);
855210767Srpaulo}
856210767Srpaulo
857210767Srpauloboolean_t
858210767Srpauloavl_update(avl_tree_t *t, void *obj)
859210767Srpaulo{
860210767Srpaulo	void *neighbor;
861210767Srpaulo
862210767Srpaulo	neighbor = AVL_PREV(t, obj);
863210767Srpaulo	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
864210767Srpaulo		AVL_REINSERT(t, obj);
865210767Srpaulo		return (B_TRUE);
866210767Srpaulo	}
867210767Srpaulo
868210767Srpaulo	neighbor = AVL_NEXT(t, obj);
869210767Srpaulo	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
870210767Srpaulo		AVL_REINSERT(t, obj);
871210767Srpaulo		return (B_TRUE);
872210767Srpaulo	}
873210767Srpaulo
874210767Srpaulo	return (B_FALSE);
875210767Srpaulo}
876210767Srpaulo
877269845Sdelphijvoid
878269845Sdelphijavl_swap(avl_tree_t *tree1, avl_tree_t *tree2)
879269845Sdelphij{
880269845Sdelphij	avl_node_t *temp_node;
881269845Sdelphij	ulong_t temp_numnodes;
882269845Sdelphij
883269845Sdelphij	ASSERT3P(tree1->avl_compar, ==, tree2->avl_compar);
884269845Sdelphij	ASSERT3U(tree1->avl_offset, ==, tree2->avl_offset);
885269845Sdelphij	ASSERT3U(tree1->avl_size, ==, tree2->avl_size);
886269845Sdelphij
887269845Sdelphij	temp_node = tree1->avl_root;
888269845Sdelphij	temp_numnodes = tree1->avl_numnodes;
889269845Sdelphij	tree1->avl_root = tree2->avl_root;
890269845Sdelphij	tree1->avl_numnodes = tree2->avl_numnodes;
891269845Sdelphij	tree2->avl_root = temp_node;
892269845Sdelphij	tree2->avl_numnodes = temp_numnodes;
893269845Sdelphij}
894269845Sdelphij
895178525Sjb/*
896178525Sjb * initialize a new AVL tree
897178525Sjb */
898178525Sjbvoid
899178525Sjbavl_create(avl_tree_t *tree, int (*compar) (const void *, const void *),
900178525Sjb    size_t size, size_t offset)
901178525Sjb{
902178525Sjb	ASSERT(tree);
903178525Sjb	ASSERT(compar);
904178525Sjb	ASSERT(size > 0);
905178525Sjb	ASSERT(size >= offset + sizeof (avl_node_t));
906178525Sjb#ifdef _LP64
907178525Sjb	ASSERT((offset & 0x7) == 0);
908178525Sjb#endif
909178525Sjb
910178525Sjb	tree->avl_compar = compar;
911178525Sjb	tree->avl_root = NULL;
912178525Sjb	tree->avl_numnodes = 0;
913178525Sjb	tree->avl_size = size;
914178525Sjb	tree->avl_offset = offset;
915178525Sjb}
916178525Sjb
917178525Sjb/*
918178525Sjb * Delete a tree.
919178525Sjb */
920178525Sjb/* ARGSUSED */
921178525Sjbvoid
922178525Sjbavl_destroy(avl_tree_t *tree)
923178525Sjb{
924178525Sjb	ASSERT(tree);
925178525Sjb	ASSERT(tree->avl_numnodes == 0);
926178525Sjb	ASSERT(tree->avl_root == NULL);
927178525Sjb}
928178525Sjb
929178525Sjb
930178525Sjb/*
931178525Sjb * Return the number of nodes in an AVL tree.
932178525Sjb */
933178525Sjbulong_t
934178525Sjbavl_numnodes(avl_tree_t *tree)
935178525Sjb{
936178525Sjb	ASSERT(tree);
937178525Sjb	return (tree->avl_numnodes);
938178525Sjb}
939178525Sjb
940210767Srpauloboolean_t
941210767Srpauloavl_is_empty(avl_tree_t *tree)
942210767Srpaulo{
943210767Srpaulo	ASSERT(tree);
944210767Srpaulo	return (tree->avl_numnodes == 0);
945210767Srpaulo}
946178525Sjb
947178525Sjb#define	CHILDBIT	(1L)
948178525Sjb
949178525Sjb/*
950178525Sjb * Post-order tree walk used to visit all tree nodes and destroy the tree
951269845Sdelphij * in post order. This is used for destroying a tree without paying any cost
952178525Sjb * for rebalancing it.
953178525Sjb *
954178525Sjb * example:
955178525Sjb *
956178525Sjb *	void *cookie = NULL;
957178525Sjb *	my_data_t *node;
958178525Sjb *
959178525Sjb *	while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
960178525Sjb *		free(node);
961178525Sjb *	avl_destroy(tree);
962178525Sjb *
963178525Sjb * The cookie is really an avl_node_t to the current node's parent and
964178525Sjb * an indication of which child you looked at last.
965178525Sjb *
966178525Sjb * On input, a cookie value of CHILDBIT indicates the tree is done.
967178525Sjb */
968178525Sjbvoid *
969178525Sjbavl_destroy_nodes(avl_tree_t *tree, void **cookie)
970178525Sjb{
971178525Sjb	avl_node_t	*node;
972178525Sjb	avl_node_t	*parent;
973178525Sjb	int		child;
974178525Sjb	void		*first;
975178525Sjb	size_t		off = tree->avl_offset;
976178525Sjb
977178525Sjb	/*
978178525Sjb	 * Initial calls go to the first node or it's right descendant.
979178525Sjb	 */
980178525Sjb	if (*cookie == NULL) {
981178525Sjb		first = avl_first(tree);
982178525Sjb
983178525Sjb		/*
984178525Sjb		 * deal with an empty tree
985178525Sjb		 */
986178525Sjb		if (first == NULL) {
987178525Sjb			*cookie = (void *)CHILDBIT;
988178525Sjb			return (NULL);
989178525Sjb		}
990178525Sjb
991178525Sjb		node = AVL_DATA2NODE(first, off);
992178525Sjb		parent = AVL_XPARENT(node);
993178525Sjb		goto check_right_side;
994178525Sjb	}
995178525Sjb
996178525Sjb	/*
997178525Sjb	 * If there is no parent to return to we are done.
998178525Sjb	 */
999178525Sjb	parent = (avl_node_t *)((uintptr_t)(*cookie) & ~CHILDBIT);
1000178525Sjb	if (parent == NULL) {
1001178525Sjb		if (tree->avl_root != NULL) {
1002178525Sjb			ASSERT(tree->avl_numnodes == 1);
1003178525Sjb			tree->avl_root = NULL;
1004178525Sjb			tree->avl_numnodes = 0;
1005178525Sjb		}
1006178525Sjb		return (NULL);
1007178525Sjb	}
1008178525Sjb
1009178525Sjb	/*
1010178525Sjb	 * Remove the child pointer we just visited from the parent and tree.
1011178525Sjb	 */
1012178525Sjb	child = (uintptr_t)(*cookie) & CHILDBIT;
1013178525Sjb	parent->avl_child[child] = NULL;
1014178525Sjb	ASSERT(tree->avl_numnodes > 1);
1015178525Sjb	--tree->avl_numnodes;
1016178525Sjb
1017178525Sjb	/*
1018178525Sjb	 * If we just did a right child or there isn't one, go up to parent.
1019178525Sjb	 */
1020178525Sjb	if (child == 1 || parent->avl_child[1] == NULL) {
1021178525Sjb		node = parent;
1022178525Sjb		parent = AVL_XPARENT(parent);
1023178525Sjb		goto done;
1024178525Sjb	}
1025178525Sjb
1026178525Sjb	/*
1027178525Sjb	 * Do parent's right child, then leftmost descendent.
1028178525Sjb	 */
1029178525Sjb	node = parent->avl_child[1];
1030178525Sjb	while (node->avl_child[0] != NULL) {
1031178525Sjb		parent = node;
1032178525Sjb		node = node->avl_child[0];
1033178525Sjb	}
1034178525Sjb
1035178525Sjb	/*
1036178525Sjb	 * If here, we moved to a left child. It may have one
1037178525Sjb	 * child on the right (when balance == +1).
1038178525Sjb	 */
1039178525Sjbcheck_right_side:
1040178525Sjb	if (node->avl_child[1] != NULL) {
1041178525Sjb		ASSERT(AVL_XBALANCE(node) == 1);
1042178525Sjb		parent = node;
1043178525Sjb		node = node->avl_child[1];
1044178525Sjb		ASSERT(node->avl_child[0] == NULL &&
1045178525Sjb		    node->avl_child[1] == NULL);
1046178525Sjb	} else {
1047178525Sjb		ASSERT(AVL_XBALANCE(node) <= 0);
1048178525Sjb	}
1049178525Sjb
1050178525Sjbdone:
1051178525Sjb	if (parent == NULL) {
1052178525Sjb		*cookie = (void *)CHILDBIT;
1053178525Sjb		ASSERT(node == tree->avl_root);
1054178525Sjb	} else {
1055178525Sjb		*cookie = (void *)((uintptr_t)parent | AVL_XCHILD(node));
1056178525Sjb	}
1057178525Sjb
1058178525Sjb	return (AVL_NODE2DATA(node, off));
1059178525Sjb}
1060