1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2013 EMC Corp.
5 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
6 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 */
31
32/*
33 * Path-compressed radix trie implementation.
34 *
35 * The implementation takes into account the following rationale:
36 * - Size of the nodes should be as small as possible but still big enough
37 *   to avoid a large maximum depth for the trie.  This is a balance
38 *   between the necessity to not wire too much physical memory for the nodes
39 *   and the necessity to avoid too much cache pollution during the trie
40 *   operations.
41 * - There is not a huge bias toward the number of lookup operations over
42 *   the number of insert and remove operations.  This basically implies
43 *   that optimizations supposedly helping one operation but hurting the
44 *   other might be carefully evaluated.
45 * - On average not many nodes are expected to be fully populated, hence
46 *   level compression may just complicate things.
47 */
48
49#include <sys/cdefs.h>
50__FBSDID("$FreeBSD$");
51
52#include "opt_ddb.h"
53
54#include <sys/param.h>
55#include <sys/systm.h>
56#include <sys/kernel.h>
57#include <sys/pctrie.h>
58#include <sys/proc.h>	/* smr.h depends on struct thread. */
59#include <sys/smr.h>
60#include <sys/smr_types.h>
61
62#ifdef DDB
63#include <ddb/ddb.h>
64#endif
65
66#define	PCTRIE_MASK	(PCTRIE_COUNT - 1)
67#define	PCTRIE_LIMIT	(howmany(sizeof(uint64_t) * NBBY, PCTRIE_WIDTH) - 1)
68
69/* Flag bits stored in node pointers. */
70#define	PCTRIE_ISLEAF	0x1
71#define	PCTRIE_FLAGS	0x1
72#define	PCTRIE_PAD	PCTRIE_FLAGS
73
74/* Returns one unit associated with specified level. */
75#define	PCTRIE_UNITLEVEL(lev)						\
76	((uint64_t)1 << ((lev) * PCTRIE_WIDTH))
77
78struct pctrie_node;
79typedef SMR_POINTER(struct pctrie_node *) smr_pctnode_t;
80
81struct pctrie_node {
82	uint64_t	pn_owner;			/* Owner of record. */
83	uint16_t	pn_count;			/* Valid children. */
84	uint8_t		pn_clev;			/* Current level. */
85	int8_t		pn_last;			/* Zero last ptr. */
86	smr_pctnode_t	pn_child[PCTRIE_COUNT];		/* Child nodes. */
87};
88
89enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED };
90
91static __inline void pctrie_node_store(smr_pctnode_t *p, void *val,
92    enum pctrie_access access);
93
94/*
95 * Allocate a node.  Pre-allocation should ensure that the request
96 * will always be satisfied.
97 */
98static struct pctrie_node *
99pctrie_node_get(struct pctrie *ptree, pctrie_alloc_t allocfn, uint64_t owner,
100    uint16_t count, uint16_t clevel)
101{
102	struct pctrie_node *node;
103
104	node = allocfn(ptree);
105	if (node == NULL)
106		return (NULL);
107
108	/*
109	 * We want to clear the last child pointer after the final section
110	 * has exited so lookup can not return false negatives.  It is done
111	 * here because it will be cache-cold in the dtor callback.
112	 */
113	if (node->pn_last != 0) {
114		pctrie_node_store(&node->pn_child[node->pn_last - 1], NULL,
115		    PCTRIE_UNSERIALIZED);
116		node->pn_last = 0;
117	}
118	node->pn_owner = owner;
119	node->pn_count = count;
120	node->pn_clev = clevel;
121	return (node);
122}
123
124/*
125 * Free radix node.
126 */
127static __inline void
128pctrie_node_put(struct pctrie *ptree, struct pctrie_node *node,
129    pctrie_free_t freefn, int8_t last)
130{
131#ifdef INVARIANTS
132	int slot;
133
134	KASSERT(node->pn_count == 0,
135	    ("pctrie_node_put: node %p has %d children", node,
136	    node->pn_count));
137	for (slot = 0; slot < PCTRIE_COUNT; slot++) {
138		if (slot == last)
139			continue;
140		KASSERT(smr_unserialized_load(&node->pn_child[slot], true) ==
141		    NULL, ("pctrie_node_put: node %p has a child", node));
142	}
143#endif
144	node->pn_last = last + 1;
145	freefn(ptree, node);
146}
147
148/*
149 * Return the position in the array for a given level.
150 */
151static __inline int
152pctrie_slot(uint64_t index, uint16_t level)
153{
154
155	return ((index >> (level * PCTRIE_WIDTH)) & PCTRIE_MASK);
156}
157
158/* Trims the key after the specified level. */
159static __inline uint64_t
160pctrie_trimkey(uint64_t index, uint16_t level)
161{
162	uint64_t ret;
163
164	ret = index;
165	if (level > 0) {
166		ret >>= level * PCTRIE_WIDTH;
167		ret <<= level * PCTRIE_WIDTH;
168	}
169	return (ret);
170}
171
172/*
173 * Fetch a node pointer from a slot.
174 */
175static __inline struct pctrie_node *
176pctrie_node_load(smr_pctnode_t *p, smr_t smr, enum pctrie_access access)
177{
178	switch (access) {
179	case PCTRIE_UNSERIALIZED:
180		return (smr_unserialized_load(p, true));
181	case PCTRIE_LOCKED:
182		return (smr_serialized_load(p, true));
183	case PCTRIE_SMR:
184		return (smr_entered_load(p, smr));
185	}
186	__assert_unreachable();
187}
188
189static __inline void
190pctrie_node_store(smr_pctnode_t *p, void *v, enum pctrie_access access)
191{
192	switch (access) {
193	case PCTRIE_UNSERIALIZED:
194		smr_unserialized_store(p, v, true);
195		break;
196	case PCTRIE_LOCKED:
197		smr_serialized_store(p, v, true);
198		break;
199	case PCTRIE_SMR:
200		panic("%s: Not supported in SMR section.", __func__);
201		break;
202	default:
203		__assert_unreachable();
204		break;
205	}
206}
207
208/*
209 * Get the root node for a tree.
210 */
211static __inline struct pctrie_node *
212pctrie_root_load(struct pctrie *ptree, smr_t smr, enum pctrie_access access)
213{
214	return (pctrie_node_load((smr_pctnode_t *)&ptree->pt_root, smr, access));
215}
216
217/*
218 * Set the root node for a tree.
219 */
220static __inline void
221pctrie_root_store(struct pctrie *ptree, struct pctrie_node *node,
222    enum pctrie_access access)
223{
224	pctrie_node_store((smr_pctnode_t *)&ptree->pt_root, node, access);
225}
226
227/*
228 * Returns TRUE if the specified node is a leaf and FALSE otherwise.
229 */
230static __inline bool
231pctrie_isleaf(struct pctrie_node *node)
232{
233
234	return (((uintptr_t)node & PCTRIE_ISLEAF) != 0);
235}
236
237/*
238 * Returns the associated val extracted from node.
239 */
240static __inline uint64_t *
241pctrie_toval(struct pctrie_node *node)
242{
243
244	return ((uint64_t *)((uintptr_t)node & ~PCTRIE_FLAGS));
245}
246
247/*
248 * Adds the val as a child of the provided node.
249 */
250static __inline void
251pctrie_addval(struct pctrie_node *node, uint64_t index, uint16_t clev,
252    uint64_t *val, enum pctrie_access access)
253{
254	int slot;
255
256	slot = pctrie_slot(index, clev);
257	pctrie_node_store(&node->pn_child[slot],
258	    (void *)((uintptr_t)val | PCTRIE_ISLEAF), access);
259}
260
261/*
262 * Returns the slot where two keys differ.
263 * It cannot accept 2 equal keys.
264 */
265static __inline uint16_t
266pctrie_keydiff(uint64_t index1, uint64_t index2)
267{
268	uint16_t clev;
269
270	KASSERT(index1 != index2, ("%s: passing the same key value %jx",
271	    __func__, (uintmax_t)index1));
272
273	index1 ^= index2;
274	for (clev = PCTRIE_LIMIT;; clev--)
275		if (pctrie_slot(index1, clev) != 0)
276			return (clev);
277}
278
279/*
280 * Returns TRUE if it can be determined that key does not belong to the
281 * specified node.  Otherwise, returns FALSE.
282 */
283static __inline bool
284pctrie_keybarr(struct pctrie_node *node, uint64_t idx)
285{
286
287	if (node->pn_clev < PCTRIE_LIMIT) {
288		idx = pctrie_trimkey(idx, node->pn_clev + 1);
289		return (idx != node->pn_owner);
290	}
291	return (false);
292}
293
294/*
295 * Internal helper for pctrie_reclaim_allnodes().
296 * This function is recursive.
297 */
298static void
299pctrie_reclaim_allnodes_int(struct pctrie *ptree, struct pctrie_node *node,
300    pctrie_free_t freefn)
301{
302	struct pctrie_node *child;
303	int slot;
304
305	KASSERT(node->pn_count <= PCTRIE_COUNT,
306	    ("pctrie_reclaim_allnodes_int: bad count in node %p", node));
307	for (slot = 0; node->pn_count != 0; slot++) {
308		child = pctrie_node_load(&node->pn_child[slot], NULL,
309		    PCTRIE_UNSERIALIZED);
310		if (child == NULL)
311			continue;
312		if (!pctrie_isleaf(child))
313			pctrie_reclaim_allnodes_int(ptree, child, freefn);
314		pctrie_node_store(&node->pn_child[slot], NULL,
315		    PCTRIE_UNSERIALIZED);
316		node->pn_count--;
317	}
318	pctrie_node_put(ptree, node, freefn, -1);
319}
320
321/*
322 * pctrie node zone initializer.
323 */
324int
325pctrie_zone_init(void *mem, int size __unused, int flags __unused)
326{
327	struct pctrie_node *node;
328
329	node = mem;
330	node->pn_last = 0;
331	memset(node->pn_child, 0, sizeof(node->pn_child));
332	return (0);
333}
334
335size_t
336pctrie_node_size(void)
337{
338
339	return (sizeof(struct pctrie_node));
340}
341
342/*
343 * Inserts the key-value pair into the trie.
344 * Panics if the key already exists.
345 */
346int
347pctrie_insert(struct pctrie *ptree, uint64_t *val, pctrie_alloc_t allocfn)
348{
349	uint64_t index, newind;
350	struct pctrie_node *node, *tmp;
351	smr_pctnode_t *parentp;
352	uint64_t *m;
353	int slot;
354	uint16_t clev;
355
356	index = *val;
357
358	/*
359	 * The owner of record for root is not really important because it
360	 * will never be used.
361	 */
362	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
363	if (node == NULL) {
364		ptree->pt_root = (uintptr_t)val | PCTRIE_ISLEAF;
365		return (0);
366	}
367	parentp = (smr_pctnode_t *)&ptree->pt_root;
368	for (;;) {
369		if (pctrie_isleaf(node)) {
370			m = pctrie_toval(node);
371			if (*m == index)
372				panic("%s: key %jx is already present",
373				    __func__, (uintmax_t)index);
374			clev = pctrie_keydiff(*m, index);
375			tmp = pctrie_node_get(ptree, allocfn,
376			    pctrie_trimkey(index, clev + 1), 2, clev);
377			if (tmp == NULL)
378				return (ENOMEM);
379			/* These writes are not yet visible due to ordering. */
380			pctrie_addval(tmp, index, clev, val,
381			    PCTRIE_UNSERIALIZED);
382			pctrie_addval(tmp, *m, clev, m, PCTRIE_UNSERIALIZED);
383			/* Synchronize to make leaf visible. */
384			pctrie_node_store(parentp, tmp, PCTRIE_LOCKED);
385			return (0);
386		} else if (pctrie_keybarr(node, index))
387			break;
388		slot = pctrie_slot(index, node->pn_clev);
389		parentp = &node->pn_child[slot];
390		tmp = pctrie_node_load(parentp, NULL, PCTRIE_LOCKED);
391		if (tmp == NULL) {
392			node->pn_count++;
393			pctrie_addval(node, index, node->pn_clev, val,
394			    PCTRIE_LOCKED);
395			return (0);
396		}
397		node = tmp;
398	}
399
400	/*
401	 * A new node is needed because the right insertion level is reached.
402	 * Setup the new intermediate node and add the 2 children: the
403	 * new object and the older edge.
404	 */
405	newind = node->pn_owner;
406	clev = pctrie_keydiff(newind, index);
407	tmp = pctrie_node_get(ptree, allocfn,
408	    pctrie_trimkey(index, clev + 1), 2, clev);
409	if (tmp == NULL)
410		return (ENOMEM);
411	slot = pctrie_slot(newind, clev);
412	/* These writes are not yet visible due to ordering. */
413	pctrie_addval(tmp, index, clev, val, PCTRIE_UNSERIALIZED);
414	pctrie_node_store(&tmp->pn_child[slot], node, PCTRIE_UNSERIALIZED);
415	/* Synchronize to make the above visible. */
416	pctrie_node_store(parentp, tmp, PCTRIE_LOCKED);
417
418	return (0);
419}
420
421/*
422 * Returns the value stored at the index.  If the index is not present,
423 * NULL is returned.
424 */
425static __always_inline uint64_t *
426_pctrie_lookup(struct pctrie *ptree, uint64_t index, smr_t smr,
427    enum pctrie_access access)
428{
429	struct pctrie_node *node;
430	uint64_t *m;
431	int slot;
432
433	node = pctrie_root_load(ptree, smr, access);
434	while (node != NULL) {
435		if (pctrie_isleaf(node)) {
436			m = pctrie_toval(node);
437			if (*m == index)
438				return (m);
439			break;
440		}
441		if (pctrie_keybarr(node, index))
442			break;
443		slot = pctrie_slot(index, node->pn_clev);
444		node = pctrie_node_load(&node->pn_child[slot], smr, access);
445	}
446	return (NULL);
447}
448
449/*
450 * Returns the value stored at the index, assuming access is externally
451 * synchronized by a lock.
452 *
453 * If the index is not present, NULL is returned.
454 */
455uint64_t *
456pctrie_lookup(struct pctrie *ptree, uint64_t index)
457{
458	return (_pctrie_lookup(ptree, index, NULL, PCTRIE_LOCKED));
459}
460
461/*
462 * Returns the value stored at the index without requiring an external lock.
463 *
464 * If the index is not present, NULL is returned.
465 */
466uint64_t *
467pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t index, smr_t smr)
468{
469	uint64_t *res;
470
471	smr_enter(smr);
472	res = _pctrie_lookup(ptree, index, smr, PCTRIE_SMR);
473	smr_exit(smr);
474	return (res);
475}
476
477/*
478 * Look up the nearest entry at a position bigger than or equal to index,
479 * assuming access is externally synchronized by a lock.
480 */
481uint64_t *
482pctrie_lookup_ge(struct pctrie *ptree, uint64_t index)
483{
484	struct pctrie_node *stack[PCTRIE_LIMIT];
485	uint64_t inc;
486	uint64_t *m;
487	struct pctrie_node *child, *node;
488#ifdef INVARIANTS
489	int loops = 0;
490#endif
491	unsigned tos;
492	int slot;
493
494	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
495	if (node == NULL)
496		return (NULL);
497	else if (pctrie_isleaf(node)) {
498		m = pctrie_toval(node);
499		if (*m >= index)
500			return (m);
501		else
502			return (NULL);
503	}
504	tos = 0;
505	for (;;) {
506		/*
507		 * If the keys differ before the current bisection node,
508		 * then the search key might rollback to the earliest
509		 * available bisection node or to the smallest key
510		 * in the current node (if the owner is greater than the
511		 * search key).
512		 */
513		if (pctrie_keybarr(node, index)) {
514			if (index > node->pn_owner) {
515ascend:
516				KASSERT(++loops < 1000,
517				    ("pctrie_lookup_ge: too many loops"));
518
519				/*
520				 * Pop nodes from the stack until either the
521				 * stack is empty or a node that could have a
522				 * matching descendant is found.
523				 */
524				do {
525					if (tos == 0)
526						return (NULL);
527					node = stack[--tos];
528				} while (pctrie_slot(index,
529				    node->pn_clev) == (PCTRIE_COUNT - 1));
530
531				/*
532				 * The following computation cannot overflow
533				 * because index's slot at the current level
534				 * is less than PCTRIE_COUNT - 1.
535				 */
536				index = pctrie_trimkey(index,
537				    node->pn_clev);
538				index += PCTRIE_UNITLEVEL(node->pn_clev);
539			} else
540				index = node->pn_owner;
541			KASSERT(!pctrie_keybarr(node, index),
542			    ("pctrie_lookup_ge: keybarr failed"));
543		}
544		slot = pctrie_slot(index, node->pn_clev);
545		child = pctrie_node_load(&node->pn_child[slot], NULL,
546		    PCTRIE_LOCKED);
547		if (pctrie_isleaf(child)) {
548			m = pctrie_toval(child);
549			if (*m >= index)
550				return (m);
551		} else if (child != NULL)
552			goto descend;
553
554		/*
555		 * Look for an available edge or val within the current
556		 * bisection node.
557		 */
558                if (slot < (PCTRIE_COUNT - 1)) {
559			inc = PCTRIE_UNITLEVEL(node->pn_clev);
560			index = pctrie_trimkey(index, node->pn_clev);
561			do {
562				index += inc;
563				slot++;
564				child = pctrie_node_load(&node->pn_child[slot],
565				    NULL, PCTRIE_LOCKED);
566				if (pctrie_isleaf(child)) {
567					m = pctrie_toval(child);
568					if (*m >= index)
569						return (m);
570				} else if (child != NULL)
571					goto descend;
572			} while (slot < (PCTRIE_COUNT - 1));
573		}
574		KASSERT(child == NULL || pctrie_isleaf(child),
575		    ("pctrie_lookup_ge: child is radix node"));
576
577		/*
578		 * If a value or edge greater than the search slot is not found
579		 * in the current node, ascend to the next higher-level node.
580		 */
581		goto ascend;
582descend:
583		KASSERT(node->pn_clev > 0,
584		    ("pctrie_lookup_ge: pushing leaf's parent"));
585		KASSERT(tos < PCTRIE_LIMIT,
586		    ("pctrie_lookup_ge: stack overflow"));
587		stack[tos++] = node;
588		node = child;
589	}
590}
591
592/*
593 * Look up the nearest entry at a position less than or equal to index,
594 * assuming access is externally synchronized by a lock.
595 */
596uint64_t *
597pctrie_lookup_le(struct pctrie *ptree, uint64_t index)
598{
599	struct pctrie_node *stack[PCTRIE_LIMIT];
600	uint64_t inc;
601	uint64_t *m;
602	struct pctrie_node *child, *node;
603#ifdef INVARIANTS
604	int loops = 0;
605#endif
606	unsigned tos;
607	int slot;
608
609	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
610	if (node == NULL)
611		return (NULL);
612	else if (pctrie_isleaf(node)) {
613		m = pctrie_toval(node);
614		if (*m <= index)
615			return (m);
616		else
617			return (NULL);
618	}
619	tos = 0;
620	for (;;) {
621		/*
622		 * If the keys differ before the current bisection node,
623		 * then the search key might rollback to the earliest
624		 * available bisection node or to the largest key
625		 * in the current node (if the owner is smaller than the
626		 * search key).
627		 */
628		if (pctrie_keybarr(node, index)) {
629			if (index > node->pn_owner) {
630				index = node->pn_owner + PCTRIE_COUNT *
631				    PCTRIE_UNITLEVEL(node->pn_clev);
632			} else {
633ascend:
634				KASSERT(++loops < 1000,
635				    ("pctrie_lookup_le: too many loops"));
636
637				/*
638				 * Pop nodes from the stack until either the
639				 * stack is empty or a node that could have a
640				 * matching descendant is found.
641				 */
642				do {
643					if (tos == 0)
644						return (NULL);
645					node = stack[--tos];
646				} while (pctrie_slot(index,
647				    node->pn_clev) == 0);
648
649				/*
650				 * The following computation cannot overflow
651				 * because index's slot at the current level
652				 * is greater than 0.
653				 */
654				index = pctrie_trimkey(index,
655				    node->pn_clev);
656			}
657			index--;
658			KASSERT(!pctrie_keybarr(node, index),
659			    ("pctrie_lookup_le: keybarr failed"));
660		}
661		slot = pctrie_slot(index, node->pn_clev);
662		child = pctrie_node_load(&node->pn_child[slot], NULL,
663		    PCTRIE_LOCKED);
664		if (pctrie_isleaf(child)) {
665			m = pctrie_toval(child);
666			if (*m <= index)
667				return (m);
668		} else if (child != NULL)
669			goto descend;
670
671		/*
672		 * Look for an available edge or value within the current
673		 * bisection node.
674		 */
675		if (slot > 0) {
676			inc = PCTRIE_UNITLEVEL(node->pn_clev);
677			index |= inc - 1;
678			do {
679				index -= inc;
680				slot--;
681				child = pctrie_node_load(&node->pn_child[slot],
682				    NULL, PCTRIE_LOCKED);
683				if (pctrie_isleaf(child)) {
684					m = pctrie_toval(child);
685					if (*m <= index)
686						return (m);
687				} else if (child != NULL)
688					goto descend;
689			} while (slot > 0);
690		}
691		KASSERT(child == NULL || pctrie_isleaf(child),
692		    ("pctrie_lookup_le: child is radix node"));
693
694		/*
695		 * If a value or edge smaller than the search slot is not found
696		 * in the current node, ascend to the next higher-level node.
697		 */
698		goto ascend;
699descend:
700		KASSERT(node->pn_clev > 0,
701		    ("pctrie_lookup_le: pushing leaf's parent"));
702		KASSERT(tos < PCTRIE_LIMIT,
703		    ("pctrie_lookup_le: stack overflow"));
704		stack[tos++] = node;
705		node = child;
706	}
707}
708
709/*
710 * Remove the specified index from the tree.
711 * Panics if the key is not present.
712 */
713void
714pctrie_remove(struct pctrie *ptree, uint64_t index, pctrie_free_t freefn)
715{
716	struct pctrie_node *node, *parent, *tmp;
717	uint64_t *m;
718	int i, slot;
719
720	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
721	if (pctrie_isleaf(node)) {
722		m = pctrie_toval(node);
723		if (*m != index)
724			panic("%s: invalid key found", __func__);
725		pctrie_root_store(ptree, NULL, PCTRIE_LOCKED);
726		return;
727	}
728	parent = NULL;
729	for (;;) {
730		if (node == NULL)
731			panic("pctrie_remove: impossible to locate the key");
732		slot = pctrie_slot(index, node->pn_clev);
733		tmp = pctrie_node_load(&node->pn_child[slot], NULL,
734		    PCTRIE_LOCKED);
735		if (pctrie_isleaf(tmp)) {
736			m = pctrie_toval(tmp);
737			if (*m != index)
738				panic("%s: invalid key found", __func__);
739			pctrie_node_store(&node->pn_child[slot], NULL,
740			    PCTRIE_LOCKED);
741			node->pn_count--;
742			if (node->pn_count > 1)
743				break;
744			for (i = 0; i < PCTRIE_COUNT; i++) {
745				tmp = pctrie_node_load(&node->pn_child[i],
746				    NULL, PCTRIE_LOCKED);
747				if (tmp != NULL)
748					break;
749			}
750			KASSERT(i != PCTRIE_COUNT,
751			    ("%s: invalid node configuration", __func__));
752			if (parent == NULL)
753				pctrie_root_store(ptree, tmp, PCTRIE_LOCKED);
754			else {
755				slot = pctrie_slot(index, parent->pn_clev);
756				KASSERT(pctrie_node_load(
757					&parent->pn_child[slot], NULL,
758					PCTRIE_LOCKED) == node,
759				    ("%s: invalid child value", __func__));
760				pctrie_node_store(&parent->pn_child[slot], tmp,
761				    PCTRIE_LOCKED);
762			}
763			/*
764			 * The child is still valid and we can not zero the
765			 * pointer until all SMR references are gone.
766			 */
767			node->pn_count--;
768			pctrie_node_put(ptree, node, freefn, i);
769			break;
770		}
771		parent = node;
772		node = tmp;
773	}
774}
775
776/*
777 * Remove and free all the nodes from the tree.
778 * This function is recursive but there is a tight control on it as the
779 * maximum depth of the tree is fixed.
780 */
781void
782pctrie_reclaim_allnodes(struct pctrie *ptree, pctrie_free_t freefn)
783{
784	struct pctrie_node *root;
785
786	root = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
787	if (root == NULL)
788		return;
789	pctrie_root_store(ptree, NULL, PCTRIE_UNSERIALIZED);
790	if (!pctrie_isleaf(root))
791		pctrie_reclaim_allnodes_int(ptree, root, freefn);
792}
793
794#ifdef DDB
795/*
796 * Show details about the given node.
797 */
798DB_SHOW_COMMAND(pctrienode, db_show_pctrienode)
799{
800	struct pctrie_node *node, *tmp;
801	int i;
802
803        if (!have_addr)
804                return;
805	node = (struct pctrie_node *)addr;
806	db_printf("node %p, owner %jx, children count %u, level %u:\n",
807	    (void *)node, (uintmax_t)node->pn_owner, node->pn_count,
808	    node->pn_clev);
809	for (i = 0; i < PCTRIE_COUNT; i++) {
810		tmp = pctrie_node_load(&node->pn_child[i], NULL,
811		    PCTRIE_UNSERIALIZED);
812		if (tmp != NULL)
813			db_printf("slot: %d, val: %p, value: %p, clev: %d\n",
814			    i, (void *)tmp,
815			    pctrie_isleaf(tmp) ? pctrie_toval(tmp) : NULL,
816			    node->pn_clev);
817	}
818}
819#endif /* DDB */
820