1219820Sjeff/*
2219820Sjeff * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
3219820Sjeff * Copyright (c) 2006 Cisco Systems, Inc.  All rights reserved.
4219820Sjeff *
5219820Sjeff * This software is available to you under a choice of one of two
6219820Sjeff * licenses.  You may choose to be licensed under the terms of the GNU
7219820Sjeff * General Public License (GPL) Version 2, available from the file
8219820Sjeff * COPYING in the main directory of this source tree, or the
9219820Sjeff * OpenIB.org BSD license below:
10219820Sjeff *
11219820Sjeff *     Redistribution and use in source and binary forms, with or
12219820Sjeff *     without modification, are permitted provided that the following
13219820Sjeff *     conditions are met:
14219820Sjeff *
15219820Sjeff *      - Redistributions of source code must retain the above
16219820Sjeff *        copyright notice, this list of conditions and the following
17219820Sjeff *        disclaimer.
18219820Sjeff *
19219820Sjeff *      - Redistributions in binary form must reproduce the above
20219820Sjeff *        copyright notice, this list of conditions and the following
21219820Sjeff *        disclaimer in the documentation and/or other materials
22219820Sjeff *        provided with the distribution.
23219820Sjeff *
24219820Sjeff * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25219820Sjeff * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26219820Sjeff * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27219820Sjeff * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28219820Sjeff * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29219820Sjeff * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30219820Sjeff * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31219820Sjeff * SOFTWARE.
32219820Sjeff */
33219820Sjeff
34219820Sjeff#if HAVE_CONFIG_H
35219820Sjeff#  include <config.h>
36219820Sjeff#endif /* HAVE_CONFIG_H */
37219820Sjeff
38219820Sjeff#include <errno.h>
39219820Sjeff#include <sys/mman.h>
40219820Sjeff#include <unistd.h>
41219820Sjeff#include <stdlib.h>
42219820Sjeff#include <stdint.h>
43219820Sjeff
44219820Sjeff#include "ibverbs.h"
45219820Sjeff
/*
 * Advice values used to mark ranges as (not) inherited across fork().
 *
 * On Linux, older libc headers may not define MADV_DONTFORK and
 * MADV_DOFORK, so their numeric values are supplied here when missing.
 * On every other platform the two names are mapped onto the
 * INHERIT_NONE / INHERIT_SHARE constants.
 */
#if defined(__linux__)
#  if !defined(MADV_DONTFORK)
#    define MADV_DONTFORK	10
#  endif
#  if !defined(MADV_DOFORK)
#    define MADV_DOFORK	11
#  endif
#else
#  define MADV_DONTFORK	INHERIT_NONE
#  define MADV_DOFORK	INHERIT_SHARE
#endif
61219820Sjeff
/*
 * One node of the red-black tree that tracks address ranges.
 *
 * A node describes the inclusive address range [start, end] and the
 * reference count currently associated with that range.
 */
struct ibv_mem_node {
	enum {
		IBV_RED,
		IBV_BLACK
	}			color;		/* red-black colour of this node */
	struct ibv_mem_node    *parent;		/* NULL for the tree root */
	struct ibv_mem_node    *left;		/* subtree with lower start */
	struct ibv_mem_node    *right;		/* subtree with higher start */
	uintptr_t		start;		/* first address of the range */
	uintptr_t		end;		/* last address (inclusive) */
	int			refcnt;		/* reference count of the range */
};
72219820Sjeff
/* Root of the range tree; stays NULL until ibv_fork_init() has succeeded. */
static struct ibv_mem_node *mm_root = NULL;
/* Held by ibv_madvise_range() while it walks or modifies the tree. */
static pthread_mutex_t mm_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Page size as reported by sysconf(_SC_PAGESIZE); set in ibv_fork_init(). */
static int page_size = 0;
/*
 * Set once a dontfork/dofork request arrives while the tree is not
 * initialised; a later ibv_fork_init() then fails with EINVAL.
 */
static int too_late = 0;
77219820Sjeff
78219820Sjeffint ibv_fork_init(void)
79219820Sjeff{
80219820Sjeff#ifdef __linux__
81219820Sjeff	void *tmp;
82219820Sjeff	int ret;
83219820Sjeff#endif
84219820Sjeff
85219820Sjeff	if (mm_root)
86219820Sjeff		return 0;
87219820Sjeff
88219820Sjeff	if (too_late)
89219820Sjeff		return EINVAL;
90219820Sjeff
91219820Sjeff	page_size = sysconf(_SC_PAGESIZE);
92219820Sjeff	if (page_size < 0)
93219820Sjeff		return errno;
94219820Sjeff
95219820Sjeff#ifdef __linux__
96219820Sjeff	if (posix_memalign(&tmp, page_size, page_size))
97219820Sjeff		return ENOMEM;
98219820Sjeff
99219820Sjeff	ret = madvise(tmp, page_size, MADV_DONTFORK) ||
100219820Sjeff	      madvise(tmp, page_size, MADV_DOFORK);
101219820Sjeff
102219820Sjeff	free(tmp);
103219820Sjeff
104219820Sjeff	if (ret)
105219820Sjeff		return ENOSYS;
106219820Sjeff#endif
107219820Sjeff
108219820Sjeff	mm_root = malloc(sizeof *mm_root);
109219820Sjeff	if (!mm_root)
110219820Sjeff		return ENOMEM;
111219820Sjeff
112219820Sjeff	mm_root->parent = NULL;
113219820Sjeff	mm_root->left   = NULL;
114219820Sjeff	mm_root->right  = NULL;
115219820Sjeff	mm_root->color  = IBV_BLACK;
116219820Sjeff	mm_root->start  = 0;
117219820Sjeff	mm_root->end    = UINTPTR_MAX;
118219820Sjeff	mm_root->refcnt = 0;
119219820Sjeff
120219820Sjeff	return 0;
121219820Sjeff}
122219820Sjeff
123219820Sjeffstatic struct ibv_mem_node *__mm_prev(struct ibv_mem_node *node)
124219820Sjeff{
125219820Sjeff	if (node->left) {
126219820Sjeff		node = node->left;
127219820Sjeff		while (node->right)
128219820Sjeff			node = node->right;
129219820Sjeff	} else {
130219820Sjeff		while (node->parent && node == node->parent->left)
131219820Sjeff			node = node->parent;
132219820Sjeff
133219820Sjeff		node = node->parent;
134219820Sjeff	}
135219820Sjeff
136219820Sjeff	return node;
137219820Sjeff}
138219820Sjeff
139219820Sjeffstatic struct ibv_mem_node *__mm_next(struct ibv_mem_node *node)
140219820Sjeff{
141219820Sjeff	if (node->right) {
142219820Sjeff		node = node->right;
143219820Sjeff		while (node->left)
144219820Sjeff			node = node->left;
145219820Sjeff	} else {
146219820Sjeff		while (node->parent && node == node->parent->right)
147219820Sjeff			node = node->parent;
148219820Sjeff
149219820Sjeff		node = node->parent;
150219820Sjeff	}
151219820Sjeff
152219820Sjeff	return node;
153219820Sjeff}
154219820Sjeff
155219820Sjeffstatic void __mm_rotate_right(struct ibv_mem_node *node)
156219820Sjeff{
157219820Sjeff	struct ibv_mem_node *tmp;
158219820Sjeff
159219820Sjeff	tmp = node->left;
160219820Sjeff
161219820Sjeff	node->left = tmp->right;
162219820Sjeff	if (node->left)
163219820Sjeff		node->left->parent = node;
164219820Sjeff
165219820Sjeff	if (node->parent) {
166219820Sjeff		if (node->parent->right == node)
167219820Sjeff			node->parent->right = tmp;
168219820Sjeff		else
169219820Sjeff			node->parent->left = tmp;
170219820Sjeff	} else
171219820Sjeff		mm_root = tmp;
172219820Sjeff
173219820Sjeff	tmp->parent = node->parent;
174219820Sjeff
175219820Sjeff	tmp->right = node;
176219820Sjeff	node->parent = tmp;
177219820Sjeff}
178219820Sjeff
179219820Sjeffstatic void __mm_rotate_left(struct ibv_mem_node *node)
180219820Sjeff{
181219820Sjeff	struct ibv_mem_node *tmp;
182219820Sjeff
183219820Sjeff	tmp = node->right;
184219820Sjeff
185219820Sjeff	node->right = tmp->left;
186219820Sjeff	if (node->right)
187219820Sjeff		node->right->parent = node;
188219820Sjeff
189219820Sjeff	if (node->parent) {
190219820Sjeff		if (node->parent->right == node)
191219820Sjeff			node->parent->right = tmp;
192219820Sjeff		else
193219820Sjeff			node->parent->left = tmp;
194219820Sjeff	} else
195219820Sjeff		mm_root = tmp;
196219820Sjeff
197219820Sjeff	tmp->parent = node->parent;
198219820Sjeff
199219820Sjeff	tmp->left = node;
200219820Sjeff	node->parent = tmp;
201219820Sjeff}
202219820Sjeff
203219820Sjeffstatic int verify(struct ibv_mem_node *node)
204219820Sjeff{
205219820Sjeff	int hl, hr;
206219820Sjeff
207219820Sjeff	if (!node)
208219820Sjeff		return 1;
209219820Sjeff
210219820Sjeff	hl = verify(node->left);
211219820Sjeff	hr = verify(node->left);
212219820Sjeff
213219820Sjeff	if (!hl || !hr)
214219820Sjeff		return 0;
215219820Sjeff	if (hl != hr)
216219820Sjeff		return 0;
217219820Sjeff
218219820Sjeff	if (node->color == IBV_RED) {
219219820Sjeff		if (node->left && node->left->color != IBV_BLACK)
220219820Sjeff			return 0;
221219820Sjeff		if (node->right && node->right->color != IBV_BLACK)
222219820Sjeff			return 0;
223219820Sjeff		return hl;
224219820Sjeff	}
225219820Sjeff
226219820Sjeff	return hl + 1;
227219820Sjeff}
228219820Sjeff
229219820Sjeffstatic void __mm_add_rebalance(struct ibv_mem_node *node)
230219820Sjeff{
231219820Sjeff	struct ibv_mem_node *parent, *gp, *uncle;
232219820Sjeff
233219820Sjeff	while (node->parent && node->parent->color == IBV_RED) {
234219820Sjeff		parent = node->parent;
235219820Sjeff		gp     = node->parent->parent;
236219820Sjeff
237219820Sjeff		if (parent == gp->left) {
238219820Sjeff			uncle = gp->right;
239219820Sjeff
240219820Sjeff			if (uncle && uncle->color == IBV_RED) {
241219820Sjeff				parent->color = IBV_BLACK;
242219820Sjeff				uncle->color  = IBV_BLACK;
243219820Sjeff				gp->color     = IBV_RED;
244219820Sjeff
245219820Sjeff				node = gp;
246219820Sjeff			} else {
247219820Sjeff				if (node == parent->right) {
248219820Sjeff					__mm_rotate_left(parent);
249219820Sjeff					node   = parent;
250219820Sjeff					parent = node->parent;
251219820Sjeff				}
252219820Sjeff
253219820Sjeff				parent->color = IBV_BLACK;
254219820Sjeff				gp->color     = IBV_RED;
255219820Sjeff
256219820Sjeff				__mm_rotate_right(gp);
257219820Sjeff			}
258219820Sjeff		} else {
259219820Sjeff			uncle = gp->left;
260219820Sjeff
261219820Sjeff			if (uncle && uncle->color == IBV_RED) {
262219820Sjeff				parent->color = IBV_BLACK;
263219820Sjeff				uncle->color  = IBV_BLACK;
264219820Sjeff				gp->color     = IBV_RED;
265219820Sjeff
266219820Sjeff				node = gp;
267219820Sjeff			} else {
268219820Sjeff				if (node == parent->left) {
269219820Sjeff					__mm_rotate_right(parent);
270219820Sjeff					node   = parent;
271219820Sjeff					parent = node->parent;
272219820Sjeff				}
273219820Sjeff
274219820Sjeff				parent->color = IBV_BLACK;
275219820Sjeff				gp->color     = IBV_RED;
276219820Sjeff
277219820Sjeff				__mm_rotate_left(gp);
278219820Sjeff			}
279219820Sjeff		}
280219820Sjeff	}
281219820Sjeff
282219820Sjeff	mm_root->color = IBV_BLACK;
283219820Sjeff}
284219820Sjeff
285219820Sjeffstatic void __mm_add(struct ibv_mem_node *new)
286219820Sjeff{
287219820Sjeff	struct ibv_mem_node *node, *parent = NULL;
288219820Sjeff
289219820Sjeff	node = mm_root;
290219820Sjeff	while (node) {
291219820Sjeff		parent = node;
292219820Sjeff		if (node->start < new->start)
293219820Sjeff			node = node->right;
294219820Sjeff		else
295219820Sjeff			node = node->left;
296219820Sjeff	}
297219820Sjeff
298219820Sjeff	if (parent->start < new->start)
299219820Sjeff		parent->right = new;
300219820Sjeff	else
301219820Sjeff		parent->left = new;
302219820Sjeff
303219820Sjeff	new->parent = parent;
304219820Sjeff	new->left   = NULL;
305219820Sjeff	new->right  = NULL;
306219820Sjeff
307219820Sjeff	new->color = IBV_RED;
308219820Sjeff	__mm_add_rebalance(new);
309219820Sjeff}
310219820Sjeff
311219820Sjeffstatic void __mm_remove(struct ibv_mem_node *node)
312219820Sjeff{
313219820Sjeff	struct ibv_mem_node *child, *parent, *sib, *tmp;
314219820Sjeff	int nodecol;
315219820Sjeff
316219820Sjeff	if (node->left && node->right) {
317219820Sjeff		tmp = node->left;
318219820Sjeff		while (tmp->right)
319219820Sjeff			tmp = tmp->right;
320219820Sjeff
321219820Sjeff		nodecol    = tmp->color;
322219820Sjeff		child      = tmp->left;
323219820Sjeff		tmp->color = node->color;
324219820Sjeff
325219820Sjeff		if (tmp->parent != node) {
326219820Sjeff			parent        = tmp->parent;
327219820Sjeff			parent->right = tmp->left;
328219820Sjeff			if (tmp->left)
329219820Sjeff				tmp->left->parent = parent;
330219820Sjeff
331219820Sjeff			tmp->left   	   = node->left;
332219820Sjeff			node->left->parent = tmp;
333219820Sjeff		} else
334219820Sjeff			parent = tmp;
335219820Sjeff
336219820Sjeff		tmp->right          = node->right;
337219820Sjeff		node->right->parent = tmp;
338219820Sjeff
339219820Sjeff		tmp->parent = node->parent;
340219820Sjeff		if (node->parent) {
341219820Sjeff			if (node->parent->left == node)
342219820Sjeff				node->parent->left = tmp;
343219820Sjeff			else
344219820Sjeff				node->parent->right = tmp;
345219820Sjeff		} else
346219820Sjeff			mm_root = tmp;
347219820Sjeff	} else {
348219820Sjeff		nodecol = node->color;
349219820Sjeff
350219820Sjeff		child  = node->left ? node->left : node->right;
351219820Sjeff		parent = node->parent;
352219820Sjeff
353219820Sjeff		if (child)
354219820Sjeff			child->parent = parent;
355219820Sjeff		if (parent) {
356219820Sjeff			if (parent->left == node)
357219820Sjeff				parent->left = child;
358219820Sjeff			else
359219820Sjeff				parent->right = child;
360219820Sjeff		} else
361219820Sjeff			mm_root = child;
362219820Sjeff	}
363219820Sjeff
364219820Sjeff	free(node);
365219820Sjeff
366219820Sjeff	if (nodecol == IBV_RED)
367219820Sjeff		return;
368219820Sjeff
369219820Sjeff	while ((!child || child->color == IBV_BLACK) && child != mm_root) {
370219820Sjeff		if (parent->left == child) {
371219820Sjeff			sib = parent->right;
372219820Sjeff
373219820Sjeff			if (sib->color == IBV_RED) {
374219820Sjeff				parent->color = IBV_RED;
375219820Sjeff				sib->color    = IBV_BLACK;
376219820Sjeff				__mm_rotate_left(parent);
377219820Sjeff				sib = parent->right;
378219820Sjeff			}
379219820Sjeff
380219820Sjeff			if ((!sib->left  || sib->left->color  == IBV_BLACK) &&
381219820Sjeff			    (!sib->right || sib->right->color == IBV_BLACK)) {
382219820Sjeff				sib->color = IBV_RED;
383219820Sjeff				child  = parent;
384219820Sjeff				parent = child->parent;
385219820Sjeff			} else {
386219820Sjeff				if (!sib->right || sib->right->color == IBV_BLACK) {
387219820Sjeff					if (sib->left)
388219820Sjeff						sib->left->color = IBV_BLACK;
389219820Sjeff					sib->color = IBV_RED;
390219820Sjeff					__mm_rotate_right(sib);
391219820Sjeff					sib = parent->right;
392219820Sjeff				}
393219820Sjeff
394219820Sjeff				sib->color    = parent->color;
395219820Sjeff				parent->color = IBV_BLACK;
396219820Sjeff				if (sib->right)
397219820Sjeff					sib->right->color = IBV_BLACK;
398219820Sjeff				__mm_rotate_left(parent);
399219820Sjeff				child = mm_root;
400219820Sjeff				break;
401219820Sjeff			}
402219820Sjeff		} else {
403219820Sjeff			sib = parent->left;
404219820Sjeff
405219820Sjeff			if (sib->color == IBV_RED) {
406219820Sjeff				parent->color = IBV_RED;
407219820Sjeff				sib->color    = IBV_BLACK;
408219820Sjeff				__mm_rotate_right(parent);
409219820Sjeff				sib = parent->left;
410219820Sjeff			}
411219820Sjeff
412219820Sjeff			if ((!sib->left  || sib->left->color  == IBV_BLACK) &&
413219820Sjeff			    (!sib->right || sib->right->color == IBV_BLACK)) {
414219820Sjeff				sib->color = IBV_RED;
415219820Sjeff				child  = parent;
416219820Sjeff				parent = child->parent;
417219820Sjeff			} else {
418219820Sjeff				if (!sib->left || sib->left->color == IBV_BLACK) {
419219820Sjeff					if (sib->right)
420219820Sjeff						sib->right->color = IBV_BLACK;
421219820Sjeff					sib->color = IBV_RED;
422219820Sjeff					__mm_rotate_left(sib);
423219820Sjeff					sib = parent->left;
424219820Sjeff				}
425219820Sjeff
426219820Sjeff				sib->color    = parent->color;
427219820Sjeff				parent->color = IBV_BLACK;
428219820Sjeff				if (sib->left)
429219820Sjeff					sib->left->color = IBV_BLACK;
430219820Sjeff				__mm_rotate_right(parent);
431219820Sjeff				child = mm_root;
432219820Sjeff				break;
433219820Sjeff			}
434219820Sjeff		}
435219820Sjeff	}
436219820Sjeff
437219820Sjeff	if (child)
438219820Sjeff		child->color = IBV_BLACK;
439219820Sjeff}
440219820Sjeff
441219820Sjeffstatic struct ibv_mem_node *__mm_find_start(uintptr_t start, uintptr_t end)
442219820Sjeff{
443219820Sjeff	struct ibv_mem_node *node = mm_root;
444219820Sjeff
445219820Sjeff	while (node) {
446219820Sjeff		if (node->start <= start && node->end >= start)
447219820Sjeff			break;
448219820Sjeff
449219820Sjeff		if (node->start < start)
450219820Sjeff			node = node->right;
451219820Sjeff		else
452219820Sjeff			node = node->left;
453219820Sjeff	}
454219820Sjeff
455219820Sjeff	return node;
456219820Sjeff}
457219820Sjeff
458219820Sjeffstatic struct ibv_mem_node *merge_ranges(struct ibv_mem_node *node,
459219820Sjeff					 struct ibv_mem_node *prev)
460219820Sjeff{
461219820Sjeff	prev->end = node->end;
462219820Sjeff	prev->refcnt = node->refcnt;
463219820Sjeff	__mm_remove(node);
464219820Sjeff
465219820Sjeff	return prev;
466219820Sjeff}
467219820Sjeff
468219820Sjeffstatic struct ibv_mem_node *split_range(struct ibv_mem_node *node,
469219820Sjeff					uintptr_t cut_line)
470219820Sjeff{
471219820Sjeff	struct ibv_mem_node *new_node = NULL;
472219820Sjeff
473219820Sjeff	new_node = malloc(sizeof *new_node);
474219820Sjeff	if (!new_node)
475219820Sjeff		return NULL;
476219820Sjeff	new_node->start  = cut_line;
477219820Sjeff	new_node->end    = node->end;
478219820Sjeff	new_node->refcnt = node->refcnt;
479219820Sjeff	node->end  = cut_line - 1;
480219820Sjeff	__mm_add(new_node);
481219820Sjeff
482219820Sjeff	return new_node;
483219820Sjeff}
484219820Sjeff
485219820Sjeffstatic struct ibv_mem_node *get_start_node(uintptr_t start, uintptr_t end,
486219820Sjeff					   int inc)
487219820Sjeff{
488219820Sjeff	struct ibv_mem_node *node, *tmp = NULL;
489219820Sjeff
490219820Sjeff	node = __mm_find_start(start, end);
491219820Sjeff	if (node->start < start)
492219820Sjeff		node = split_range(node, start);
493219820Sjeff	else {
494219820Sjeff		tmp = __mm_prev(node);
495219820Sjeff		if (tmp && tmp->refcnt == node->refcnt + inc)
496219820Sjeff			node = merge_ranges(node, tmp);
497219820Sjeff	}
498219820Sjeff	return node;
499219820Sjeff}
500219820Sjeff
501219820Sjeff/*
502219820Sjeff * This function is called if madvise() fails to undo merging/splitting
503219820Sjeff * operations performed on the node.
504219820Sjeff */
505219820Sjeffstatic struct ibv_mem_node *undo_node(struct ibv_mem_node *node,
506219820Sjeff				      uintptr_t start, int inc)
507219820Sjeff{
508219820Sjeff	struct ibv_mem_node *tmp = NULL;
509219820Sjeff
510219820Sjeff	/*
511219820Sjeff	 * This condition can be true only if we merged this
512219820Sjeff	 * node with the previous one, so we need to split them.
513219820Sjeff	*/
514219820Sjeff	if (start > node->start) {
515219820Sjeff		tmp = split_range(node, start);
516219820Sjeff		if (tmp) {
517219820Sjeff			node->refcnt += inc;
518219820Sjeff			node = tmp;
519219820Sjeff		} else
520219820Sjeff			return NULL;
521219820Sjeff	}
522219820Sjeff
523219820Sjeff	tmp  =  __mm_prev(node);
524219820Sjeff	if (tmp && tmp->refcnt == node->refcnt)
525219820Sjeff		node = merge_ranges(node, tmp);
526219820Sjeff
527219820Sjeff	tmp  =  __mm_next(node);
528219820Sjeff	if (tmp && tmp->refcnt == node->refcnt)
529219820Sjeff		node = merge_ranges(tmp, node);
530219820Sjeff
531219820Sjeff	return node;
532219820Sjeff}
533219820Sjeff
534219820Sjeffstatic int ibv_madvise_range(void *base, size_t size, int advice)
535219820Sjeff{
536219820Sjeff	uintptr_t start, end;
537219820Sjeff	struct ibv_mem_node *node, *tmp;
538219820Sjeff	int inc;
539219820Sjeff	int rolling_back = 0;
540219820Sjeff	int ret = 0;
541219820Sjeff
542219820Sjeff	if (!size)
543219820Sjeff		return 0;
544219820Sjeff
545219820Sjeff	start = (uintptr_t) base & ~(page_size - 1);
546219820Sjeff	end   = ((uintptr_t) (base + size + page_size - 1) &
547219820Sjeff		 ~(page_size - 1)) - 1;
548219820Sjeff
549219820Sjeff	pthread_mutex_lock(&mm_mutex);
550219820Sjeffagain:
551219820Sjeff	inc = advice == MADV_DONTFORK ? 1 : -1;
552219820Sjeff
553219820Sjeff	node = get_start_node(start, end, inc);
554219820Sjeff	if (!node) {
555219820Sjeff		ret = -1;
556219820Sjeff		goto out;
557219820Sjeff	}
558219820Sjeff
559219820Sjeff	while (node && node->start <= end) {
560219820Sjeff		if (node->end > end) {
561219820Sjeff			if (!split_range(node, end + 1)) {
562219820Sjeff				ret = -1;
563219820Sjeff				goto out;
564219820Sjeff			}
565219820Sjeff		}
566219820Sjeff
567219820Sjeff		if ((inc == -1 && node->refcnt == 1) ||
568219820Sjeff		    (inc ==  1 && node->refcnt == 0)) {
569219820Sjeff			/*
570219820Sjeff			 * If this is the first time through the loop,
571219820Sjeff			 * and we merged this node with the previous
572219820Sjeff			 * one, then we only want to do the madvise()
573219820Sjeff			 * on start ... node->end (rather than
574219820Sjeff			 * starting at node->start).
575219820Sjeff			 *
576219820Sjeff			 * Otherwise we end up doing madvise() on
577219820Sjeff			 * bigger region than we're being asked to,
578219820Sjeff			 * and that may lead to a spurious failure.
579219820Sjeff			 */
580219820Sjeff			if (start > node->start)
581219820Sjeff				ret = minherit((void *) start, node->end - start + 1,
582219820Sjeff					      advice);
583219820Sjeff			else
584219820Sjeff				ret = minherit((void *) node->start,
585219820Sjeff					      node->end - node->start + 1,
586219820Sjeff					      advice);
587219820Sjeff			if (ret) {
588219820Sjeff				node = undo_node(node, start, inc);
589219820Sjeff
590219820Sjeff				if (rolling_back || !node)
591219820Sjeff					goto out;
592219820Sjeff
593219820Sjeff				/* madvise failed, roll back previous changes */
594219820Sjeff				rolling_back = 1;
595219820Sjeff				advice = advice == MADV_DONTFORK ?
596219820Sjeff					MADV_DOFORK : MADV_DONTFORK;
597219820Sjeff				tmp = __mm_prev(node);
598219820Sjeff				if (!tmp || start > tmp->end)
599219820Sjeff					goto out;
600219820Sjeff				end = tmp->end;
601219820Sjeff				goto again;
602219820Sjeff			}
603219820Sjeff		}
604219820Sjeff
605219820Sjeff		node->refcnt += inc;
606219820Sjeff		node = __mm_next(node);
607219820Sjeff	}
608219820Sjeff
609219820Sjeff	if (node) {
610219820Sjeff		tmp = __mm_prev(node);
611219820Sjeff		if (tmp && node->refcnt == tmp->refcnt)
612219820Sjeff			node = merge_ranges(node, tmp);
613219820Sjeff	}
614219820Sjeff
615219820Sjeffout:
616219820Sjeff	if (rolling_back)
617219820Sjeff		ret = -1;
618219820Sjeff
619219820Sjeff	pthread_mutex_unlock(&mm_mutex);
620219820Sjeff
621219820Sjeff	return ret;
622219820Sjeff}
623219820Sjeff
624219820Sjeffint ibv_dontfork_range(void *base, size_t size)
625219820Sjeff{
626219820Sjeff	if (mm_root)
627219820Sjeff		return ibv_madvise_range(base, size, MADV_DONTFORK);
628219820Sjeff	else {
629219820Sjeff		too_late = 1;
630219820Sjeff		return 0;
631219820Sjeff	}
632219820Sjeff}
633219820Sjeff
634219820Sjeffint ibv_dofork_range(void *base, size_t size)
635219820Sjeff{
636219820Sjeff	if (mm_root)
637219820Sjeff		return ibv_madvise_range(base, size, MADV_DOFORK);
638219820Sjeff	else {
639219820Sjeff		too_late = 1;
640219820Sjeff		return 0;
641219820Sjeff	}
642219820Sjeff}
643