#define	JEMALLOC_CHUNK_C_
#include "jemalloc/internal/jemalloc_internal.h"

/******************************************************************************/
/* Data. */

const char	*opt_dss = DSS_DEFAULT;
size_t		opt_lg_chunk = LG_CHUNK_DEFAULT;

malloc_mutex_t	chunks_mtx;
chunk_stats_t	stats_chunks;

/*
 * Trees of chunks that were previously allocated (trees differ only in node
 * ordering).  These are used when allocating chunks, in an attempt to re-use
 * address space.  Depending on function, different tree orderings are needed,
 * which is why each group of chunks is tracked by a pair of trees with the
 * same contents.
 */
static extent_tree_t	chunks_szad_mmap;
static extent_tree_t	chunks_ad_mmap;
static extent_tree_t	chunks_szad_dss;
static extent_tree_t	chunks_ad_dss;
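
/*
 * Radix tree of currently allocated (non-base) chunks, keyed by base address.
 * Maintained only when config_ivsalloc, so that ivsalloc() can determine
 * whether a pointer was returned by this allocator.
 */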
rtree_t		*chunks_rtree;

/* Various chunk-related settings. */
size_t		chunksize;
size_t		chunksize_mask; /* (chunksize - 1). */
size_t		chunk_npages;
size_t		map_bias;
size_t		arena_maxclass; /* Max size class for arenas. */

/******************************************************************************/
/* Function prototypes for non-inline static functions. */

static void	*chunk_recycle(extent_tree_t *chunks_szad,
    extent_tree_t *chunks_ad, size_t size, size_t alignment, bool base,
    bool *zero);
static void	chunk_record(extent_tree_t *chunks_szad,
    extent_tree_t *chunks_ad, void *chunk, size_t size);

/******************************************************************************/

static void *
chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size,
    size_t alignment, bool base, bool *zero)
{
	void *ret;
	extent_node_t *node;
	extent_node_t key;
	size_t alloc_size, leadsize, trailsize;
	bool zeroed;

	if (base) {
		/*
		 * This function may need to call base_node_{,de}alloc(), but
		 * the current chunk allocation request is on behalf of the
		 * base allocator.  Avoid deadlock (and if that weren't an
		 * issue, potential for infinite recursion) by returning NULL.
		 */
		return (NULL);
	}

	alloc_size = size + alignment - chunksize;
	/* Beware size_t wrap-around. */
	if (alloc_size < size)
		return (NULL);
	key.addr = NULL;
	key.size = alloc_size;
	malloc_mutex_lock(&chunks_mtx);
	node = extent_tree_szad_nsearch(chunks_szad, &key);
	if (node == NULL) {
		malloc_mutex_unlock(&chunks_mtx);
		return (NULL);
	}
	leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) -
	    (uintptr_t)node->addr;
	assert(node->size >= leadsize + size);
	trailsize = node->size - leadsize - size;
	ret = (void *)((uintptr_t)node->addr + leadsize);
	zeroed = node->zeroed;
	if (zeroed)
		*zero = true;
	/* Remove node from the tree. */
	extent_tree_szad_remove(chunks_szad, node);
	extent_tree_ad_remove(chunks_ad, node);
	if (leadsize != 0) {
		/* Insert the leading space as a smaller chunk. */
		node->size = leadsize;
		extent_tree_szad_insert(chunks_szad, node);
		extent_tree_ad_insert(chunks_ad, node);
		node = NULL;
	}
	if (trailsize != 0) {
		/* Insert the trailing space as a smaller chunk. */
		if (node == NULL) {
			/*
			 * An additional node is required, but
			 * base_node_alloc() can cause a new base chunk to be
			 * allocated.  Drop chunks_mtx in order to avoid
			 * deadlock, and if node allocation fails, deallocate
			 * the result before returning an error.
			 */
			malloc_mutex_unlock(&chunks_mtx);
			node = base_node_alloc();
			if (node == NULL) {
				chunk_dealloc(ret, size, true);
				return (NULL);
			}
			malloc_mutex_lock(&chunks_mtx);
		}
		node->addr = (void *)((uintptr_t)(ret) + size);
		node->size = trailsize;
		node->zeroed = zeroed;
		extent_tree_szad_insert(chunks_szad, node);
		extent_tree_ad_insert(chunks_ad, node);
		node = NULL;
	}
	malloc_mutex_unlock(&chunks_mtx);

	if (node != NULL)
		base_node_dealloc(node);
	if (*zero) {
		if (zeroed == false)
			memset(ret, 0, size);
		else if (config_debug) {
			size_t i;
			size_t *p = (size_t *)(uintptr_t)ret;

			VALGRIND_MAKE_MEM_DEFINED(ret, size);
			for (i = 0; i < size / sizeof(size_t); i++)
				assert(p[i] == 0);
		}
	}
	return (ret);
}
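
/*
 * Worked example of the splitting arithmetic above (illustrative only;
 * assumes the default 4 MiB chunk size).  For a request of size 4 MiB with
 * 16 MiB alignment, alloc_size = 4 + 16 - 4 = 16 MiB.  If the size/address
 * search returns a cached 20 MiB extent at address 36 MiB, the aligned result
 * address is 48 MiB, leadsize = 48 - 36 = 12 MiB, and
 * trailsize = 20 - 12 - 4 = 4 MiB; the 12 MiB lead and 4 MiB trail are
 * reinserted into the trees as smaller extents.
 */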

/*
 * If the caller specifies (*zero == false), it is still possible to receive
 * zeroed memory, in which case *zero is toggled to true.  arena_chunk_alloc()
 * takes advantage of this to avoid demanding zeroed chunks, while still
 * benefiting from them if they happen to be returned.
 */
void *
chunk_alloc(size_t size, size_t alignment, bool base, bool *zero,
    dss_prec_t dss_prec)
{
	void *ret;

	assert(size != 0);
	assert((size & chunksize_mask) == 0);
	assert(alignment != 0);
	assert((alignment & chunksize_mask) == 0);

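	/*
	 * Try each source in precedence order: when dss_prec is
	 * dss_prec_primary, recycled dss extents and fresh dss allocation are
	 * attempted before mmap; when it is dss_prec_secondary, they are
	 * attempted only after mmap fails.  For each source, recycling cached
	 * address space is preferred over allocating new chunks.
	 */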
	/* "primary" dss. */
	if (config_dss && dss_prec == dss_prec_primary) {
		if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size,
		    alignment, base, zero)) != NULL)
			goto label_return;
		if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL)
			goto label_return;
	}
	/* mmap. */
	if ((ret = chunk_recycle(&chunks_szad_mmap, &chunks_ad_mmap, size,
	    alignment, base, zero)) != NULL)
		goto label_return;
	if ((ret = chunk_alloc_mmap(size, alignment, zero)) != NULL)
		goto label_return;
	/* "secondary" dss. */
	if (config_dss && dss_prec == dss_prec_secondary) {
		if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size,
		    alignment, base, zero)) != NULL)
			goto label_return;
		if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL)
			goto label_return;
	}

	/* All strategies for allocation failed. */
	ret = NULL;
label_return:
	if (ret != NULL) {
		if (config_ivsalloc && base == false) {
			if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) {
				chunk_dealloc(ret, size, true);
				return (NULL);
			}
		}
		if (config_stats || config_prof) {
			bool gdump;
			malloc_mutex_lock(&chunks_mtx);
			if (config_stats)
				stats_chunks.nchunks += (size / chunksize);
			stats_chunks.curchunks += (size / chunksize);
			if (stats_chunks.curchunks > stats_chunks.highchunks) {
				stats_chunks.highchunks =
				    stats_chunks.curchunks;
				if (config_prof)
					gdump = true;
			} else if (config_prof)
				gdump = false;
			malloc_mutex_unlock(&chunks_mtx);
			if (config_prof && opt_prof && opt_prof_gdump && gdump)
				prof_gdump();
		}
		if (config_valgrind)
			VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
	}
	assert(CHUNK_ADDR2BASE(ret) == ret);
	return (ret);
}
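
/*
 * Illustrative usage sketch (compiled out; not part of the allocator).  It
 * shows the *zero contract documented above: a caller that does not require
 * zeroed memory still learns, via the flag, whether the chunk happens to be
 * zeroed.  A chunksize-sized, chunksize-aligned request is the minimal case;
 * dss_prec_secondary is an arbitrary example precedence.
 */
#if 0
static void
chunk_alloc_zero_example(void)
{
	bool zero = false;	/* Zeroed memory is not required... */
	void *chunk = chunk_alloc(chunksize, chunksize, false, &zero,
	    dss_prec_secondary);

	if (chunk == NULL)
		return;
	/*
	 * ...but if the chunk came back zeroed (e.g. fresh mmap pages), zero
	 * is now true and the caller can skip its own memset().
	 */
	if (zero == false)
		memset(chunk, 0, chunksize);
	chunk_dealloc(chunk, chunksize, true);
}
#endif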

static void
chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk,
    size_t size)
{
	bool unzeroed;
	extent_node_t *xnode, *node, *prev, *xprev, key;

	unzeroed = pages_purge(chunk, size);
	VALGRIND_MAKE_MEM_NOACCESS(chunk, size);

	/*
	 * Allocate a node before acquiring chunks_mtx even though it might not
	 * be needed, because base_node_alloc() may cause a new base chunk to
	 * be allocated, which could cause deadlock if chunks_mtx were already
	 * held.
	 */
	xnode = base_node_alloc();
	/* Use xprev to implement conditional deferred deallocation of prev. */
	xprev = NULL;

	malloc_mutex_lock(&chunks_mtx);
	key.addr = (void *)((uintptr_t)chunk + size);
	node = extent_tree_ad_nsearch(chunks_ad, &key);
	/* Try to coalesce forward. */
	if (node != NULL && node->addr == key.addr) {
		/*
		 * Coalesce chunk with the following address range.  This does
		 * not change the position within chunks_ad, so only
		 * remove/insert from/into chunks_szad.
		 */
		extent_tree_szad_remove(chunks_szad, node);
		node->addr = chunk;
		node->size += size;
		node->zeroed = (node->zeroed && (unzeroed == false));
		extent_tree_szad_insert(chunks_szad, node);
	} else {
		/* Coalescing forward failed, so insert a new node. */
		if (xnode == NULL) {
			/*
			 * base_node_alloc() failed, which is an exceedingly
			 * unlikely failure.  Leak chunk; its pages have
			 * already been purged, so this is only a virtual
			 * memory leak.
			 */
			goto label_return;
		}
		node = xnode;
		xnode = NULL; /* Prevent deallocation below. */
		node->addr = chunk;
		node->size = size;
		node->zeroed = (unzeroed == false);
		extent_tree_ad_insert(chunks_ad, node);
		extent_tree_szad_insert(chunks_szad, node);
	}

	/* Try to coalesce backward. */
	prev = extent_tree_ad_prev(chunks_ad, node);
	if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
	    chunk) {
		/*
		 * Coalesce chunk with the previous address range.  This does
		 * not change the position within chunks_ad, so only
		 * remove/insert node from/into chunks_szad.
		 */
		extent_tree_szad_remove(chunks_szad, prev);
		extent_tree_ad_remove(chunks_ad, prev);

		extent_tree_szad_remove(chunks_szad, node);
		node->addr = prev->addr;
		node->size += prev->size;
		node->zeroed = (node->zeroed && prev->zeroed);
		extent_tree_szad_insert(chunks_szad, node);

		xprev = prev;
	}

label_return:
	malloc_mutex_unlock(&chunks_mtx);
	/*
	 * Deallocate xnode and/or xprev after unlocking chunks_mtx in order to
	 * avoid potential deadlock.
	 */
	if (xnode != NULL)
		base_node_dealloc(xnode);
	if (xprev != NULL)
		base_node_dealloc(xprev);
}
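
/*
 * Example of the coalescing performed by chunk_record() (illustrative only;
 * assumes 4 MiB chunks).  Suppose [0x1400000, 0x1800000) is already cached.
 * Recording [0x1000000, 0x1400000) coalesces forward into a single 8 MiB
 * extent at 0x1000000.  If [0xc00000, 0x1000000) is also cached, backward
 * coalescing then merges it in, leaving one 12 MiB extent at 0xc00000 in the
 * address and size/address trees.
 */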

void
chunk_unmap(void *chunk, size_t size)
{
	assert(chunk != NULL);
	assert(CHUNK_ADDR2BASE(chunk) == chunk);
	assert(size != 0);
	assert((size & chunksize_mask) == 0);

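	/*
	 * DSS memory cannot be unmapped, so it is always cached for later
	 * recycling.  chunk_dealloc_mmap() returns true iff it did not
	 * actually unmap (e.g. when munmap() support is configured out), in
	 * which case the chunk is cached in the mmap trees instead.
	 */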
	if (config_dss && chunk_in_dss(chunk))
		chunk_record(&chunks_szad_dss, &chunks_ad_dss, chunk, size);
	else if (chunk_dealloc_mmap(chunk, size))
		chunk_record(&chunks_szad_mmap, &chunks_ad_mmap, chunk, size);
}

void
chunk_dealloc(void *chunk, size_t size, bool unmap)
{

	assert(chunk != NULL);
	assert(CHUNK_ADDR2BASE(chunk) == chunk);
	assert(size != 0);
	assert((size & chunksize_mask) == 0);

	if (config_ivsalloc)
		rtree_set(chunks_rtree, (uintptr_t)chunk, NULL);
	if (config_stats || config_prof) {
		malloc_mutex_lock(&chunks_mtx);
		assert(stats_chunks.curchunks >= (size / chunksize));
		stats_chunks.curchunks -= (size / chunksize);
		malloc_mutex_unlock(&chunks_mtx);
	}

	if (unmap)
		chunk_unmap(chunk, size);
}

bool
chunk_boot(void)
{

	/* Set variables according to the value of opt_lg_chunk. */
	chunksize = (ZU(1) << opt_lg_chunk);
	assert(chunksize >= PAGE);
	chunksize_mask = chunksize - 1;
	chunk_npages = (chunksize >> LG_PAGE);
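	/*
	 * With the common defaults (opt_lg_chunk == 22, LG_PAGE == 12) this
	 * yields 4 MiB chunks, chunksize_mask == 0x3fffff, and
	 * chunk_npages == 1024; the actual values depend on build-time and
	 * run-time configuration.
	 */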

	if (config_stats || config_prof) {
		if (malloc_mutex_init(&chunks_mtx))
			return (true);
		memset(&stats_chunks, 0, sizeof(chunk_stats_t));
	}
	if (config_dss && chunk_dss_boot())
		return (true);
	extent_tree_szad_new(&chunks_szad_mmap);
	extent_tree_ad_new(&chunks_ad_mmap);
	extent_tree_szad_new(&chunks_szad_dss);
	extent_tree_ad_new(&chunks_ad_dss);
	if (config_ivsalloc) {
		chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) -
		    opt_lg_chunk);
		if (chunks_rtree == NULL)
			return (true);
	}

	return (false);
}

void
chunk_prefork(void)
{

	malloc_mutex_lock(&chunks_mtx);
	if (config_ivsalloc)
		rtree_prefork(chunks_rtree);
	chunk_dss_prefork();
}

void
chunk_postfork_parent(void)
{

	chunk_dss_postfork_parent();
	if (config_ivsalloc)
		rtree_postfork_parent(chunks_rtree);
	malloc_mutex_postfork_parent(&chunks_mtx);
}

void
chunk_postfork_child(void)
{

	chunk_dss_postfork_child();
	if (config_ivsalloc)
		rtree_postfork_child(chunks_rtree);
	malloc_mutex_postfork_child(&chunks_mtx);
}