1234370Sjasone#define	JEMALLOC_PROF_C_
2234370Sjasone#include "jemalloc/internal/jemalloc_internal.h"
3234370Sjasone/******************************************************************************/
4234370Sjasone
5234370Sjasone#ifdef JEMALLOC_PROF_LIBUNWIND
6234370Sjasone#define	UNW_LOCAL_ONLY
7234370Sjasone#include <libunwind.h>
8234370Sjasone#endif
9234370Sjasone
10234370Sjasone#ifdef JEMALLOC_PROF_LIBGCC
11234370Sjasone#include <unwind.h>
12234370Sjasone#endif
13234370Sjasone
14234370Sjasone/******************************************************************************/
15234370Sjasone/* Data. */
16234370Sjasone
17234370Sjasonemalloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL)
18234370Sjasone
19234370Sjasonebool		opt_prof = false;
20234370Sjasonebool		opt_prof_active = true;
21234370Sjasonesize_t		opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
22234370Sjasonessize_t		opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
23234370Sjasonebool		opt_prof_gdump = false;
24234543Sjasonebool		opt_prof_final = true;
25234370Sjasonebool		opt_prof_leak = false;
26234543Sjasonebool		opt_prof_accum = false;
27234370Sjasonechar		opt_prof_prefix[PATH_MAX + 1];
28234370Sjasone
29245868Sjasoneuint64_t	prof_interval = 0;
30234370Sjasonebool		prof_promote;
31234370Sjasone
32234370Sjasone/*
33234370Sjasone * Table of mutexes that are shared among ctx's.  These are leaf locks, so
34234370Sjasone * there is no problem with using them for more than one ctx at the same time.
35234370Sjasone * The primary motivation for this sharing though is that ctx's are ephemeral,
36234370Sjasone * and destroying mutexes causes complications for systems that allocate when
37234370Sjasone * creating/destroying mutexes.
38234370Sjasone */
39234370Sjasonestatic malloc_mutex_t	*ctx_locks;
40234370Sjasonestatic unsigned		cum_ctxs; /* Atomic counter. */
41234370Sjasone
42234370Sjasone/*
43234370Sjasone * Global hash of (prof_bt_t *)-->(prof_ctx_t *).  This is the master data
44234370Sjasone * structure that knows about all backtraces currently captured.
45234370Sjasone */
46234370Sjasonestatic ckh_t		bt2ctx;
47234370Sjasonestatic malloc_mutex_t	bt2ctx_mtx;
48234370Sjasone
49234370Sjasonestatic malloc_mutex_t	prof_dump_seq_mtx;
50234370Sjasonestatic uint64_t		prof_dump_seq;
51234370Sjasonestatic uint64_t		prof_dump_iseq;
52234370Sjasonestatic uint64_t		prof_dump_mseq;
53234370Sjasonestatic uint64_t		prof_dump_useq;
54234370Sjasone
55234370Sjasone/*
56234370Sjasone * This buffer is rather large for stack allocation, so use a single buffer for
57234370Sjasone * all profile dumps.  The buffer is implicitly protected by bt2ctx_mtx, since
58234370Sjasone * it must be locked anyway during dumping.
59234370Sjasone */
60234370Sjasonestatic char		prof_dump_buf[PROF_DUMP_BUFSIZE];
61234370Sjasonestatic unsigned		prof_dump_buf_end;
62234370Sjasonestatic int		prof_dump_fd;
63234370Sjasone
64234370Sjasone/* Do not dump any profiles until bootstrapping is complete. */
65234370Sjasonestatic bool		prof_booted = false;
66234370Sjasone
67234370Sjasone/******************************************************************************/
68234370Sjasone/* Function prototypes for non-inline static functions. */
69234370Sjasone
70234370Sjasonestatic prof_bt_t	*bt_dup(prof_bt_t *bt);
71234370Sjasonestatic void	bt_destroy(prof_bt_t *bt);
72234370Sjasone#ifdef JEMALLOC_PROF_LIBGCC
73234370Sjasonestatic _Unwind_Reason_Code	prof_unwind_init_callback(
74234370Sjasone    struct _Unwind_Context *context, void *arg);
75234370Sjasonestatic _Unwind_Reason_Code	prof_unwind_callback(
76234370Sjasone    struct _Unwind_Context *context, void *arg);
77234370Sjasone#endif
78234370Sjasonestatic bool	prof_flush(bool propagate_err);
79234370Sjasonestatic bool	prof_write(bool propagate_err, const char *s);
80234370Sjasonestatic bool	prof_printf(bool propagate_err, const char *format, ...)
81234370Sjasone    JEMALLOC_ATTR(format(printf, 2, 3));
82234370Sjasonestatic void	prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all,
83234370Sjasone    size_t *leak_nctx);
84234370Sjasonestatic void	prof_ctx_destroy(prof_ctx_t *ctx);
85234370Sjasonestatic void	prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt);
86234370Sjasonestatic bool	prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx,
87234370Sjasone    prof_bt_t *bt);
88234370Sjasonestatic bool	prof_dump_maps(bool propagate_err);
89234370Sjasonestatic bool	prof_dump(bool propagate_err, const char *filename,
90234370Sjasone    bool leakcheck);
91234370Sjasonestatic void	prof_dump_filename(char *filename, char v, int64_t vseq);
92234370Sjasonestatic void	prof_fdump(void);
93245868Sjasonestatic void	prof_bt_hash(const void *key, size_t r_hash[2]);
94234370Sjasonestatic bool	prof_bt_keycomp(const void *k1, const void *k2);
95234370Sjasonestatic malloc_mutex_t	*prof_ctx_mutex_choose(void);
96234370Sjasone
97234370Sjasone/******************************************************************************/
98234370Sjasone
99234370Sjasonevoid
100234370Sjasonebt_init(prof_bt_t *bt, void **vec)
101234370Sjasone{
102234370Sjasone
103234370Sjasone	cassert(config_prof);
104234370Sjasone
105234370Sjasone	bt->vec = vec;
106234370Sjasone	bt->len = 0;
107234370Sjasone}
108234370Sjasone
/* Free a backtrace previously created by bt_dup(). */
static void
bt_destroy(prof_bt_t *bt)
{

	cassert(config_prof);

	/* bt and its frame vector are one contiguous allocation (bt_dup()). */
	idalloc(bt);
}
117234370Sjasone
118234370Sjasonestatic prof_bt_t *
119234370Sjasonebt_dup(prof_bt_t *bt)
120234370Sjasone{
121234370Sjasone	prof_bt_t *ret;
122234370Sjasone
123234370Sjasone	cassert(config_prof);
124234370Sjasone
125234370Sjasone	/*
126234370Sjasone	 * Create a single allocation that has space for vec immediately
127234370Sjasone	 * following the prof_bt_t structure.  The backtraces that get
128234370Sjasone	 * stored in the backtrace caches are copied from stack-allocated
129234370Sjasone	 * temporary variables, so size is known at creation time.  Making this
130234370Sjasone	 * a contiguous object improves cache locality.
131234370Sjasone	 */
132234370Sjasone	ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) +
133234370Sjasone	    (bt->len * sizeof(void *)));
134234370Sjasone	if (ret == NULL)
135234370Sjasone		return (NULL);
136234370Sjasone	ret->vec = (void **)((uintptr_t)ret +
137234370Sjasone	    QUANTUM_CEILING(sizeof(prof_bt_t)));
138234370Sjasone	memcpy(ret->vec, bt->vec, bt->len * sizeof(void *));
139234370Sjasone	ret->len = bt->len;
140234370Sjasone
141234370Sjasone	return (ret);
142234370Sjasone}
143234370Sjasone
144234370Sjasonestatic inline void
145235238Sjasoneprof_enter(prof_tdata_t *prof_tdata)
146234370Sjasone{
147234370Sjasone
148234370Sjasone	cassert(config_prof);
149234370Sjasone
150235238Sjasone	assert(prof_tdata->enq == false);
151235238Sjasone	prof_tdata->enq = true;
152234370Sjasone
153234370Sjasone	malloc_mutex_lock(&bt2ctx_mtx);
154234370Sjasone}
155234370Sjasone
156234370Sjasonestatic inline void
157235238Sjasoneprof_leave(prof_tdata_t *prof_tdata)
158234370Sjasone{
159234370Sjasone	bool idump, gdump;
160234370Sjasone
161234370Sjasone	cassert(config_prof);
162234370Sjasone
163234370Sjasone	malloc_mutex_unlock(&bt2ctx_mtx);
164234370Sjasone
165235238Sjasone	assert(prof_tdata->enq);
166235238Sjasone	prof_tdata->enq = false;
167235238Sjasone	idump = prof_tdata->enq_idump;
168235238Sjasone	prof_tdata->enq_idump = false;
169235238Sjasone	gdump = prof_tdata->enq_gdump;
170235238Sjasone	prof_tdata->enq_gdump = false;
171234370Sjasone
172234370Sjasone	if (idump)
173234370Sjasone		prof_idump();
174234370Sjasone	if (gdump)
175234370Sjasone		prof_gdump();
176234370Sjasone}
177234370Sjasone
178234370Sjasone#ifdef JEMALLOC_PROF_LIBUNWIND
179234370Sjasonevoid
180234370Sjasoneprof_backtrace(prof_bt_t *bt, unsigned nignore)
181234370Sjasone{
182234370Sjasone	unw_context_t uc;
183234370Sjasone	unw_cursor_t cursor;
184234370Sjasone	unsigned i;
185234370Sjasone	int err;
186234370Sjasone
187234370Sjasone	cassert(config_prof);
188234370Sjasone	assert(bt->len == 0);
189234370Sjasone	assert(bt->vec != NULL);
190234370Sjasone
191234370Sjasone	unw_getcontext(&uc);
192234370Sjasone	unw_init_local(&cursor, &uc);
193234370Sjasone
194234370Sjasone	/* Throw away (nignore+1) stack frames, if that many exist. */
195234370Sjasone	for (i = 0; i < nignore + 1; i++) {
196234370Sjasone		err = unw_step(&cursor);
197234370Sjasone		if (err <= 0)
198234370Sjasone			return;
199234370Sjasone	}
200234370Sjasone
201234370Sjasone	/*
202234370Sjasone	 * Iterate over stack frames until there are no more, or until no space
203234370Sjasone	 * remains in bt.
204234370Sjasone	 */
205234370Sjasone	for (i = 0; i < PROF_BT_MAX; i++) {
206234370Sjasone		unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]);
207234370Sjasone		bt->len++;
208234370Sjasone		err = unw_step(&cursor);
209234370Sjasone		if (err <= 0)
210234370Sjasone			break;
211234370Sjasone	}
212234370Sjasone}
213234370Sjasone#elif (defined(JEMALLOC_PROF_LIBGCC))
/* _Unwind_Backtrace() callback that records nothing and keeps unwinding. */
static _Unwind_Reason_Code
prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
{

	cassert(config_prof);

	return (_URC_NO_REASON);
}
222234370Sjasone
223234370Sjasonestatic _Unwind_Reason_Code
224234370Sjasoneprof_unwind_callback(struct _Unwind_Context *context, void *arg)
225234370Sjasone{
226234370Sjasone	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
227234370Sjasone
228234370Sjasone	cassert(config_prof);
229234370Sjasone
230234370Sjasone	if (data->nignore > 0)
231234370Sjasone		data->nignore--;
232234370Sjasone	else {
233234370Sjasone		data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
234234370Sjasone		data->bt->len++;
235234370Sjasone		if (data->bt->len == data->max)
236234370Sjasone			return (_URC_END_OF_STACK);
237234370Sjasone	}
238234370Sjasone
239234370Sjasone	return (_URC_NO_REASON);
240234370Sjasone}
241234370Sjasone
/*
 * Capture the current call stack into bt via libgcc's _Unwind_Backtrace();
 * frame skipping (nignore) and the PROF_BT_MAX cap are enforced inside
 * prof_unwind_callback().
 */
void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{
	prof_unwind_data_t data = {bt, nignore, PROF_BT_MAX};

	cassert(config_prof);

	_Unwind_Backtrace(prof_unwind_callback, &data);
}
251234370Sjasone#elif (defined(JEMALLOC_PROF_GCC))
/*
 * Capture the current call stack into bt using GCC's
 * __builtin_frame_address()/__builtin_return_address(), which require
 * compile-time-constant frame indices -- hence the fully unrolled BT_FRAME()
 * sequence below.  Frames [0, nignore) are discarded and at most PROF_BT_MAX
 * frames are recorded.
 */
void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{
/*
 * Record frame (i) if it is past the ignored prefix; return from
 * prof_backtrace() at the first missing frame or once nignore + PROF_BT_MAX
 * frames have been examined.
 */
#define	BT_FRAME(i)							\
	if ((i) < nignore + PROF_BT_MAX) {				\
		void *p;						\
		if (__builtin_frame_address(i) == 0)			\
			return;						\
		p = __builtin_return_address(i);			\
		if (p == NULL)						\
			return;						\
		if (i >= nignore) {					\
			bt->vec[(i) - nignore] = p;			\
			bt->len = (i) - nignore + 1;			\
		}							\
	} else								\
		return;

	cassert(config_prof);
	/* Only up to 3 extra frames (128-130 below) compensate for nignore. */
	assert(nignore <= 3);

	BT_FRAME(0)
	BT_FRAME(1)
	BT_FRAME(2)
	BT_FRAME(3)
	BT_FRAME(4)
	BT_FRAME(5)
	BT_FRAME(6)
	BT_FRAME(7)
	BT_FRAME(8)
	BT_FRAME(9)

	BT_FRAME(10)
	BT_FRAME(11)
	BT_FRAME(12)
	BT_FRAME(13)
	BT_FRAME(14)
	BT_FRAME(15)
	BT_FRAME(16)
	BT_FRAME(17)
	BT_FRAME(18)
	BT_FRAME(19)

	BT_FRAME(20)
	BT_FRAME(21)
	BT_FRAME(22)
	BT_FRAME(23)
	BT_FRAME(24)
	BT_FRAME(25)
	BT_FRAME(26)
	BT_FRAME(27)
	BT_FRAME(28)
	BT_FRAME(29)

	BT_FRAME(30)
	BT_FRAME(31)
	BT_FRAME(32)
	BT_FRAME(33)
	BT_FRAME(34)
	BT_FRAME(35)
	BT_FRAME(36)
	BT_FRAME(37)
	BT_FRAME(38)
	BT_FRAME(39)

	BT_FRAME(40)
	BT_FRAME(41)
	BT_FRAME(42)
	BT_FRAME(43)
	BT_FRAME(44)
	BT_FRAME(45)
	BT_FRAME(46)
	BT_FRAME(47)
	BT_FRAME(48)
	BT_FRAME(49)

	BT_FRAME(50)
	BT_FRAME(51)
	BT_FRAME(52)
	BT_FRAME(53)
	BT_FRAME(54)
	BT_FRAME(55)
	BT_FRAME(56)
	BT_FRAME(57)
	BT_FRAME(58)
	BT_FRAME(59)

	BT_FRAME(60)
	BT_FRAME(61)
	BT_FRAME(62)
	BT_FRAME(63)
	BT_FRAME(64)
	BT_FRAME(65)
	BT_FRAME(66)
	BT_FRAME(67)
	BT_FRAME(68)
	BT_FRAME(69)

	BT_FRAME(70)
	BT_FRAME(71)
	BT_FRAME(72)
	BT_FRAME(73)
	BT_FRAME(74)
	BT_FRAME(75)
	BT_FRAME(76)
	BT_FRAME(77)
	BT_FRAME(78)
	BT_FRAME(79)

	BT_FRAME(80)
	BT_FRAME(81)
	BT_FRAME(82)
	BT_FRAME(83)
	BT_FRAME(84)
	BT_FRAME(85)
	BT_FRAME(86)
	BT_FRAME(87)
	BT_FRAME(88)
	BT_FRAME(89)

	BT_FRAME(90)
	BT_FRAME(91)
	BT_FRAME(92)
	BT_FRAME(93)
	BT_FRAME(94)
	BT_FRAME(95)
	BT_FRAME(96)
	BT_FRAME(97)
	BT_FRAME(98)
	BT_FRAME(99)

	BT_FRAME(100)
	BT_FRAME(101)
	BT_FRAME(102)
	BT_FRAME(103)
	BT_FRAME(104)
	BT_FRAME(105)
	BT_FRAME(106)
	BT_FRAME(107)
	BT_FRAME(108)
	BT_FRAME(109)

	BT_FRAME(110)
	BT_FRAME(111)
	BT_FRAME(112)
	BT_FRAME(113)
	BT_FRAME(114)
	BT_FRAME(115)
	BT_FRAME(116)
	BT_FRAME(117)
	BT_FRAME(118)
	BT_FRAME(119)

	BT_FRAME(120)
	BT_FRAME(121)
	BT_FRAME(122)
	BT_FRAME(123)
	BT_FRAME(124)
	BT_FRAME(125)
	BT_FRAME(126)
	BT_FRAME(127)

	/* Extras to compensate for nignore. */
	BT_FRAME(128)
	BT_FRAME(129)
	BT_FRAME(130)
#undef BT_FRAME
}
420234370Sjasone#else
/* Stub for builds with no backtrace implementation; must never be called. */
void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{

	cassert(config_prof);
	assert(false);
}
428234370Sjasone#endif
429234370Sjasone
/*
 * Look up (or create) this thread's counter object for backtrace bt.
 * On a thread-cache miss, the backtrace is looked up in -- and if necessary
 * inserted into -- the global bt2ctx table, and a per thread prof_thr_cnt_t
 * is linked into both the thread cache and the ctx.  Returns NULL on OOM or
 * if the thread's prof_tdata is unavailable.
 */
prof_thr_cnt_t *
prof_lookup(prof_bt_t *bt)
{
	union {
		prof_thr_cnt_t	*p;
		void		*v;
	} ret;
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	prof_tdata = prof_tdata_get(false);
	/* Low pointer values encode thread states that cannot sample. */
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return (NULL);

	if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
		union {
			prof_bt_t	*p;
			void		*v;
		} btkey;
		union {
			prof_ctx_t	*p;
			void		*v;
		} ctx;
		bool new_ctx;

		/*
		 * This thread's cache lacks bt.  Look for it in the global
		 * cache.
		 */
		prof_enter(prof_tdata);
		if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {
			/* bt has never been seen before.  Insert it. */
			ctx.v = imalloc(sizeof(prof_ctx_t));
			if (ctx.v == NULL) {
				prof_leave(prof_tdata);
				return (NULL);
			}
			btkey.p = bt_dup(bt);
			if (btkey.v == NULL) {
				prof_leave(prof_tdata);
				idalloc(ctx.v);
				return (NULL);
			}
			ctx.p->bt = btkey.p;
			ctx.p->lock = prof_ctx_mutex_choose();
			/*
			 * Set nlimbo to 1, in order to avoid a race condition
			 * with prof_ctx_merge()/prof_ctx_destroy().
			 */
			ctx.p->nlimbo = 1;
			memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t));
			ql_new(&ctx.p->cnts_ql);
			if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
				/* OOM.  Unwind the ctx/btkey allocations. */
				prof_leave(prof_tdata);
				idalloc(btkey.v);
				idalloc(ctx.v);
				return (NULL);
			}
			new_ctx = true;
		} else {
			/*
			 * Increment nlimbo, in order to avoid a race condition
			 * with prof_ctx_merge()/prof_ctx_destroy().
			 */
			malloc_mutex_lock(ctx.p->lock);
			ctx.p->nlimbo++;
			malloc_mutex_unlock(ctx.p->lock);
			new_ctx = false;
		}
		prof_leave(prof_tdata);

		/* Link a prof_thd_cnt_t into ctx for this thread. */
		if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) {
			assert(ckh_count(&prof_tdata->bt2cnt) > 0);
			/*
			 * Flush the least recently used cnt in order to keep
			 * bt2cnt from becoming too large.
			 */
			ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
			assert(ret.v != NULL);
			if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt,
			    NULL, NULL))
				assert(false);
			ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
			prof_ctx_merge(ret.p->ctx, ret.p);
			/* ret can now be re-used. */
		} else {
			assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX);
			/* Allocate and partially initialize a new cnt. */
			ret.v = imalloc(sizeof(prof_thr_cnt_t));
			if (ret.p == NULL) {
				/* Roll back ctx registration if we made it. */
				if (new_ctx)
					prof_ctx_destroy(ctx.p);
				return (NULL);
			}
			ql_elm_new(ret.p, cnts_link);
			ql_elm_new(ret.p, lru_link);
		}
		/* Finish initializing ret. */
		ret.p->ctx = ctx.p;
		ret.p->epoch = 0;
		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
		if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
			/* OOM inserting into the thread cache. */
			if (new_ctx)
				prof_ctx_destroy(ctx.p);
			idalloc(ret.v);
			return (NULL);
		}
		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
		malloc_mutex_lock(ctx.p->lock);
		ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
		/* cnt is linked into ctx; drop the limbo reference. */
		ctx.p->nlimbo--;
		malloc_mutex_unlock(ctx.p->lock);
	} else {
		/* Move ret to the front of the LRU. */
		ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
	}

	return (ret.p);
}
553234370Sjasone
554234370Sjasonestatic bool
555234370Sjasoneprof_flush(bool propagate_err)
556234370Sjasone{
557234370Sjasone	bool ret = false;
558234370Sjasone	ssize_t err;
559234370Sjasone
560234370Sjasone	cassert(config_prof);
561234370Sjasone
562234370Sjasone	err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
563234370Sjasone	if (err == -1) {
564234370Sjasone		if (propagate_err == false) {
565234370Sjasone			malloc_write("<jemalloc>: write() failed during heap "
566234370Sjasone			    "profile flush\n");
567234370Sjasone			if (opt_abort)
568234370Sjasone				abort();
569234370Sjasone		}
570234370Sjasone		ret = true;
571234370Sjasone	}
572234370Sjasone	prof_dump_buf_end = 0;
573234370Sjasone
574234370Sjasone	return (ret);
575234370Sjasone}
576234370Sjasone
577234370Sjasonestatic bool
578234370Sjasoneprof_write(bool propagate_err, const char *s)
579234370Sjasone{
580234370Sjasone	unsigned i, slen, n;
581234370Sjasone
582234370Sjasone	cassert(config_prof);
583234370Sjasone
584234370Sjasone	i = 0;
585234370Sjasone	slen = strlen(s);
586234370Sjasone	while (i < slen) {
587234370Sjasone		/* Flush the buffer if it is full. */
588234370Sjasone		if (prof_dump_buf_end == PROF_DUMP_BUFSIZE)
589234370Sjasone			if (prof_flush(propagate_err) && propagate_err)
590234370Sjasone				return (true);
591234370Sjasone
592234370Sjasone		if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) {
593234370Sjasone			/* Finish writing. */
594234370Sjasone			n = slen - i;
595234370Sjasone		} else {
596234370Sjasone			/* Write as much of s as will fit. */
597234370Sjasone			n = PROF_DUMP_BUFSIZE - prof_dump_buf_end;
598234370Sjasone		}
599234370Sjasone		memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
600234370Sjasone		prof_dump_buf_end += n;
601234370Sjasone		i += n;
602234370Sjasone	}
603234370Sjasone
604234370Sjasone	return (false);
605234370Sjasone}
606234370Sjasone
607234370SjasoneJEMALLOC_ATTR(format(printf, 2, 3))
608234370Sjasonestatic bool
609234370Sjasoneprof_printf(bool propagate_err, const char *format, ...)
610234370Sjasone{
611234370Sjasone	bool ret;
612234370Sjasone	va_list ap;
613234370Sjasone	char buf[PROF_PRINTF_BUFSIZE];
614234370Sjasone
615234370Sjasone	va_start(ap, format);
616234370Sjasone	malloc_vsnprintf(buf, sizeof(buf), format, ap);
617234370Sjasone	va_end(ap);
618234370Sjasone	ret = prof_write(propagate_err, buf);
619234370Sjasone
620234370Sjasone	return (ret);
621234370Sjasone}
622234370Sjasone
/*
 * Sum ctx's merged counters plus all of its live per thread counters into
 * ctx->cnt_summed, fold the result into cnt_all, and increment *leak_nctx if
 * the ctx still has current allocations.  Acquires ctx->lock.
 */
static void
prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
{
	prof_thr_cnt_t *thr_cnt;
	prof_cnt_t tcnt;

	cassert(config_prof);

	malloc_mutex_lock(ctx->lock);

	/* Start from counters already merged from retired thread caches. */
	memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
	ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
		volatile unsigned *epoch = &thr_cnt->epoch;

		/*
		 * Lock-free snapshot: retry until epoch is even and unchanged
		 * across the copy.  Writers are presumably expected to make
		 * epoch odd while mutating cnts -- confirm against the update
		 * sites.
		 */
		while (true) {
			unsigned epoch0 = *epoch;

			/* Make sure epoch is even. */
			if (epoch0 & 1U)
				continue;

			memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t));

			/* Terminate if epoch didn't change while reading. */
			if (*epoch == epoch0)
				break;
		}

		ctx->cnt_summed.curobjs += tcnt.curobjs;
		ctx->cnt_summed.curbytes += tcnt.curbytes;
		if (opt_prof_accum) {
			ctx->cnt_summed.accumobjs += tcnt.accumobjs;
			ctx->cnt_summed.accumbytes += tcnt.accumbytes;
		}
	}

	/* A ctx with surviving objects counts toward the leak report. */
	if (ctx->cnt_summed.curobjs != 0)
		(*leak_nctx)++;

	/* Add to cnt_all. */
	cnt_all->curobjs += ctx->cnt_summed.curobjs;
	cnt_all->curbytes += ctx->cnt_summed.curbytes;
	if (opt_prof_accum) {
		cnt_all->accumobjs += ctx->cnt_summed.accumobjs;
		cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
	}

	malloc_mutex_unlock(ctx->lock);
}
672234370Sjasone
/*
 * Destroy ctx if it is completely unused (no linked thread counters, no
 * merged current objects, and exactly the caller's limbo reference);
 * otherwise just drop the caller's limbo reference.
 */
static void
prof_ctx_destroy(prof_ctx_t *ctx)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	/*
	 * Check that ctx is still unused by any thread cache before destroying
	 * it.  prof_lookup() increments ctx->nlimbo in order to avoid a race
	 * condition with this function, as does prof_ctx_merge() in order to
	 * avoid a race between the main body of prof_ctx_merge() and entry
	 * into this function.
	 */
	prof_tdata = prof_tdata_get(false);
	assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX);
	prof_enter(prof_tdata);
	malloc_mutex_lock(ctx->lock);
	/* nlimbo == 1 means the caller holds the only limbo reference. */
	if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 &&
	    ctx->nlimbo == 1) {
		assert(ctx->cnt_merged.curbytes == 0);
		assert(ctx->cnt_merged.accumobjs == 0);
		assert(ctx->cnt_merged.accumbytes == 0);
		/* Remove ctx from bt2ctx. */
		if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
			assert(false);
		prof_leave(prof_tdata);
		/* Destroy ctx. */
		malloc_mutex_unlock(ctx->lock);
		bt_destroy(ctx->bt);
		idalloc(ctx);
	} else {
		/*
		 * Compensate for increment in prof_ctx_merge() or
		 * prof_lookup().
		 */
		ctx->nlimbo--;
		malloc_mutex_unlock(ctx->lock);
		prof_leave(prof_tdata);
	}
}
714234370Sjasone
/*
 * Fold cnt's counters into ctx->cnt_merged and unlink cnt from ctx.  If this
 * leaves ctx with no counters, no current objects, and no limbo references,
 * hand it to prof_ctx_destroy().
 */
static void
prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
{
	bool destroy;

	cassert(config_prof);

	/* Merge cnt stats and detach from ctx. */
	malloc_mutex_lock(ctx->lock);
	ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
	ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
	ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
	ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
	ql_remove(&ctx->cnts_ql, cnt, cnts_link);
	if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
	    ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) {
		/*
		 * Increment ctx->nlimbo in order to keep another thread from
		 * winning the race to destroy ctx while this one has ctx->lock
		 * dropped.  Without this, it would be possible for another
		 * thread to:
		 *
		 * 1) Sample an allocation associated with ctx.
		 * 2) Deallocate the sampled object.
		 * 3) Successfully prof_ctx_destroy(ctx).
		 *
		 * The result would be that ctx no longer exists by the time
		 * this thread accesses it in prof_ctx_destroy().
		 */
		ctx->nlimbo++;
		destroy = true;
	} else
		destroy = false;
	malloc_mutex_unlock(ctx->lock);
	/* Destruction must happen outside ctx->lock (it re-takes locks). */
	if (destroy)
		prof_ctx_destroy(ctx);
}
752234370Sjasone
753234370Sjasonestatic bool
754234370Sjasoneprof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, prof_bt_t *bt)
755234370Sjasone{
756234370Sjasone	unsigned i;
757234370Sjasone
758234370Sjasone	cassert(config_prof);
759234370Sjasone
760235238Sjasone	/*
761235238Sjasone	 * Current statistics can sum to 0 as a result of unmerged per thread
762235238Sjasone	 * statistics.  Additionally, interval- and growth-triggered dumps can
763235238Sjasone	 * occur between the time a ctx is created and when its statistics are
764235238Sjasone	 * filled in.  Avoid dumping any ctx that is an artifact of either
765235238Sjasone	 * implementation detail.
766235238Sjasone	 */
767235238Sjasone	if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) ||
768235238Sjasone	    (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) {
769235238Sjasone		assert(ctx->cnt_summed.curobjs == 0);
770234370Sjasone		assert(ctx->cnt_summed.curbytes == 0);
771234370Sjasone		assert(ctx->cnt_summed.accumobjs == 0);
772234370Sjasone		assert(ctx->cnt_summed.accumbytes == 0);
773234370Sjasone		return (false);
774234370Sjasone	}
775234370Sjasone
776234370Sjasone	if (prof_printf(propagate_err, "%"PRId64": %"PRId64
777234370Sjasone	    " [%"PRIu64": %"PRIu64"] @",
778234370Sjasone	    ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes,
779234370Sjasone	    ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes))
780234370Sjasone		return (true);
781234370Sjasone
782234370Sjasone	for (i = 0; i < bt->len; i++) {
783234370Sjasone		if (prof_printf(propagate_err, " %#"PRIxPTR,
784234370Sjasone		    (uintptr_t)bt->vec[i]))
785234370Sjasone			return (true);
786234370Sjasone	}
787234370Sjasone
788234370Sjasone	if (prof_write(propagate_err, "\n"))
789234370Sjasone		return (true);
790234370Sjasone
791234370Sjasone	return (false);
792234370Sjasone}
793234370Sjasone
794234370Sjasonestatic bool
795234370Sjasoneprof_dump_maps(bool propagate_err)
796234370Sjasone{
797234370Sjasone	int mfd;
798234370Sjasone	char filename[PATH_MAX + 1];
799234370Sjasone
800234370Sjasone	cassert(config_prof);
801234370Sjasone
802234370Sjasone	malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps",
803234370Sjasone	    (int)getpid());
804234370Sjasone	mfd = open(filename, O_RDONLY);
805234370Sjasone	if (mfd != -1) {
806234370Sjasone		ssize_t nread;
807234370Sjasone
808234370Sjasone		if (prof_write(propagate_err, "\nMAPPED_LIBRARIES:\n") &&
809234370Sjasone		    propagate_err)
810234370Sjasone			return (true);
811234370Sjasone		nread = 0;
812234370Sjasone		do {
813234370Sjasone			prof_dump_buf_end += nread;
814234370Sjasone			if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
815234370Sjasone				/* Make space in prof_dump_buf before read(). */
816234370Sjasone				if (prof_flush(propagate_err) && propagate_err)
817234370Sjasone					return (true);
818234370Sjasone			}
819234370Sjasone			nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
820234370Sjasone			    PROF_DUMP_BUFSIZE - prof_dump_buf_end);
821234370Sjasone		} while (nread > 0);
822234370Sjasone		close(mfd);
823234370Sjasone	} else
824234370Sjasone		return (true);
825234370Sjasone
826234370Sjasone	return (false);
827234370Sjasone}
828234370Sjasone
829234370Sjasonestatic bool
830234370Sjasoneprof_dump(bool propagate_err, const char *filename, bool leakcheck)
831234370Sjasone{
832235238Sjasone	prof_tdata_t *prof_tdata;
833234370Sjasone	prof_cnt_t cnt_all;
834234370Sjasone	size_t tabind;
835234370Sjasone	union {
836234370Sjasone		prof_bt_t	*p;
837234370Sjasone		void		*v;
838234370Sjasone	} bt;
839234370Sjasone	union {
840234370Sjasone		prof_ctx_t	*p;
841234370Sjasone		void		*v;
842234370Sjasone	} ctx;
843234370Sjasone	size_t leak_nctx;
844234370Sjasone
845234370Sjasone	cassert(config_prof);
846234370Sjasone
847251300Sjasone	prof_tdata = prof_tdata_get(false);
848235238Sjasone	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
849235238Sjasone		return (true);
850235238Sjasone	prof_enter(prof_tdata);
851234370Sjasone	prof_dump_fd = creat(filename, 0644);
852234370Sjasone	if (prof_dump_fd == -1) {
853234370Sjasone		if (propagate_err == false) {
854234370Sjasone			malloc_printf(
855234370Sjasone			    "<jemalloc>: creat(\"%s\"), 0644) failed\n",
856234370Sjasone			    filename);
857234370Sjasone			if (opt_abort)
858234370Sjasone				abort();
859234370Sjasone		}
860234370Sjasone		goto label_error;
861234370Sjasone	}
862234370Sjasone
863234370Sjasone	/* Merge per thread profile stats, and sum them in cnt_all. */
864234370Sjasone	memset(&cnt_all, 0, sizeof(prof_cnt_t));
865234370Sjasone	leak_nctx = 0;
866234370Sjasone	for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;)
867234370Sjasone		prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx);
868234370Sjasone
869234370Sjasone	/* Dump profile header. */
870234370Sjasone	if (opt_lg_prof_sample == 0) {
871234370Sjasone		if (prof_printf(propagate_err,
872234370Sjasone		    "heap profile: %"PRId64": %"PRId64
873234370Sjasone		    " [%"PRIu64": %"PRIu64"] @ heapprofile\n",
874234370Sjasone		    cnt_all.curobjs, cnt_all.curbytes,
875234370Sjasone		    cnt_all.accumobjs, cnt_all.accumbytes))
876234370Sjasone			goto label_error;
877234370Sjasone	} else {
878234370Sjasone		if (prof_printf(propagate_err,
879234370Sjasone		    "heap profile: %"PRId64": %"PRId64
880234370Sjasone		    " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n",
881234370Sjasone		    cnt_all.curobjs, cnt_all.curbytes,
882234370Sjasone		    cnt_all.accumobjs, cnt_all.accumbytes,
883234370Sjasone		    ((uint64_t)1U << opt_lg_prof_sample)))
884234370Sjasone			goto label_error;
885234370Sjasone	}
886234370Sjasone
887234370Sjasone	/* Dump  per ctx profile stats. */
888234370Sjasone	for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v)
889234370Sjasone	    == false;) {
890234370Sjasone		if (prof_dump_ctx(propagate_err, ctx.p, bt.p))
891234370Sjasone			goto label_error;
892234370Sjasone	}
893234370Sjasone
894234370Sjasone	/* Dump /proc/<pid>/maps if possible. */
895234370Sjasone	if (prof_dump_maps(propagate_err))
896234370Sjasone		goto label_error;
897234370Sjasone
898234370Sjasone	if (prof_flush(propagate_err))
899234370Sjasone		goto label_error;
900234370Sjasone	close(prof_dump_fd);
901235238Sjasone	prof_leave(prof_tdata);
902234370Sjasone
903234370Sjasone	if (leakcheck && cnt_all.curbytes != 0) {
904234370Sjasone		malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %"
905234370Sjasone		    PRId64" object%s, %zu context%s\n",
906234370Sjasone		    cnt_all.curbytes, (cnt_all.curbytes != 1) ? "s" : "",
907234370Sjasone		    cnt_all.curobjs, (cnt_all.curobjs != 1) ? "s" : "",
908234370Sjasone		    leak_nctx, (leak_nctx != 1) ? "s" : "");
909234370Sjasone		malloc_printf(
910234370Sjasone		    "<jemalloc>: Run pprof on \"%s\" for leak detail\n",
911234370Sjasone		    filename);
912234370Sjasone	}
913234370Sjasone
914234370Sjasone	return (false);
915234370Sjasonelabel_error:
916235238Sjasone	prof_leave(prof_tdata);
917234370Sjasone	return (true);
918234370Sjasone}
919234370Sjasone
920234370Sjasone#define	DUMP_FILENAME_BUFSIZE	(PATH_MAX + 1)
921234370Sjasonestatic void
922234370Sjasoneprof_dump_filename(char *filename, char v, int64_t vseq)
923234370Sjasone{
924234370Sjasone
925234370Sjasone	cassert(config_prof);
926234370Sjasone
927234370Sjasone	if (vseq != UINT64_C(0xffffffffffffffff)) {
928234370Sjasone	        /* "<prefix>.<pid>.<seq>.v<vseq>.heap" */
929234370Sjasone		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
930234370Sjasone		    "%s.%d.%"PRIu64".%c%"PRId64".heap",
931234370Sjasone		    opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq);
932234370Sjasone	} else {
933234370Sjasone	        /* "<prefix>.<pid>.<seq>.<v>.heap" */
934234370Sjasone		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
935234370Sjasone		    "%s.%d.%"PRIu64".%c.heap",
936234370Sjasone		    opt_prof_prefix, (int)getpid(), prof_dump_seq, v);
937234370Sjasone	}
938235238Sjasone	prof_dump_seq++;
939234370Sjasone}
940234370Sjasone
941234370Sjasonestatic void
942234370Sjasoneprof_fdump(void)
943234370Sjasone{
944234370Sjasone	char filename[DUMP_FILENAME_BUFSIZE];
945234370Sjasone
946234370Sjasone	cassert(config_prof);
947234370Sjasone
948234370Sjasone	if (prof_booted == false)
949234370Sjasone		return;
950234370Sjasone
951234543Sjasone	if (opt_prof_final && opt_prof_prefix[0] != '\0') {
952234370Sjasone		malloc_mutex_lock(&prof_dump_seq_mtx);
953234370Sjasone		prof_dump_filename(filename, 'f', UINT64_C(0xffffffffffffffff));
954234370Sjasone		malloc_mutex_unlock(&prof_dump_seq_mtx);
955234370Sjasone		prof_dump(false, filename, opt_prof_leak);
956234370Sjasone	}
957234370Sjasone}
958234370Sjasone
959234370Sjasonevoid
960234370Sjasoneprof_idump(void)
961234370Sjasone{
962235238Sjasone	prof_tdata_t *prof_tdata;
963234370Sjasone	char filename[PATH_MAX + 1];
964234370Sjasone
965234370Sjasone	cassert(config_prof);
966234370Sjasone
967234370Sjasone	if (prof_booted == false)
968234370Sjasone		return;
969251300Sjasone	prof_tdata = prof_tdata_get(false);
970235238Sjasone	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
971234370Sjasone		return;
972235238Sjasone	if (prof_tdata->enq) {
973235238Sjasone		prof_tdata->enq_idump = true;
974235238Sjasone		return;
975234370Sjasone	}
976234370Sjasone
977234370Sjasone	if (opt_prof_prefix[0] != '\0') {
978234370Sjasone		malloc_mutex_lock(&prof_dump_seq_mtx);
979234370Sjasone		prof_dump_filename(filename, 'i', prof_dump_iseq);
980234370Sjasone		prof_dump_iseq++;
981234370Sjasone		malloc_mutex_unlock(&prof_dump_seq_mtx);
982234370Sjasone		prof_dump(false, filename, false);
983234370Sjasone	}
984234370Sjasone}
985234370Sjasone
986234370Sjasonebool
987234370Sjasoneprof_mdump(const char *filename)
988234370Sjasone{
989234370Sjasone	char filename_buf[DUMP_FILENAME_BUFSIZE];
990234370Sjasone
991234370Sjasone	cassert(config_prof);
992234370Sjasone
993234370Sjasone	if (opt_prof == false || prof_booted == false)
994234370Sjasone		return (true);
995234370Sjasone
996234370Sjasone	if (filename == NULL) {
997234370Sjasone		/* No filename specified, so automatically generate one. */
998234370Sjasone		if (opt_prof_prefix[0] == '\0')
999234370Sjasone			return (true);
1000234370Sjasone		malloc_mutex_lock(&prof_dump_seq_mtx);
1001234370Sjasone		prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
1002234370Sjasone		prof_dump_mseq++;
1003234370Sjasone		malloc_mutex_unlock(&prof_dump_seq_mtx);
1004234370Sjasone		filename = filename_buf;
1005234370Sjasone	}
1006234370Sjasone	return (prof_dump(true, filename, false));
1007234370Sjasone}
1008234370Sjasone
1009234370Sjasonevoid
1010234370Sjasoneprof_gdump(void)
1011234370Sjasone{
1012235238Sjasone	prof_tdata_t *prof_tdata;
1013234370Sjasone	char filename[DUMP_FILENAME_BUFSIZE];
1014234370Sjasone
1015234370Sjasone	cassert(config_prof);
1016234370Sjasone
1017234370Sjasone	if (prof_booted == false)
1018234370Sjasone		return;
1019251300Sjasone	prof_tdata = prof_tdata_get(false);
1020235238Sjasone	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
1021234370Sjasone		return;
1022235238Sjasone	if (prof_tdata->enq) {
1023235238Sjasone		prof_tdata->enq_gdump = true;
1024235238Sjasone		return;
1025234370Sjasone	}
1026234370Sjasone
1027234370Sjasone	if (opt_prof_prefix[0] != '\0') {
1028234370Sjasone		malloc_mutex_lock(&prof_dump_seq_mtx);
1029234370Sjasone		prof_dump_filename(filename, 'u', prof_dump_useq);
1030234370Sjasone		prof_dump_useq++;
1031234370Sjasone		malloc_mutex_unlock(&prof_dump_seq_mtx);
1032234370Sjasone		prof_dump(false, filename, false);
1033234370Sjasone	}
1034234370Sjasone}
1035234370Sjasone
1036234370Sjasonestatic void
1037245868Sjasoneprof_bt_hash(const void *key, size_t r_hash[2])
1038234370Sjasone{
1039234370Sjasone	prof_bt_t *bt = (prof_bt_t *)key;
1040234370Sjasone
1041234370Sjasone	cassert(config_prof);
1042234370Sjasone
1043245868Sjasone	hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
1044234370Sjasone}
1045234370Sjasone
1046234370Sjasonestatic bool
1047234370Sjasoneprof_bt_keycomp(const void *k1, const void *k2)
1048234370Sjasone{
1049234370Sjasone	const prof_bt_t *bt1 = (prof_bt_t *)k1;
1050234370Sjasone	const prof_bt_t *bt2 = (prof_bt_t *)k2;
1051234370Sjasone
1052234370Sjasone	cassert(config_prof);
1053234370Sjasone
1054234370Sjasone	if (bt1->len != bt2->len)
1055234370Sjasone		return (false);
1056234370Sjasone	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
1057234370Sjasone}
1058234370Sjasone
1059234370Sjasonestatic malloc_mutex_t *
1060234370Sjasoneprof_ctx_mutex_choose(void)
1061234370Sjasone{
1062234370Sjasone	unsigned nctxs = atomic_add_u(&cum_ctxs, 1);
1063234370Sjasone
1064234370Sjasone	return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]);
1065234370Sjasone}
1066234370Sjasone
1067234370Sjasoneprof_tdata_t *
1068234370Sjasoneprof_tdata_init(void)
1069234370Sjasone{
1070234370Sjasone	prof_tdata_t *prof_tdata;
1071234370Sjasone
1072234370Sjasone	cassert(config_prof);
1073234370Sjasone
1074234370Sjasone	/* Initialize an empty cache for this thread. */
1075234370Sjasone	prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
1076234370Sjasone	if (prof_tdata == NULL)
1077234370Sjasone		return (NULL);
1078234370Sjasone
1079234370Sjasone	if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS,
1080234370Sjasone	    prof_bt_hash, prof_bt_keycomp)) {
1081234370Sjasone		idalloc(prof_tdata);
1082234370Sjasone		return (NULL);
1083234370Sjasone	}
1084234370Sjasone	ql_new(&prof_tdata->lru_ql);
1085234370Sjasone
1086234370Sjasone	prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX);
1087234370Sjasone	if (prof_tdata->vec == NULL) {
1088234370Sjasone		ckh_delete(&prof_tdata->bt2cnt);
1089234370Sjasone		idalloc(prof_tdata);
1090234370Sjasone		return (NULL);
1091234370Sjasone	}
1092234370Sjasone
1093234370Sjasone	prof_tdata->prng_state = 0;
1094234370Sjasone	prof_tdata->threshold = 0;
1095234370Sjasone	prof_tdata->accum = 0;
1096234370Sjasone
1097235238Sjasone	prof_tdata->enq = false;
1098235238Sjasone	prof_tdata->enq_idump = false;
1099235238Sjasone	prof_tdata->enq_gdump = false;
1100235238Sjasone
1101234370Sjasone	prof_tdata_tsd_set(&prof_tdata);
1102234370Sjasone
1103234370Sjasone	return (prof_tdata);
1104234370Sjasone}
1105234370Sjasone
1106234370Sjasonevoid
1107234370Sjasoneprof_tdata_cleanup(void *arg)
1108234370Sjasone{
1109234370Sjasone	prof_thr_cnt_t *cnt;
1110234370Sjasone	prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg;
1111234370Sjasone
1112234370Sjasone	cassert(config_prof);
1113234370Sjasone
1114235238Sjasone	if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) {
1115235238Sjasone		/*
1116235238Sjasone		 * Another destructor deallocated memory after this destructor
1117235238Sjasone		 * was called.  Reset prof_tdata to PROF_TDATA_STATE_PURGATORY
1118235238Sjasone		 * in order to receive another callback.
1119235238Sjasone		 */
1120235238Sjasone		prof_tdata = PROF_TDATA_STATE_PURGATORY;
1121235238Sjasone		prof_tdata_tsd_set(&prof_tdata);
1122235238Sjasone	} else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) {
1123235238Sjasone		/*
1124235238Sjasone		 * The previous time this destructor was called, we set the key
1125235238Sjasone		 * to PROF_TDATA_STATE_PURGATORY so that other destructors
1126235238Sjasone		 * wouldn't cause re-creation of the prof_tdata.  This time, do
1127235238Sjasone		 * nothing, so that the destructor will not be called again.
1128235238Sjasone		 */
1129235238Sjasone	} else if (prof_tdata != NULL) {
1130235238Sjasone		/*
1131235238Sjasone		 * Delete the hash table.  All of its contents can still be
1132235238Sjasone		 * iterated over via the LRU.
1133235238Sjasone		 */
1134235238Sjasone		ckh_delete(&prof_tdata->bt2cnt);
1135235238Sjasone		/*
1136235238Sjasone		 * Iteratively merge cnt's into the global stats and delete
1137235238Sjasone		 * them.
1138235238Sjasone		 */
1139235238Sjasone		while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
1140235238Sjasone			ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
1141235238Sjasone			prof_ctx_merge(cnt->ctx, cnt);
1142235238Sjasone			idalloc(cnt);
1143235238Sjasone		}
1144235238Sjasone		idalloc(prof_tdata->vec);
1145235238Sjasone		idalloc(prof_tdata);
1146235238Sjasone		prof_tdata = PROF_TDATA_STATE_PURGATORY;
1147235238Sjasone		prof_tdata_tsd_set(&prof_tdata);
1148234370Sjasone	}
1149234370Sjasone}
1150234370Sjasone
1151234370Sjasonevoid
1152234370Sjasoneprof_boot0(void)
1153234370Sjasone{
1154234370Sjasone
1155234370Sjasone	cassert(config_prof);
1156234370Sjasone
1157234370Sjasone	memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
1158234370Sjasone	    sizeof(PROF_PREFIX_DEFAULT));
1159234370Sjasone}
1160234370Sjasone
1161234370Sjasonevoid
1162234370Sjasoneprof_boot1(void)
1163234370Sjasone{
1164234370Sjasone
1165234370Sjasone	cassert(config_prof);
1166234370Sjasone
1167234370Sjasone	/*
1168234370Sjasone	 * opt_prof and prof_promote must be in their final state before any
1169234370Sjasone	 * arenas are initialized, so this function must be executed early.
1170234370Sjasone	 */
1171234370Sjasone
1172234370Sjasone	if (opt_prof_leak && opt_prof == false) {
1173234370Sjasone		/*
1174234370Sjasone		 * Enable opt_prof, but in such a way that profiles are never
1175234370Sjasone		 * automatically dumped.
1176234370Sjasone		 */
1177234370Sjasone		opt_prof = true;
1178234370Sjasone		opt_prof_gdump = false;
1179234370Sjasone	} else if (opt_prof) {
1180234370Sjasone		if (opt_lg_prof_interval >= 0) {
1181234370Sjasone			prof_interval = (((uint64_t)1U) <<
1182234370Sjasone			    opt_lg_prof_interval);
1183245868Sjasone		}
1184234370Sjasone	}
1185234370Sjasone
1186234370Sjasone	prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE);
1187234370Sjasone}
1188234370Sjasone
1189234370Sjasonebool
1190234370Sjasoneprof_boot2(void)
1191234370Sjasone{
1192234370Sjasone
1193234370Sjasone	cassert(config_prof);
1194234370Sjasone
1195234370Sjasone	if (opt_prof) {
1196234370Sjasone		unsigned i;
1197234370Sjasone
1198234370Sjasone		if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
1199234370Sjasone		    prof_bt_keycomp))
1200234370Sjasone			return (true);
1201234370Sjasone		if (malloc_mutex_init(&bt2ctx_mtx))
1202234370Sjasone			return (true);
1203234370Sjasone		if (prof_tdata_tsd_boot()) {
1204234370Sjasone			malloc_write(
1205234370Sjasone			    "<jemalloc>: Error in pthread_key_create()\n");
1206234370Sjasone			abort();
1207234370Sjasone		}
1208234370Sjasone
1209234370Sjasone		if (malloc_mutex_init(&prof_dump_seq_mtx))
1210234370Sjasone			return (true);
1211234370Sjasone
1212234370Sjasone		if (atexit(prof_fdump) != 0) {
1213234370Sjasone			malloc_write("<jemalloc>: Error in atexit()\n");
1214234370Sjasone			if (opt_abort)
1215234370Sjasone				abort();
1216234370Sjasone		}
1217234370Sjasone
1218234370Sjasone		ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS *
1219234370Sjasone		    sizeof(malloc_mutex_t));
1220234370Sjasone		if (ctx_locks == NULL)
1221234370Sjasone			return (true);
1222234370Sjasone		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
1223234370Sjasone			if (malloc_mutex_init(&ctx_locks[i]))
1224234370Sjasone				return (true);
1225234370Sjasone		}
1226234370Sjasone	}
1227234370Sjasone
1228234370Sjasone#ifdef JEMALLOC_PROF_LIBGCC
1229234370Sjasone	/*
1230234370Sjasone	 * Cause the backtracing machinery to allocate its internal state
1231234370Sjasone	 * before enabling profiling.
1232234370Sjasone	 */
1233234370Sjasone	_Unwind_Backtrace(prof_unwind_init_callback, NULL);
1234234370Sjasone#endif
1235234370Sjasone
1236234370Sjasone	prof_booted = true;
1237234370Sjasone
1238234370Sjasone	return (false);
1239234370Sjasone}
1240234370Sjasone
1241242844Sjasonevoid
1242242844Sjasoneprof_prefork(void)
1243242844Sjasone{
1244242844Sjasone
1245242844Sjasone	if (opt_prof) {
1246242844Sjasone		unsigned i;
1247242844Sjasone
1248242844Sjasone		malloc_mutex_lock(&bt2ctx_mtx);
1249242844Sjasone		malloc_mutex_lock(&prof_dump_seq_mtx);
1250242844Sjasone		for (i = 0; i < PROF_NCTX_LOCKS; i++)
1251242844Sjasone			malloc_mutex_lock(&ctx_locks[i]);
1252242844Sjasone	}
1253242844Sjasone}
1254242844Sjasone
1255242844Sjasonevoid
1256242844Sjasoneprof_postfork_parent(void)
1257242844Sjasone{
1258242844Sjasone
1259242844Sjasone	if (opt_prof) {
1260242844Sjasone		unsigned i;
1261242844Sjasone
1262242844Sjasone		for (i = 0; i < PROF_NCTX_LOCKS; i++)
1263242844Sjasone			malloc_mutex_postfork_parent(&ctx_locks[i]);
1264242844Sjasone		malloc_mutex_postfork_parent(&prof_dump_seq_mtx);
1265242844Sjasone		malloc_mutex_postfork_parent(&bt2ctx_mtx);
1266242844Sjasone	}
1267242844Sjasone}
1268242844Sjasone
1269242844Sjasonevoid
1270242844Sjasoneprof_postfork_child(void)
1271242844Sjasone{
1272242844Sjasone
1273242844Sjasone	if (opt_prof) {
1274242844Sjasone		unsigned i;
1275242844Sjasone
1276242844Sjasone		for (i = 0; i < PROF_NCTX_LOCKS; i++)
1277242844Sjasone			malloc_mutex_postfork_child(&ctx_locks[i]);
1278242844Sjasone		malloc_mutex_postfork_child(&prof_dump_seq_mtx);
1279242844Sjasone		malloc_mutex_postfork_child(&bt2ctx_mtx);
1280242844Sjasone	}
1281242844Sjasone}
1282242844Sjasone
1283234370Sjasone/******************************************************************************/
1284