/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_thr_cnt_s prof_thr_cnt_t;
typedef struct prof_ctx_s prof_ctx_t;
typedef struct prof_tdata_s prof_tdata_t;

/* Option defaults. */
#define	PROF_PREFIX_DEFAULT		"jeprof"
#define	LG_PROF_SAMPLE_DEFAULT		19
#define	LG_PROF_INTERVAL_DEFAULT	-1

/*
 * Hard limit on stack backtrace depth.  The version of prof_backtrace() that
 * is based on __builtin_return_address() necessarily has a hard-coded number
 * of backtrace frame handlers, and should be kept in sync with this setting.
 */
#define	PROF_BT_MAX			128

/* Maximum number of backtraces to store in each per thread LRU cache. */
#define	PROF_TCMAX			1024

/* Initial hash table size. */
#define	PROF_CKH_MINITEMS		64

/* Size of memory buffer to use when writing dump files. */
#define	PROF_DUMP_BUFSIZE		65536

/* Size of stack-allocated buffer used by prof_printf(). */
#define	PROF_PRINTF_BUFSIZE		128

/*
 * Number of mutexes shared among all ctx's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define	PROF_NCTX_LOCKS			1024

/*
 * prof_tdata pointers close to NULL are used to encode state information that
 * is used for cleaning up during thread shutdown.
 */
#define	PROF_TDATA_STATE_REINCARNATED	((prof_tdata_t *)(uintptr_t)1)
#define	PROF_TDATA_STATE_PURGATORY	((prof_tdata_t *)(uintptr_t)2)
#define	PROF_TDATA_STATE_MAX		PROF_TDATA_STATE_PURGATORY
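
/*
 * Illustrative note (not from the original source): consumers compare a
 * prof_tdata pointer against these sentinels before dereferencing it.  The
 * idiom, as used by PROF_ALLOC_PREP() and prof_sample_accum_update() below:
 *
 *	prof_tdata = prof_tdata_get(false);
 *	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) {
 *		(NULL or a shutdown sentinel; there is no usable per thread
 *		state, so bail out instead of dereferencing.)
 *	}
 */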

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

struct prof_bt_s {
	/* Backtrace, stored as len program counters. */
	void		**vec;
	unsigned	len;
};

#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
	prof_bt_t	*bt;
	unsigned	nignore;
	unsigned	max;
} prof_unwind_data_t;
#endif

struct prof_cnt_s {
	/*
	 * Profiling counters.  An allocation/deallocation pair can operate on
	 * different prof_thr_cnt_t objects that are linked into the same
	 * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go
	 * negative.  In principle it is possible for the *bytes counters to
	 * overflow/underflow, but a general solution would require something
	 * like 128-bit counters; this implementation doesn't bother to solve
	 * that problem.
	 */
	int64_t		curobjs;
	int64_t		curbytes;
	uint64_t	accumobjs;
	uint64_t	accumbytes;
};

struct prof_thr_cnt_s {
	/* Linkage into prof_ctx_t's cnts_ql. */
	ql_elm(prof_thr_cnt_t)	cnts_link;

	/* Linkage into thread's LRU. */
	ql_elm(prof_thr_cnt_t)	lru_link;

	/*
	 * Associated context.  If a thread frees an object that it did not
	 * allocate, it is possible that the context is not cached in the
	 * thread's hash table, in which case it must be able to look up the
	 * context, insert a new prof_thr_cnt_t into the thread's hash table,
	 * and link it into the prof_ctx_t's cnts_ql.
	 */
	prof_ctx_t		*ctx;

	/*
	 * Threads use memory barriers to update the counters.  Since there is
	 * only ever one writer, the only challenge is for the reader to get a
	 * consistent read of the counters.
	 *
	 * The writer uses this series of operations:
	 *
	 * 1) Increment epoch to an odd number.
	 * 2) Update counters.
	 * 3) Increment epoch to an even number.
	 *
	 * The reader must ensure 1) that the epoch is even while it reads the
	 * counters, and 2) that the epoch doesn't change between the time it
	 * starts and finishes reading the counters.
	 */
	unsigned		epoch;

	/* Profiling counters. */
	prof_cnt_t		cnts;
};
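
/*
 * A minimal reader sketch for the epoch protocol described above (for
 * exposition only; prof_thr_cnt_read() is a hypothetical helper, not part of
 * this header, and memory barriers are elided for brevity):
 *
 *	prof_cnt_t
 *	prof_thr_cnt_read(const prof_thr_cnt_t *cnt)
 *	{
 *		prof_cnt_t snap;
 *		unsigned epoch0;
 *
 *		do {
 *			epoch0 = cnt->epoch;
 *			snap = cnt->cnts;
 *		} while ((epoch0 & 1U) != 0 || epoch0 != cnt->epoch);
 *		(epoch0 even and unchanged across the copy implies that snap
 *		is a consistent snapshot of the counters.)
 *		return (snap);
 *	}
 */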

struct prof_ctx_s {
	/* Associated backtrace. */
	prof_bt_t		*bt;

	/* Protects nlimbo, cnt_merged, and cnts_ql. */
	malloc_mutex_t		*lock;

	/*
	 * Number of threads that currently cause this ctx to be in a state of
	 * limbo due to one of:
	 *   - Initializing per thread counters associated with this ctx.
	 *   - Preparing to destroy this ctx.
	 * nlimbo must be 1 (single destroyer) in order to safely destroy the
	 * ctx.
	 */
	unsigned		nlimbo;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* When threads exit, they merge their stats into cnt_merged. */
	prof_cnt_t		cnt_merged;

	/*
	 * List of profile counters, one for each thread that has allocated in
	 * this context.
	 */
	ql_head(prof_thr_cnt_t)	cnts_ql;
};

struct prof_tdata_s {
	/*
	 * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *).  Each thread keeps a
	 * cache of backtraces, with associated thread-specific prof_thr_cnt_t
	 * objects.  Other threads may read the prof_thr_cnt_t contents, but no
	 * others will ever write them.
	 *
	 * Upon thread exit, the thread must merge all the prof_thr_cnt_t
	 * counter data into the associated prof_ctx_t objects, and unlink/free
	 * the prof_thr_cnt_t objects.
	 */
	ckh_t			bt2cnt;

	/* LRU for contents of bt2cnt. */
	ql_head(prof_thr_cnt_t)	lru_ql;

	/* Backtrace vector, used for calls to prof_backtrace(). */
	void			**vec;

	/* Sampling state. */
	uint64_t		prng_state;
	uint64_t		threshold;
	uint64_t		accum;

	/* State used to avoid dumping while operating on prof internals. */
	bool			enq;
	bool			enq_idump;
	bool			enq_gdump;
};

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

extern bool	opt_prof;
/*
 * Even if opt_prof is true, sampling can be temporarily disabled by setting
 * opt_prof_active to false.  No locking is used when updating opt_prof_active,
 * so there are no guarantees regarding how long it will take for all threads
 * to notice state changes.
 */
extern bool	opt_prof_active;
extern size_t	opt_lg_prof_sample;	/* Mean bytes between samples. */
extern ssize_t	opt_lg_prof_interval;	/* lg(prof_interval). */
extern bool	opt_prof_gdump;		/* High-water memory dumping. */
extern bool	opt_prof_final;		/* Final profile dumping. */
extern bool	opt_prof_leak;		/* Dump leak summary at exit. */
extern bool	opt_prof_accum;		/* Report cumulative bytes. */
extern char	opt_prof_prefix[PATH_MAX + 1];

/*
 * Profile dump interval, measured in bytes allocated.  Each arena triggers a
 * profile dump when it reaches this threshold.  The effect is that the
 * interval between profile dumps averages prof_interval, though the actual
 * interval between dumps will tend to be sporadic, and the interval will be a
 * maximum of approximately (prof_interval * narenas).
 */
extern uint64_t	prof_interval;
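
/*
 * Worked example (illustrative numbers, not defaults): with
 * opt_lg_prof_interval set to 30, prof_interval is 2^30 bytes (1 GiB).  With
 * 4 arenas, each arena dumps once per GiB that it allocates, so the gap
 * between dumps averages roughly 1 GiB of total allocation but can approach
 * 4 GiB in the worst case.
 */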

/*
 * If true, promote small sampled objects to large objects, since small run
 * headers do not have embedded profile context pointers.
 */
extern bool	prof_promote;

void	bt_init(prof_bt_t *bt, void **vec);
void	prof_backtrace(prof_bt_t *bt, unsigned nignore);
prof_thr_cnt_t	*prof_lookup(prof_bt_t *bt);
void	prof_idump(void);
bool	prof_mdump(const char *filename);
void	prof_gdump(void);
prof_tdata_t	*prof_tdata_init(void);
void	prof_tdata_cleanup(void *arg);
void	prof_boot0(void);
void	prof_boot1(void);
bool	prof_boot2(void);
void	prof_prefork(void);
void	prof_postfork_parent(void);
void	prof_postfork_child(void);

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

#define	PROF_ALLOC_PREP(nignore, size, ret) do {			\
	prof_tdata_t *prof_tdata;					\
	prof_bt_t bt;							\
									\
	assert(size == s2u(size));					\
									\
	prof_tdata = prof_tdata_get(true);				\
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) {	\
		if (prof_tdata != NULL)					\
			ret = (prof_thr_cnt_t *)(uintptr_t)1U;		\
		else							\
			ret = NULL;					\
		break;							\
	}								\
									\
	if (opt_prof_active == false) {					\
		/* Sampling is currently inactive, so avoid sampling. */\
		ret = (prof_thr_cnt_t *)(uintptr_t)1U;			\
	} else if (opt_lg_prof_sample == 0) {				\
		/* Don't bother with sampling logic, since sampling   */\
		/* interval is 1.                                     */\
		bt_init(&bt, prof_tdata->vec);				\
		prof_backtrace(&bt, nignore);				\
		ret = prof_lookup(&bt);					\
	} else {							\
		if (prof_tdata->threshold == 0) {			\
			/* Initialize.  Seed the prng differently for */\
			/* each thread.                               */\
			prof_tdata->prng_state =			\
			    (uint64_t)(uintptr_t)&size;			\
			prof_sample_threshold_update(prof_tdata);	\
		}							\
									\
		/* Determine whether to capture a backtrace based on  */\
		/* whether size is enough for prof_accum to reach     */\
		/* prof_tdata->threshold.  However, delay updating    */\
		/* these variables until prof_{m,re}alloc(), because  */\
		/* we don't know for sure that the allocation will    */\
		/* succeed.                                           */\
		/*                                                    */\
		/* Use subtraction rather than addition to avoid      */\
		/* potential integer overflow.                        */\
		if (size >= prof_tdata->threshold -			\
		    prof_tdata->accum) {				\
			bt_init(&bt, prof_tdata->vec);			\
			prof_backtrace(&bt, nignore);			\
			ret = prof_lookup(&bt);				\
		} else							\
			ret = (prof_thr_cnt_t *)(uintptr_t)1U;		\
	}								\
} while (0)
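
/*
 * A minimal caller sketch (assumed shape; the real call sites are the
 * allocation paths in jemalloc.c).  ret comes back as NULL on failure to
 * initialize per thread data, (uintptr_t)1U when this allocation is not
 * sampled, or a prof_thr_cnt_t pointer when it is:
 *
 *	prof_thr_cnt_t *cnt;
 *	size_t usize = s2u(size);
 *
 *	PROF_ALLOC_PREP(1, usize, cnt);
 *	if (cnt == NULL)
 *		return (NULL);
 *	p = ...allocate usize bytes...;
 *	if (p == NULL)
 *		return (NULL);
 *	prof_malloc(p, usize, cnt);
 *
 * (prof_malloc() then commits the sampling decision that was deferred by the
 * macro.)
 */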

#ifndef JEMALLOC_ENABLE_INLINE
malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)

prof_tdata_t	*prof_tdata_get(bool create);
void	prof_sample_threshold_update(prof_tdata_t *prof_tdata);
prof_ctx_t	*prof_ctx_get(const void *ptr);
void	prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
bool	prof_sample_accum_update(size_t size);
void	prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt);
void	prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
    size_t old_size, prof_ctx_t *old_ctx);
void	prof_free(const void *ptr, size_t size);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
malloc_tsd_externs(prof_tdata, prof_tdata_t *)
malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL,
    prof_tdata_cleanup)

JEMALLOC_INLINE prof_tdata_t *
prof_tdata_get(bool create)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	prof_tdata = *prof_tdata_tsd_get();
	if (create && prof_tdata == NULL)
		prof_tdata = prof_tdata_init();

	return (prof_tdata);
}

JEMALLOC_INLINE void
prof_sample_threshold_update(prof_tdata_t *prof_tdata)
{
	uint64_t r;
	double u;

	cassert(config_prof);

	/*
	 * Compute sample threshold as a geometrically distributed random
	 * variable with mean (2^opt_lg_prof_sample).
	 *
	 *                         __        __
	 *                         |  log(u)  |                     1
	 * prof_tdata->threshold = | -------- |, where p = -------------------
	 *                         | log(1-p) |             opt_lg_prof_sample
	 *                         --        --                 2
	 *
	 * For more information on the math, see:
	 *
	 *   Non-Uniform Random Variate Generation
	 *   Luc Devroye
	 *   Springer-Verlag, New York, 1986
	 *   pp 500
	 *   (http://cg.scs.carleton.ca/~luc/rnbookindex.html)
	 */
	prng64(r, 53, prof_tdata->prng_state,
	    UINT64_C(6364136223846793005), UINT64_C(1442695040888963407));
	u = (double)r * (1.0/9007199254740992.0L);
	prof_tdata->threshold = (uint64_t)(log(u) /
	    log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
	    + (uint64_t)1U;
}
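
/*
 * Sanity check on the math above (informational): with the default
 * opt_lg_prof_sample of 19, p = 1/2^19, so the geometric threshold has mean
 * 2^19 bytes = 512 KiB, i.e. on average one backtrace per 512 KiB of
 * allocation.  The 53-bit r and the 1.0/9007199254740992.0 scale factor
 * (9007199254740992 == 2^53) map r to a uniform u in [0, 1) with full double
 * precision.
 */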

JEMALLOC_INLINE prof_ctx_t *
prof_ctx_get(const void *ptr)
{
	prof_ctx_t *ret;
	arena_chunk_t *chunk;

	cassert(config_prof);
	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
		ret = arena_prof_ctx_get(ptr);
	} else
		ret = huge_prof_ctx_get(ptr);

	return (ret);
}

JEMALLOC_INLINE void
prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
	arena_chunk_t *chunk;

	cassert(config_prof);
	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
		arena_prof_ctx_set(ptr, ctx);
	} else
		huge_prof_ctx_set(ptr, ctx);
}

JEMALLOC_INLINE bool
prof_sample_accum_update(size_t size)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);
	/* Sampling logic is unnecessary if the interval is 1. */
	assert(opt_lg_prof_sample != 0);

	prof_tdata = prof_tdata_get(false);
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return (true);

	/* Take care to avoid integer overflow. */
	if (size >= prof_tdata->threshold - prof_tdata->accum) {
		prof_tdata->accum -= (prof_tdata->threshold - size);
		/* Compute new sample threshold. */
		prof_sample_threshold_update(prof_tdata);
		while (prof_tdata->accum >= prof_tdata->threshold) {
			prof_tdata->accum -= prof_tdata->threshold;
			prof_sample_threshold_update(prof_tdata);
		}
		return (false);
	} else {
		prof_tdata->accum += size;
		return (true);
	}
}
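
/*
 * Worked example of the overflow-avoiding test above (informational): with
 * threshold T and accumulator A (invariant: A < T), the sample condition
 * "A + size >= T" is rewritten as "size >= T - A"; T - A cannot underflow
 * because A < T, whereas A + size could wrap around uint64_t.  When a sample
 * fires, the assignment accum -= (threshold - size) leaves A' = A + size - T,
 * carrying the excess into the next sampling period, and the while loop then
 * consumes any remaining whole periods.
 */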

JEMALLOC_INLINE void
prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
{

	cassert(config_prof);
	assert(ptr != NULL);
	assert(size == isalloc(ptr, true));

	if (opt_lg_prof_sample != 0) {
		if (prof_sample_accum_update(size)) {
			/*
			 * Don't sample.  For malloc()-like allocation, it is
			 * always possible to tell in advance how large an
			 * object's usable size will be, so there should never
			 * be a difference between the size passed to
			 * PROF_ALLOC_PREP() and prof_malloc().
			 */
			assert((uintptr_t)cnt == (uintptr_t)1U);
		}
	}

	if ((uintptr_t)cnt > (uintptr_t)1U) {
		prof_ctx_set(ptr, cnt->ctx);

		cnt->epoch++;
		/*********/
		mb_write();
		/*********/
		cnt->cnts.curobjs++;
		cnt->cnts.curbytes += size;
		if (opt_prof_accum) {
			cnt->cnts.accumobjs++;
			cnt->cnts.accumbytes += size;
		}
		/*********/
		mb_write();
		/*********/
		cnt->epoch++;
		/*********/
		mb_write();
		/*********/
	} else
		prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
}

JEMALLOC_INLINE void
prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
    size_t old_size, prof_ctx_t *old_ctx)
{
	prof_thr_cnt_t *told_cnt;

	cassert(config_prof);
	assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);

	if (ptr != NULL) {
		assert(size == isalloc(ptr, true));
		if (opt_lg_prof_sample != 0) {
			if (prof_sample_accum_update(size)) {
				/*
				 * Don't sample.  The size passed to
				 * PROF_ALLOC_PREP() was larger than what
				 * actually got allocated, so a backtrace was
				 * captured for this allocation, even though
				 * its actual size was insufficient to cross
				 * the sample threshold.
				 */
				cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
			}
		}
	}

	if ((uintptr_t)old_ctx > (uintptr_t)1U) {
		told_cnt = prof_lookup(old_ctx->bt);
		if (told_cnt == NULL) {
			/*
			 * It's too late to propagate OOM for this realloc(),
			 * so operate directly on old_ctx->cnt_merged.
			 */
			malloc_mutex_lock(old_ctx->lock);
			old_ctx->cnt_merged.curobjs--;
			old_ctx->cnt_merged.curbytes -= old_size;
			malloc_mutex_unlock(old_ctx->lock);
			told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
		}
	} else
		told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;

	if ((uintptr_t)told_cnt > (uintptr_t)1U)
		told_cnt->epoch++;
	if ((uintptr_t)cnt > (uintptr_t)1U) {
		prof_ctx_set(ptr, cnt->ctx);
		cnt->epoch++;
	} else if (ptr != NULL)
		prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
	/*********/
	mb_write();
	/*********/
	if ((uintptr_t)told_cnt > (uintptr_t)1U) {
		told_cnt->cnts.curobjs--;
		told_cnt->cnts.curbytes -= old_size;
	}
	if ((uintptr_t)cnt > (uintptr_t)1U) {
		cnt->cnts.curobjs++;
		cnt->cnts.curbytes += size;
		if (opt_prof_accum) {
			cnt->cnts.accumobjs++;
			cnt->cnts.accumbytes += size;
		}
	}
	/*********/
	mb_write();
	/*********/
	if ((uintptr_t)told_cnt > (uintptr_t)1U)
		told_cnt->epoch++;
	if ((uintptr_t)cnt > (uintptr_t)1U)
		cnt->epoch++;
	/*********/
	mb_write(); /* Not strictly necessary. */
}

JEMALLOC_INLINE void
prof_free(const void *ptr, size_t size)
{
	prof_ctx_t *ctx = prof_ctx_get(ptr);

	cassert(config_prof);

	if ((uintptr_t)ctx > (uintptr_t)1) {
		prof_thr_cnt_t *tcnt;
		assert(size == isalloc(ptr, true));
		tcnt = prof_lookup(ctx->bt);

		if (tcnt != NULL) {
			tcnt->epoch++;
			/*********/
			mb_write();
			/*********/
			tcnt->cnts.curobjs--;
			tcnt->cnts.curbytes -= size;
			/*********/
			mb_write();
			/*********/
			tcnt->epoch++;
			/*********/
			mb_write();
			/*********/
		} else {
			/*
			 * OOM during free() cannot be propagated, so operate
			 * directly on ctx->cnt_merged.
			 */
			malloc_mutex_lock(ctx->lock);
			ctx->cnt_merged.curobjs--;
			ctx->cnt_merged.curbytes -= size;
			malloc_mutex_unlock(ctx->lock);
		}
	}
}
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/