/*
 * Copyright (c) 2011 Apple Inc. All rights reserved.
 *
 * @APPLE_APACHE_LICENSE_HEADER_START@
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @APPLE_APACHE_LICENSE_HEADER_END@
 */
/*
    Thread.cpp
    Registered Thread Management
    Copyright (c) 2004-2011 Apple Inc. All rights reserved.
 */

#include "Definitions.h"
#include "Thread.h"
#include "Zone.h"
#include "ThreadLocalCollector.h"
#include "BlockIterator.h"
#include <crt_externs.h>

#if defined(__i386__) || defined(__arm__)
// 32-bit x86/arm use no red zone.
#define C_RED_ZONE 0
#elif defined(__x86_64__)
// according to http://www.x86-64.org/documentation/abi.pdf (page 15)
#define C_RED_ZONE 128
#else
#error Unknown Architecture
#endif


namespace Auto {


    //----- Thread -----//

    Thread::Thread(Zone *zone)
        : _next(NULL), _zone(zone), _pthread(NULL), _thread(MACH_PORT_NULL), _stack_base(NULL),
          _scanning(), _suspended(0), _stack_scan_peak(NULL), _tlc(NULL), _localAllocations(64), _localsGuard(SENTINEL_T_INITIALIZER),
          _destructor_count(0), _in_collector(false), _tlc_watchdog_counter(0), _pending_count_accumulator(NULL)
    {
        bind();
    }

    void Thread::flush_cache(AllocationCache &cache) {
        usword_t count = 0, size = 0;
        for (usword_t i = 1; i < AllocationCache::cache_size; ++i) {
            FreeList &list = cache[i];
            const size_t blockSize = i * allocate_quantum_small;
            while (void *block = list.pop()->address()) {
                // mark the thread local block as global so that it can be collected
                assert(_zone->in_subzone_memory(block));
                Subzone *subzone = Subzone::subzone(block);
                subzone->admin()->mark_allocated(block, i, AUTO_MEMORY_UNSCANNED, false, false);
                ++count;
                size += blockSize;
            }
        }
        _zone->adjust_allocation_counter(size);
    }

    Thread::~Thread() {
        /* If any blocks remain in our local allocations list mark them as global. */
        /* We cannot reclaim them because we cannot finalize here. */
        if (_localAllocations.count() > 0) {
            for (uint32_t i = _localAllocations.firstOccupiedSlot(); i <= _localAllocations.lastOccupiedSlot(); i++) {
                void *block = _localAllocations[i];
                if (block) {
                    Subzone *subzone = Subzone::subzone(block);
                    subzone->make_global(subzone->quantum_index_unchecked(block));
                }
            }
        }

        // release the per-thread allocation cache items
        flush_cache(_allocation_cache[AUTO_MEMORY_SCANNED]);
        flush_cache(_allocation_cache[AUTO_MEMORY_UNSCANNED]);
    }

    //
    // bind
    //
    // Associate the Thread with the calling pthread.
    // This declares the Zone's interest in scanning the calling pthread's stack during collections.
    //
    void Thread::bind() {
        _pthread = pthread_self();
        _thread = pthread_mach_thread_np(_pthread);
        // The kernel stores the environment and command line arguments on the main thread stack.
        // Skip that area to avoid false rooting from the character data.
        _stack_base = pthread_main_np() ?
            align_down(**(void***)_NSGetArgv(), pointer_alignment) : pthread_get_stackaddr_np(_pthread);
        _stack_scan_peak = _stack_base;
    }


    //
    // unbind
    //
    // Disassociate the Thread from the calling pthread.
    // May only be called from the same pthread that previously called bind().
    // unbind() synchronizes with stack scanning to ensure that if a stack scan is in progress
    // the stack will remain available until scanning is complete. Returns true if the thread
    // can be reclaimed immediately.
    //
    bool Thread::unbind() {
        SpinLock lock(&_scanning.lock);
        assert(!_scanning.state);
        assert(pthread_self() == _pthread);
        _pthread = NULL;
        _thread = MACH_PORT_NULL;
        _stack_base = NULL;
        _stack_scan_peak = NULL;
        return true;
    }

    bool Thread::lockForScanning() {
        spin_lock(&_scanning.lock);
        if (is_bound()) {
            _scanning.state = true;
            return true;
        }
        spin_unlock(&_scanning.lock);
        return false;
    }

    void Thread::unlockForScanning() {
        _scanning.state = false;
        spin_unlock(&_scanning.lock);
    }


    struct enliven_do {
        void operator ()(Subzone *subzone, usword_t q) {
            if (!subzone->test_and_set_mark(q) && subzone->is_scanned(q))
                subzone->test_and_set_pending(q, true);
        }

        void operator ()(Large *large) {
            if (!large->test_and_set_mark() && large->is_scanned())
                large->set_pending();
        }
    };

    void Thread::enliven_block(void *block) {
        enliven_do op;
        blockDo(_zone, block, op);
    }


    //
    // clear_stack
    //
    // clears stack memory from the current sp to the depth that was scanned by the last collection
    //
    void Thread::clear_stack() {
        // We need to be careful about calling functions during stack clearing.
        // We can't use bzero or the like to do the zeroing because we don't know how much stack they use.
        // The amount to clear is typically small so just use a simple loop writing pointer sized NULL values.
        void **sp = (void **)auto_get_sp();
        void **zero_addr = (void **)_stack_scan_peak;
        _stack_scan_peak = sp;
        while (zero_addr < sp) {
            *zero_addr = NULL;
            zero_addr++;
        }
    }


    //
    // flush_local_blocks
    //
    // empties the local allocations hash, making all blocks global
    //
    void Thread::flush_local_blocks()
    {
        Sentinel::assert_guarded(_localsGuard);
        // This only gets called if the local block set grows much larger than expected.
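        // Each occupied slot's block is made global (so the collector, not this thread, owns
        // it from now on) and then dropped from the local set.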
        uint32_t first = _localAllocations.firstOccupiedSlot();
        uint32_t last = _localAllocations.lastOccupiedSlot();
        for (uint32_t i = first; i <= last; i++) {
            void *block = _localAllocations[i];
            if (block) {
                Subzone *subzone = Subzone::subzone(block);
                subzone->make_global(subzone->quantum_index(block));
                _localAllocations.remove(i);
            }
        }
        // this will cause _localAllocations to resize down its internal pointer buffer
        _localAllocations.grow();
    }


    //
    // reap_all_local_blocks
    //
    // finalize and free all local blocks without doing any scanning
    // should only be called when it is known the stack is shallow and cannot root anything
    //
    void Thread::reap_all_local_blocks()
    {
        Sentinel guard(_localsGuard);
        if (_localAllocations.count() > 0) {
            ThreadLocalCollector tlc(_zone, NULL, *this);
            tlc.reap_all();
            // this will cause _localAllocations to resize down its internal pointer buffer
            _localAllocations.grow();
        }
    }


    // BlockRef FIXME: temporary glue code until all call sites convert to BlockRef.
    template <> void Thread::block_escaped<void *>(void *block) {
        Subzone *subzone;
        if (!_zone->in_subzone_memory(block))
            return;
        subzone = Subzone::subzone(block);
        usword_t q;
        if (!subzone->block_is_start(block, &q)) return;    // we are not interested in non-block pointers
        SubzoneBlockRef ref(subzone, q);
        if (ref.is_thread_local()) block_escaped(ref);
    }

    //
    // block_escaped
    //
    // a block is escaping the stack; remove it from the local set (cheaply)
    //
    template <class BlockRef> void Thread::block_escaped_internal(BlockRef block)
    {
        assert(block.is_thread_local());
        void *addr = block.address();
        /*
            It is possible that a thread might construct a pointer to a block which is local to another thread.
            If that pointer gets stored through a write barrier then we wind up here.
            It would be an error for the thread to dereference that pointer, but just storing it is technically ok.
            We must be careful to validate that the block is local to *this* thread.
        */
        if (auto_expect_false(block.is_local_garbage())) {
            /*
                If we see a local garbage block we must first ensure that it is local to the current thread.
                If it is then we must evict any non-garbage blocks which are reachable from that block.
                However, we don't currently have a way to discover when one thread local garbage block is
                reachable from another thread local garbage block. The scanner is not equipped to handle that.
                So we just escape all blocks reachable from the entire garbage list. This should be very rare.
                Note that the garbage blocks themselves remain thread local garbage. Only reachable non-garbage
                blocks are made global.
            */

            // verify the block is in this thread's garbage list
            if (_tlc && _tlc->block_in_garbage_list(addr)) {
                _tlc->evict_local_garbage();
            }
        } else {
            Sentinel guard(_localsGuard);
            // verify the block is local to this thread
            if (_localAllocations.contains(addr)) {
                if (block.should_scan_local_block()) {
                    ThreadLocalCollector scanner(_zone, NULL, *this);
                    scanner.eject_local_block(addr);
                }
                else {  // just do the one
                    block.make_global();
                    _localAllocations.remove(addr);
                    usword_t size = block.size();
                    _zone->adjust_allocation_counter(size);
                }
            }
        }
    }

#ifdef DEBUG
    // In release builds the optimizer knows this never gets called. But we need it to link a debug build.
    template <> void Thread::block_escaped_internal<class LargeBlockRef>(LargeBlockRef block) {
        __builtin_trap();
    }
#endif

    //
    // track_local_memcopy
    //
    // If dst lies within a live, scanned thread-local block and the copied range contains pointers
    // to other thread-local blocks, mark dst's block as stored into (so it gets a local scan).
    // Otherwise, if dst is unknown, mark all thread-local blocks referenced from src as escaped.
    // Src might be the stack.
    //
    void Thread::track_local_memcopy(const void *src, void *dst, size_t size) {
        Subzone *dstSubzone = NULL;
        void *dstBase = NULL;
        bool should_track_local = false;
        if (_zone->in_subzone_memory((void *)dst)) {
            dstSubzone = Subzone::subzone((void *)dst);
            usword_t dst_q;
            dstBase = dstSubzone->block_start((void *)dst, dst_q);
            if (dstBase) {
                // if memmoving within the same block, bail early
                size_t dstSize = dstSubzone->size(dst_q);
                if (src > dstBase && src < ((char *)dstBase + dstSize))
                    return;
                if (dstSubzone->is_live_thread_local(dst_q)
                    && (dstSubzone->should_scan_local_block(dst_q) || dstSubzone->is_scanned(dst_q))) {
                    should_track_local = true;
                }
            }
        }
        void **start = (void **)src;
        void **end = start + size/sizeof(void *);
        bool dstStoredInto = false;
        while (start < end) {
            void *candidate = *start;
            if (candidate) {
                if (_zone->in_subzone_memory(candidate)) {
                    Subzone *candidateSubzone = Subzone::subzone(candidate);
                    usword_t q = candidateSubzone->quantum_index_unchecked(candidate);
                    if (q < candidateSubzone->allocation_limit() && candidateSubzone->is_live_thread_local(q)) {    // && thread->_localAllocations.contains(candidate)
                        if (should_track_local) {
                            dstStoredInto = true;
                            break;
                        }
                        else {
                            SubzoneBlockRef candidateRef(candidateSubzone, q);
                            block_escaped(candidateRef);
                        }
                    }
                }
            }
            start++;
        }
        if (dstStoredInto) {
            // we can only get here if dstBase is a valid block
            dstSubzone->set_scan_local_block(dstSubzone->quantum_index_unchecked(dstBase));
        }
    }

    bool Thread::thread_cache_add(void *block, Subzone *subzone, usword_t q) {
        // don't cache medium subzone blocks.
        bool cached = false;
        if (subzone->is_small()) {
            usword_t n = subzone->length(q);
            if (n <= max_cached_small_multiple) {
                Admin *admin = subzone->admin();
                admin->mark_cached(subzone, q, n);
                FreeList &list = allocation_cache(admin->layout())[n];
                list.push(block, (n << allocate_quantum_small_log2));
                cached = true;
            }
        }
        return cached;
    }

    //
    // scan_current_thread
    //
    // Scan the current thread stack and registers for block references.
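    // stack_bottom is the deepest (numerically lowest) address to scan; the scanned range runs
    // from stack_bottom up to _stack_base. Non-volatile registers are first spilled into a buffer
    // in this frame so the scanner sees them through an ordinary Range as well.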
    //
    void Thread::scan_current_thread(thread_scanner_t scanner, void *stack_bottom) {
        // capture non-volatile registers
        NonVolatileRegisters registers;

        // scan the registers
        Range range = registers.buffer_range();
        scanner(this, range);

        // scan the stack
        range.set_range(stack_bottom, _stack_base);
        if (_stack_scan_peak > range.address()) {
            _stack_scan_peak = range.address();
        }
        scanner(this, range);
    }

#ifndef __BLOCKS__
    class thread_scanner_helper : public Thread::thread_scanner {
        void (*_scanner) (Thread*, const Range&, void*);
        void *_arg;
    public:
        thread_scanner_helper(void (*scanner) (Thread*, const Range&, void*), void *arg) : _scanner(scanner), _arg(arg) {}
        virtual void operator() (Thread *thread, const Range &range) { _scanner(thread, range, _arg); }
    };
#endif

    void Thread::scan_current_thread(void (*scanner) (Thread*, const Range&, void*), void *arg, void *stack_bottom) {
#ifdef __BLOCKS__
        scan_current_thread(^(Thread *thread, const Range &range) { scanner(thread, range, arg); }, stack_bottom);
#else
        thread_scanner_helper helper(scanner, arg);
        scan_current_thread(helper, stack_bottom);
#endif
    }

    union ThreadState {
#if defined(__i386__)
        i386_thread_state_t regs;
#define THREAD_STATE_COUNT i386_THREAD_STATE_COUNT
#define THREAD_STATE_FLAVOR i386_THREAD_STATE
#define THREAD_STATE_SP __esp
#elif defined(__x86_64__)
        x86_thread_state64_t regs;
#define THREAD_STATE_COUNT x86_THREAD_STATE64_COUNT
#define THREAD_STATE_FLAVOR x86_THREAD_STATE64
#define THREAD_STATE_SP __rsp
#elif defined(__arm__)
        arm_thread_state_t regs;
#define THREAD_STATE_COUNT ARM_THREAD_STATE_COUNT
#define THREAD_STATE_FLAVOR ARM_THREAD_STATE
#define THREAD_STATE_SP __sp
#else
#error Unknown Architecture
#endif
        thread_state_data_t data;

        void* get_stack_pointer() {
            // <rdar://problem/6453396> always align the stack address to a pointer boundary.
            return align_down(reinterpret_cast<void*>(regs.THREAD_STATE_SP - C_RED_ZONE), pointer_alignment);
        }
    };


    //
    // get_register_state
    //
    // read another thread's registers
    //
    void Thread::get_register_state(ThreadState &state, unsigned &user_count) {
        // select the register capture flavor
        user_count = THREAD_STATE_COUNT;
        thread_state_flavor_t flavor = THREAD_STATE_FLAVOR;

        // get the thread register state
        kern_return_t err = thread_get_state(_thread, flavor, state.data, &user_count);
        uint64_t retryDelay = 1;

        // We sometimes see KERN_ABORTED in conjunction with fork(). Typically a single retry succeeds in that case.
        // We also see various other error codes during thread exit/teardown. Retry generously until the port is dead (MACH_SEND_INVALID_DEST)
        // because otherwise we have a fatal error. The delay between iterations grows by a factor of 10, which
        // results in a TOTAL sleep time of 1.111111 seconds to let the dying thread settle before we give up.
        while ((err != KERN_SUCCESS) && (err == KERN_ABORTED && retryDelay < 10 * NSEC_PER_SEC)) {
            //malloc_printf("*** %s: unable to get thread state %d. Retrying (retry count: %d)\n", prelude(), err, retryCount);
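            // The individual sleeps are 1 ns, 10 ns, ..., 10^9 ns; their geometric sum is
            // 1,111,111,111 ns, the ~1.111 second total mentioned above.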
            struct timespec sleeptime;
            sleeptime.tv_sec = retryDelay / NSEC_PER_SEC;
            sleeptime.tv_nsec = retryDelay % NSEC_PER_SEC;
            nanosleep(&sleeptime, NULL);
            retryDelay *= 10;
            err = thread_get_state(_thread, flavor, state.data, &user_count);
        }

        if (err) {
            // this is a fatal error. the program will crash if we can't scan this thread's state.
            char thread_description[256];
            description(thread_description, sizeof(thread_description));
            auto_fatal("get_register_state(): unable to get thread state: err = %d, %s\n", err, thread_description);
        }
    }


    //
    // scan_other_thread
    //
    // Scan the stack and registers of a thread other than the current thread for block references.
    //
    void Thread::scan_other_thread(thread_scanner_t scanner, bool withSuspend) {
        // <rdar://problem/6398665&6456504> can only safely scan if this thread was locked.
        assert(_scanning.state);

        // suspend the thread while scanning its registers and stack.
        if (withSuspend) suspend();

        unsigned user_count;
        ThreadState state;
        get_register_state(state, user_count);

        // scan the registers
        Range register_range((void *)state.data, user_count * sizeof(natural_t));
        scanner(this, register_range);

        // scan the stack
        Range stack_range(state.get_stack_pointer(), _stack_base);
        if (_stack_scan_peak > stack_range.address()) {
            _stack_scan_peak = stack_range.address();
        }
        scanner(this, stack_range);

        if (withSuspend) {
            if (ThreadLocalCollector::should_collect_suspended(*this)) {
                // Perform a TLC and pull the resulting garbage list into global garbage.
                ThreadLocalCollector tlc(_zone, state.get_stack_pointer(), *this);
                // Blocks in the garbage list have already been marked by the roots scan.
                // Since these blocks are known to be garbage, explicitly unmark them now to collect them in this cycle.
                tlc.collect_suspended(register_range, stack_range);
            }
            resume();
        }
    }

    void Thread::scan_other_thread(void (*scanner) (Thread*, const Range&, void*), void *arg, bool withSuspend) {
#ifdef __BLOCKS__
        scan_other_thread(^(Thread *thread, const Range &range) { scanner(thread, range, arg); }, withSuspend);
#else
        thread_scanner_helper helper(scanner, arg);
        scan_other_thread(helper, withSuspend);
#endif
    }

    //
    // suspend
    //
    // Temporarily suspend the thread from further execution.
    // Does nothing for the current thread or an unbound thread; aborts via auto_fatal() if the
    // Mach suspend call fails.
    //
    void Thread::suspend() {
        // do not suspend this thread
        if (is_current_thread() || !is_bound()) return;

        if (_suspended == 0) {
            // request thread suspension
            kern_return_t err = thread_suspend(_thread);

            if (err != KERN_SUCCESS) {
                char thread_description[256];
                description(thread_description, sizeof(thread_description));
                auto_fatal("Thread::suspend(): unable to suspend a thread: err = %d, %s\n", err, thread_description);
            }
        }
        _suspended++;
    }


    //
    // resume
    //
    // Resume a suspended thread.
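    // suspend()/resume() calls nest via the _suspended counter; the Mach calls are only issued
    // on the outermost transition. For a hypothetical caller:
    //
    //     thread->suspend();   // _suspended 0 -> 1, thread_suspend() issued
    //     thread->suspend();   // _suspended 1 -> 2, no Mach call
    //     thread->resume();    // _suspended 2 -> 1, no Mach call
    //     thread->resume();    // _suspended 1 -> 0, thread_resume() issued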
    //
    void Thread::resume() {
        // do not resume this thread
        if (is_current_thread() || !is_bound()) return;

        if (_suspended == 1) {
            // request thread resumption
            kern_return_t err = thread_resume(_thread);

            if (err != KERN_SUCCESS) {
                char thread_description[256];
                description(thread_description, sizeof(thread_description));
                auto_fatal("Thread::resume(): unable to resume a thread: err = %d, %s\n", err, thread_description);
            }
        }
        _suspended--;
    }


    char *Thread::description(char *buf, size_t bufsz) {
        if (_pthread == NULL) {
            snprintf(buf, bufsz, "Thread %p: unbound", this);
        } else {
            snprintf(buf, bufsz, "Thread %p: _pthread = %p, _thread = 0x%x, _stack_base = %p, enlivening %s, %d local blocks",
                     this, _pthread, _thread, _stack_base,
                     needs_enlivening().state ? " on" : "off", _localAllocations.count());
        }
        return buf;
    }


    extern "C" void auto_print_registered_threads() {
        Zone *zone = Zone::zone();
        Mutex lock(zone->threads_mutex());
        Thread *thread = zone->threads();
        while (thread != NULL) {
            char thread_description[256];
            thread->description(thread_description, sizeof(thread_description));
            malloc_printf("%s\n", thread_description);
            thread = thread->next();
        }
    }


#ifdef __BLOCKS__
    //
    // dump
    //
    void Thread::dump(auto_zone_stack_dump stack_dump, auto_zone_register_dump register_dump, auto_zone_node_dump dump_local_block) {
        Range stack_range;
        // dump the registers.
        if (!is_bound()) return;
        if (register_dump) {
            if (is_current_thread()) {
                NonVolatileRegisters registers;

                // scan the registers
                Range range = registers.buffer_range();
                //scanner.scan_range_from_registers(range, *this, 0);
                register_dump(range.address(), range.size());
                stack_range.set_range(__builtin_frame_address(0), _stack_base);
            }
            else {
                unsigned user_count;
                ThreadState state;
                get_register_state(state, user_count);
                register_dump(&state.data, user_count * sizeof(void *));
                stack_range.set_range(state.get_stack_pointer(), _stack_base);
            }
        }
        // dump the stack
        if (stack_dump) stack_dump(stack_range.address(), stack_range.size());
#if 0
        // unsafe; thread might be in the middle of an STL set grow; need to put new locks into a tracing build to get this info safely
        // dump the locals
        if (!dump_local_block) return;
        for (uint32_t i = _localAllocations.firstOccupiedSlot(); i <= _localAllocations.lastOccupiedSlot(); i++) {
            void *block = _localAllocations[i];
            if (block) {
                Subzone *subzone = Subzone::subzone(block);
                dump_local_block(block, subzone->size(block), subzone->layout(block), subzone->refcount(block));
            }
        }
#endif
    }

    void Thread::visit(auto_zone_visitor_t *visitor) {
        // dump the registers.
        if (!is_bound()) return;
        if (is_current_thread()) {
            // snapshot the stack range.
            auto_address_range_t stack_range = { (void *)auto_get_sp(), _stack_base };

            // snapshot the registers.
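            // (The current thread reads its non-volatile registers directly from this frame;
            // another thread's state is fetched below via get_register_state().)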
            NonVolatileRegisters registers;
            Range range = registers.buffer_range();
            auto_address_range_t registers_range = { range.address(), range.end() };
            visitor->visit_thread(_pthread, stack_range, registers_range);
        } else {
            unsigned user_count;
            ThreadState state;
            get_register_state(state, user_count);
            auto_address_range_t stack_range = { state.get_stack_pointer(), _stack_base };
            auto_address_range_t registers_range = { &state.data, &state.data[user_count] };
            visitor->visit_thread(_pthread, stack_range, registers_range);
        }
    }

#endif /* __BLOCKS__ */

};