/*
 * Copyright (c) 2011 Apple Inc. All rights reserved.
 *
 * @APPLE_APACHE_LICENSE_HEADER_START@
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @APPLE_APACHE_LICENSE_HEADER_END@
 */
/*
    Thread.cpp
    Registered Thread Management
    Copyright (c) 2004-2011 Apple Inc. All rights reserved.
 */

#include "Definitions.h"
#include "Thread.h"
#include "Zone.h"
#include "ThreadLocalCollector.h"
#include "BlockIterator.h"
#include <crt_externs.h>

#if defined(__i386__) || defined(__arm__)
// 32-bit x86/arm use no red zone.
#define C_RED_ZONE 0
#elif defined(__x86_64__)
// according to http://www.x86-64.org/documentation/abi.pdf (page 15)
#define C_RED_ZONE 128
#else
#error Unknown Architecture
#endif


namespace Auto {

    //----- Thread -----//

    Thread::Thread(Zone *zone)
        : _next(NULL), _zone(zone), _pthread(NULL), _thread(MACH_PORT_NULL), _stack_base(NULL),
          _scanning(), _suspended(0), _stack_scan_peak(NULL), _tlc(NULL), _localAllocations(64), _localsGuard(SENTINEL_T_INITIALIZER),
          _destructor_count(0), _in_collector(false), _tlc_watchdog_counter(0), _pending_count_accumulator(NULL)
    {
        bind();
    }

    void Thread::flush_cache(AllocationCache &cache) {
        usword_t count = 0, size = 0;
        for (usword_t i = 1; i < AllocationCache::cache_size; ++i) {
            FreeList &list = cache[i];
            const size_t blockSize = i * allocate_quantum_small;
            while (void *block = list.pop()->address()) {
                // mark the thread-local block as global so that it can be collected
                assert(_zone->in_subzone_memory(block));
                Subzone *subzone = Subzone::subzone(block);
                subzone->admin()->mark_allocated(block, i, AUTO_MEMORY_UNSCANNED, false, false);
                ++count;
                size += blockSize;
            }
        }
        _zone->adjust_allocation_counter(size);
    }

    Thread::~Thread() {
        /* If any blocks remain in our local allocations list, mark them as global. */
        /* We cannot reclaim them because we cannot finalize here. */
        if (_localAllocations.count() > 0) {
            for (uint32_t i = _localAllocations.firstOccupiedSlot(); i <= _localAllocations.lastOccupiedSlot(); i++) {
                void *block = _localAllocations[i];
                if (block) {
                    Subzone *subzone = Subzone::subzone(block);
                    subzone->make_global(subzone->quantum_index_unchecked(block));
                }
            }
        }

        // release the per-thread allocation cache items
        flush_cache(_allocation_cache[AUTO_MEMORY_SCANNED]);
        flush_cache(_allocation_cache[AUTO_MEMORY_UNSCANNED]);
    }

    //
    // bind
    //
    // Associate the Thread with the calling pthread.
    // This declares the Zone's interest in scanning the calling pthread's stack during collections.
    //
    void Thread::bind() {
        _pthread = pthread_self();
        _thread = pthread_mach_thread_np(_pthread);
        // The kernel stores the environment and command line arguments on the main thread's stack.
        // Skip that area to avoid false rooting from the character data.
        _stack_base = pthread_main_np() ? align_down(**(void***)_NSGetArgv(), pointer_alignment) : pthread_get_stackaddr_np(_pthread);
        _stack_scan_peak = _stack_base;
    }
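
    // Informative sketch (not compiled): how bind() derives the main thread's stack
    // base above. _NSGetArgv() (declared in <crt_externs.h>) returns char***, so
    // **(void***)_NSGetArgv() is argv[0], a pointer into the argument/environment
    // character data that the kernel places at the top of the main thread's stack.
    // Everything above that address is skipped, since scanning the character data
    // could produce false roots.
#if 0
    static void *main_thread_stack_base_sketch() {
        char ***argvp = _NSGetArgv();                   // pointer to the global argv
        void *argv0 = (void *)(*argvp)[0];              // first argument string
        return align_down(argv0, pointer_alignment);    // the value bind() computes
    }
#endif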

    //
    // unbind
    //
    // Disassociate the Thread from the calling pthread.
    // May only be called from the same pthread that previously called bind().
    // unbind() synchronizes with stack scanning to ensure that if a stack scan is in progress
    // the stack will remain available until scanning is complete. Returns true if the thread
    // can be reclaimed immediately.
    //
    bool Thread::unbind() {
        SpinLock lock(&_scanning.lock);
        assert(!_scanning.state);
        assert(pthread_self() == _pthread);
        _pthread = NULL;
        _thread = MACH_PORT_NULL;
        _stack_base = NULL;
        _stack_scan_peak = NULL;
        return true;
    }

    bool Thread::lockForScanning() {
        spin_lock(&_scanning.lock);
        if (is_bound()) {
            _scanning.state = true;
            return true;
        }
        spin_unlock(&_scanning.lock);
        return false;
    }

    void Thread::unlockForScanning() {
        _scanning.state = false;
        spin_unlock(&_scanning.lock);
    }

    struct enliven_do {
        void operator ()(Subzone *subzone, usword_t q) {
            if (!subzone->test_and_set_mark(q) && subzone->is_scanned(q))
                subzone->test_and_set_pending(q, true);
        }

        void operator ()(Large *large) {
            if (!large->test_and_set_mark() && large->is_scanned())
                large->set_pending();
        }
    };

    void Thread::enliven_block(void *block) {
        enliven_do op;
        blockDo(_zone, block, op);
    }

    //
    // flush_local_blocks
    //
    // empties the local allocations hash, making all blocks global
    //
    void Thread::flush_local_blocks()
    {
        Sentinel::assert_guarded(_localsGuard);
        // This only gets called if the local block set grows much larger than expected.
        uint32_t first = _localAllocations.firstOccupiedSlot();
        uint32_t last = _localAllocations.lastOccupiedSlot();
        for (uint32_t i = first; i <= last; i++) {
            void *block = _localAllocations[i];
            if (block) {
                Subzone *subzone = Subzone::subzone(block);
                subzone->make_global(subzone->quantum_index(block));
                _localAllocations.remove(i);
            }
        }
        // this will cause _localAllocations to resize down its internal pointer buffer
        _localAllocations.grow();
    }

    //
    // reap_all_local_blocks
    //
    // finalize and free all local blocks without doing any scanning
    // should only be called when it is known the stack is shallow and cannot root anything
    //
    void Thread::reap_all_local_blocks()
    {
        Sentinel guard(_localsGuard);
        if (_localAllocations.count() > 0) {
            ThreadLocalCollector tlc(_zone, NULL, *this);
            tlc.reap_all();
            // this will cause _localAllocations to resize down its internal pointer buffer
            _localAllocations.grow();
        }
    }
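
    //
    // Lifecycle note (summary of the code in this file): blocks begin thread-local
    // and are reclaimed cheaply by the ThreadLocalCollector. A block leaves the
    // local set in one of three ways: its address escapes through a write barrier
    // (block_escaped, below), the local set overflows (flush_local_blocks, above),
    // or the owning thread exits (~Thread). In each case the block is made global
    // and becomes the main collector's responsibility.
    //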

    // BlockRef FIXME: temporary glue code until all call sites convert to BlockRef.
    template <> void Thread::block_escaped<void *>(void *block) {
        Subzone *subzone;
        if (!_zone->in_subzone_memory(block))
            return;
        subzone = Subzone::subzone(block);
        usword_t q;
        if (!subzone->block_is_start(block, &q)) return;    // we are not interested in non-block pointers
        SubzoneBlockRef ref(subzone, q);
        if (ref.is_thread_local()) block_escaped(ref);
    }

    //
    // block_escaped_internal
    //
    // a block is escaping the stack; remove it from the local set (cheaply)
    //
    template <class BlockRef> void Thread::block_escaped_internal(BlockRef block)
    {
        assert(block.is_thread_local());
        void *addr = block.address();
        /*
            It is possible that a thread might construct a pointer to a block which is local to another thread.
            If that pointer gets stored through a write barrier then we wind up here.
            It would be an error for the thread to dereference that pointer, but just storing it is technically ok.
            We must be careful to validate that the block is local to *this* thread.
        */
        if (auto_expect_false(block.is_local_garbage())) {
            /*
                If we see a local garbage block we must first ensure that it is local to the current thread.
                If it is, then we must evict any non-garbage blocks which are reachable from that block.
                However, we don't currently have a way to discover when one thread-local garbage block is
                reachable from another thread-local garbage block. The scanner is not equipped to handle that.
                So we just escape all blocks reachable from the entire garbage list. This should be very rare.
                Note that the garbage blocks themselves remain thread-local garbage. Only reachable non-garbage
                blocks are made global.
            */

            // verify the block is in this thread's garbage list
            if (_tlc && _tlc->block_in_garbage_list(addr)) {
                _tlc->evict_local_garbage();
            }
        } else {
            Sentinel guard(_localsGuard);
            // verify the block is local to this thread
            if (_localAllocations.contains(addr)) {
                if (block.should_scan_local_block()) {
                    ThreadLocalCollector scanner(_zone, NULL, *this);
                    scanner.eject_local_block(addr);
                }
                else {  // just do the one
                    block.make_global();
                    _localAllocations.remove(addr);
                    usword_t size = block.size();
                    _zone->adjust_allocation_counter(size);
                }
            }
        }
    }

#ifdef DEBUG
    // In release builds the optimizer knows this never gets called. But we need it to link a debug build.
    template <> void Thread::block_escaped_internal<class LargeBlockRef>(LargeBlockRef block) {
        __builtin_trap();
    }
#endif
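
    // Informative sketch (hypothetical call site, not part of this file): the escape
    // path above is reached when a thread-local block's address is stored somewhere
    // the thread-local collector cannot see, e.g. through the zone's write barrier:
#if 0
    void example_write_barrier_store(Thread &thread, void **global_slot, void *block) {
        *global_slot = block;           // the actual store
        thread.block_escaped(block);    // make block (and anything reachable from it) global
    }
#endif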

    //
    // track_local_memcopy
    //
    // If dst lies within a live, local, scanned block, then any thread-local blocks referenced
    // by the copied data cause dst to be marked as stored into (so it will be scanned locally).
    // Otherwise, if dst is unknown, all thread-local blocks referenced by the copied data are
    // escaped. src might be the stack.
    //
    void Thread::track_local_memcopy(const void *src, void *dst, size_t size) {
        Subzone *dstSubzone = NULL;
        void *dstBase = NULL;
        bool should_track_local = false;
        if (_zone->in_subzone_memory((void *)dst)) {
            dstSubzone = Subzone::subzone((void *)dst);
            usword_t dst_q;
            dstBase = dstSubzone->block_start((void *)dst, dst_q);
            if (dstBase) {
                // if memmoving within the same block, bail early
                size_t dstSize = dstSubzone->size(dst_q);
                if (src > dstBase && src < ((char *)dstBase + dstSize))
                    return;
                if (dstSubzone->is_live_thread_local(dst_q)
                    && (dstSubzone->should_scan_local_block(dst_q) || dstSubzone->is_scanned(dst_q))) {
                    should_track_local = true;
                }
            }
        }
        void **start = (void **)src;
        void **end = start + size/sizeof(void *);
        bool dstStoredInto = false;
        while (start < end) {
            void *candidate = *start;
            if (candidate) {
                if (_zone->in_subzone_memory(candidate)) {
                    Subzone *candidateSubzone = Subzone::subzone(candidate);
                    usword_t q = candidateSubzone->quantum_index_unchecked(candidate);
                    if (q < candidateSubzone->allocation_limit() && candidateSubzone->is_live_thread_local(q)) { // && thread->_localAllocations.contains(candidate))
                        if (should_track_local) {
                            dstStoredInto = true;
                            break;
                        }
                        else {
                            SubzoneBlockRef candidateRef(candidateSubzone, q);
                            block_escaped(candidateRef);
                        }
                    }
                }
            }
            start++;
        }
        if (dstStoredInto) {
            // we can only get here if dstBase is a valid block
            dstSubzone->set_scan_local_block(dstSubzone->quantum_index_unchecked(dstBase));
        }
    }

    bool Thread::thread_cache_add(void *block, Subzone *subzone, usword_t q) {
        // don't cache medium subzone blocks.
        bool cached = false;
        if (subzone->is_small()) {
            usword_t n = subzone->length(q);
            if (n <= max_cached_small_multiple) {
                Admin *admin = subzone->admin();
                admin->mark_cached(subzone, q, n);
                FreeList &list = allocation_cache(admin->layout())[n];
                list.push(block, (n << allocate_quantum_small_log2));
                cached = true;
            }
        }
        return cached;
    }
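
    // Worked example for thread_cache_add(): a small-subzone block spanning n = 2
    // quanta is marked cached and pushed onto allocation_cache(layout)[2] with size
    // 2 << allocate_quantum_small_log2 (two small quanta). flush_cache(), above, is
    // the inverse: it pops each free list and re-marks the blocks as allocated
    // globals. Medium-subzone blocks are never cached per-thread.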

    //
    // scan_current_thread
    //
    // Scan the current thread stack and registers for block references.
    //
    void Thread::scan_current_thread(thread_scanner_t scanner, void *stack_bottom) {
        // capture non-volatile registers
        NonVolatileRegisters registers;

        // scan the registers
        Range range = registers.buffer_range();
        scanner(this, range);

        // scan the stack
        range.set_range(stack_bottom, _stack_base);
        if (_stack_scan_peak > range.address()) {
            _stack_scan_peak = range.address();
        }
        scanner(this, range);
    }

#ifndef __BLOCKS__
    class thread_scanner_helper : public Thread::thread_scanner {
        void (*_scanner) (Thread*, const Range&, void*);
        void *_arg;
    public:
        thread_scanner_helper(void (*scanner) (Thread*, const Range&, void*), void *arg) : _scanner(scanner), _arg(arg) {}
        virtual void operator() (Thread *thread, const Range &range) { _scanner(thread, range, _arg); }
    };
#endif

    void Thread::scan_current_thread(void (*scanner) (Thread*, const Range&, void*), void *arg, void *stack_bottom) {
#ifdef __BLOCKS__
        scan_current_thread(^(Thread *thread, const Range &range) { scanner(thread, range, arg); }, stack_bottom);
#else
        thread_scanner_helper helper(scanner, arg);
        scan_current_thread(helper, stack_bottom);
#endif
    }

    union ThreadState {
#if defined(__i386__)
        i386_thread_state_t regs;
#define THREAD_STATE_COUNT i386_THREAD_STATE_COUNT
#define THREAD_STATE_FLAVOR i386_THREAD_STATE
#define THREAD_STATE_SP __esp
#elif defined(__x86_64__)
        x86_thread_state64_t regs;
#define THREAD_STATE_COUNT x86_THREAD_STATE64_COUNT
#define THREAD_STATE_FLAVOR x86_THREAD_STATE64
#define THREAD_STATE_SP __rsp
#elif defined(__arm__)
        arm_thread_state_t regs;
#define THREAD_STATE_COUNT ARM_THREAD_STATE_COUNT
#define THREAD_STATE_FLAVOR ARM_THREAD_STATE
#define THREAD_STATE_SP __sp
#else
#error Unknown Architecture
#endif
        thread_state_data_t data;

        void* get_stack_pointer() {
            // <rdar://problem/6453396> always align the stack address to a pointer boundary.
            return align_down(reinterpret_cast<void*>(regs.THREAD_STATE_SP - C_RED_ZONE), pointer_alignment);
        }
    };
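
    // Note on get_stack_pointer(): subtracting C_RED_ZONE extends the scanned range
    // below the captured stack pointer to cover the red zone (128 bytes on x86_64
    // per the SysV ABI, 0 on 32-bit x86/arm; see the definitions at the top of this
    // file), since leaf functions may keep live values there without moving the
    // stack pointer.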

    //
    // get_register_state
    //
    // read another thread's registers
    //
    void Thread::get_register_state(ThreadState &state, unsigned &user_count) {
        // select the register capture flavor
        user_count = THREAD_STATE_COUNT;
        thread_state_flavor_t flavor = THREAD_STATE_FLAVOR;

        // get the thread register state
        kern_return_t err = thread_get_state(_thread, flavor, state.data, &user_count);
        uint64_t retryDelay = 1;

        // We sometimes see KERN_ABORTED in conjunction with fork(). Typically a single retry succeeds in that case.
        // We also see various other error codes during thread exit/teardown. Retry generously until the port is dead (MACH_SEND_INVALID_DEST)
        // because otherwise we have a fatal error. Using an exponentially increasing delay between iterations, which
        // results in a TOTAL sleep time of 1.111111 seconds to let the dying thread settle before we give up.
        while ((err != KERN_SUCCESS) && (err != MACH_SEND_INVALID_DEST) && (retryDelay < 10 * NSEC_PER_SEC)) {
            //malloc_printf("*** %s: unable to get thread state %d. Retrying (retry count: %d)\n", prelude(), err, retryCount);
            struct timespec sleeptime;
            sleeptime.tv_sec = retryDelay / NSEC_PER_SEC;
            sleeptime.tv_nsec = retryDelay % NSEC_PER_SEC;
            nanosleep(&sleeptime, NULL);
            retryDelay *= 10;
            err = thread_get_state(_thread, flavor, state.data, &user_count);
        }

        if (err) {
            // this is a fatal error. the program will crash if we can't scan this thread's state.
            char thread_description[256];
            description(thread_description, sizeof(thread_description));
            auto_fatal("get_register_state(): unable to get thread state: err = %d, %s\n", err, thread_description);
        }
    }
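
    // Worked arithmetic for the retry loop above: retryDelay starts at 1ns and is
    // multiplied by 10 after each sleep, so the delays are 1ns, 10ns, ..., 1s and
    // the loop gives up once retryDelay reaches 10s. The total sleep is therefore
    // (10^10 - 1) / 9 ns ≈ 1.111111111s, the figure quoted in the comment.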

    //
    // scan_other_thread
    //
    // Scan the stack and registers of a thread other than the current thread for block references.
    //
    void Thread::scan_other_thread(thread_scanner_t scanner, bool withSuspend) {
        // <rdar://problem/6398665&6456504> can only safely scan if this thread was locked.
        assert(_scanning.state);

        // suspend the thread while scanning its registers and stack.
        if (withSuspend) suspend();

        unsigned user_count;
        ThreadState state;
        get_register_state(state, user_count);

        // scan the registers
        Range register_range((void *)state.data, user_count * sizeof(natural_t));
        scanner(this, register_range);

        // scan the stack
        Range stack_range(state.get_stack_pointer(), _stack_base);
        if (_stack_scan_peak > stack_range.address()) {
            _stack_scan_peak = stack_range.address();
        }
        scanner(this, stack_range);

        if (withSuspend) {
            if (ThreadLocalCollector::should_collect_suspended(*this)) {
                // Perform a TLC and pull the resulting garbage list into the global garbage.
                ThreadLocalCollector tlc(_zone, state.get_stack_pointer(), *this);
                // Blocks in the garbage list have already been marked by the roots scan.
                // Since these blocks are known to be garbage, explicitly unmark them now to collect them in this cycle.
                tlc.collect_suspended(register_range, stack_range);
            }
            resume();
        }
    }

    void Thread::scan_other_thread(void (*scanner) (Thread*, const Range&, void*), void *arg, bool withSuspend) {
#ifdef __BLOCKS__
        scan_other_thread(^(Thread *thread, const Range &range) { scanner(thread, range, arg); }, withSuspend);
#else
        thread_scanner_helper helper(scanner, arg);
        scan_other_thread(helper, withSuspend);
#endif
    }

    //
    // suspend
    //
    // Temporarily suspend the thread from further execution.
    //
    void Thread::suspend() {
        // do not suspend this thread
        if (is_current_thread() || !is_bound()) return;

        if (_suspended == 0) {
            // request thread suspension
            kern_return_t err = thread_suspend(_thread);

            if (err != KERN_SUCCESS) {
                char thread_description[256];
                description(thread_description, sizeof(thread_description));
                auto_fatal("Thread::suspend(): unable to suspend a thread: err = %d, %s\n", err, thread_description);
            }
        }
        _suspended++;
    }

    //
    // resume
    //
    // Resume a suspended thread.
    //
    void Thread::resume() {
        // do not resume this thread
        if (is_current_thread() || !is_bound()) return;

        if (_suspended == 1) {
            // request thread resumption
            kern_return_t err = thread_resume(_thread);

            if (err != KERN_SUCCESS) {
                char thread_description[256];
                description(thread_description, sizeof(thread_description));
                auto_fatal("Thread::resume(): unable to resume a thread: err = %d, %s\n", err, thread_description);
            }
        }
        _suspended--;
    }
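
    // Note: _suspended makes suspend()/resume() counted operations: only the
    // outermost suspend() calls thread_suspend(), and only the matching outermost
    // resume() calls thread_resume(); nested pairs merely adjust the counter. Both
    // are no-ops on the current thread and on unbound threads.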

    char *Thread::description(char *buf, size_t bufsz) {
        if (_pthread == NULL) {
            snprintf(buf, bufsz, "Thread %p: unbound", this);
        } else {
            snprintf(buf, bufsz, "Thread %p: _pthread = %p, _thread = 0x%x, _stack_base = %p, enlivening %s, %d local blocks",
                     this, _pthread, _thread, _stack_base,
                     needs_enlivening().state ? " on" : "off", _localAllocations.count());
        }
        return buf;
    }

    extern "C" void auto_print_registered_threads() {
        Zone *zone = Zone::zone();
        Mutex lock(zone->threads_mutex());
        Thread *thread = zone->threads();
        while (thread != NULL) {
            char thread_description[256];
            thread->description(thread_description, sizeof(thread_description));
            malloc_printf("%s\n", thread_description);
            thread = thread->next();
        }
    }

#ifdef __BLOCKS__
    //
    // dump
    //
    void Thread::dump(auto_zone_stack_dump stack_dump, auto_zone_register_dump register_dump, auto_zone_node_dump dump_local_block) {
        Range stack_range;
        // dump the registers.
        if (!is_bound()) return;
        if (register_dump) {
            if (is_current_thread()) {
                NonVolatileRegisters registers;

                // scan the registers
                Range range = registers.buffer_range();
                //scanner.scan_range_from_registers(range, *this, 0);
                register_dump(range.address(), range.size());
                stack_range.set_range(__builtin_frame_address(0), _stack_base);
            }
            else {
                unsigned user_count;
                ThreadState state;
                get_register_state(state, user_count);
                register_dump(&state.data, user_count * sizeof(void *));
                stack_range.set_range(state.get_stack_pointer(), _stack_base);
            }
        }
        // dump the stack
        if (stack_dump) stack_dump(stack_range.address(), stack_range.size());
#if 0
        // unsafe; thread might be in the middle of an STL set grow; need to put new locks into a tracing build to get this info safely
        // dump the locals
        if (!dump_local_block) return;
        for (uint32_t i = _localAllocations.firstOccupiedSlot(); i <= _localAllocations.lastOccupiedSlot(); i++) {
            void *block = _localAllocations[i];
            if (block) {
                Subzone *subzone = Subzone::subzone(block);
                dump_local_block(block, subzone->size(block), subzone->layout(block), subzone->refcount(block));
            }
        }
#endif
    }

    void Thread::visit(auto_zone_visitor_t *visitor) {
        // dump the registers.
        if (!is_bound()) return;
        if (is_current_thread()) {
            // snapshot the stack range.
            auto_address_range_t stack_range = { (void *)auto_get_sp(), _stack_base };

            // snapshot the registers.
            NonVolatileRegisters registers;
            Range range = registers.buffer_range();
            auto_address_range_t registers_range = { range.address(), range.end() };
            visitor->visit_thread(_pthread, stack_range, registers_range);
        } else {
            unsigned user_count;
            ThreadState state;
            get_register_state(state, user_count);
            auto_address_range_t stack_range = { state.get_stack_pointer(), _stack_base };
            auto_address_range_t registers_range = { &state.data, &state.data[user_count] };
            visitor->visit_thread(_pthread, stack_range, registers_range);
        }
    }

#endif /* __BLOCKS__ */

};