/*
 * Copyright (c) 2011 Apple Inc. All rights reserved.
 *
 * @APPLE_APACHE_LICENSE_HEADER_START@
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @APPLE_APACHE_LICENSE_HEADER_END@
 */
/*
    Thread.cpp
    Registered Thread Management
    Copyright (c) 2004-2011 Apple Inc. All rights reserved.
 */

#include "Definitions.h"
#include "Thread.h"
#include "Zone.h"
#include "ThreadLocalCollector.h"
#include "BlockIterator.h"
#include <crt_externs.h>

#if defined(__i386__) || defined(__arm__)
// 32-bit x86/arm use no red zone.
#define C_RED_ZONE 0
#elif defined(__x86_64__)
// according to http://www.x86-64.org/documentation/abi.pdf (page 15)
#define C_RED_ZONE 128
#else
#error Unknown Architecture
#endif
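
// Illustrative note (an addition, not from the original sources): the x86_64 ABI lets leaf
// functions use the C_RED_ZONE bytes just below the stack pointer without moving it, so a
// conservative stack scan must begin that far below the captured SP (see
// ThreadState::get_stack_pointer() below). A minimal sketch with a hypothetical register value:
//
//     uintptr_t sp = 0x7fff5fbff8a0;                  // hypothetical captured %rsp
//     void *scan_bottom = (void *)(sp - C_RED_ZONE);  // 128 bytes lower on x86_64
//     // the scanner then walks [scan_bottom, stack_base) looking for block references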


namespace Auto {


    //----- Thread -----//

    Thread::Thread(Zone *zone)
        : _next(NULL), _zone(zone), _pthread(NULL), _thread(MACH_PORT_NULL), _stack_base(NULL),
          _scanning(), _suspended(0), _stack_scan_peak(NULL), _tlc(NULL), _localAllocations(64), _localsGuard(SENTINEL_T_INITIALIZER),
          _destructor_count(0), _in_collector(false), _tlc_watchdog_counter(0), _pending_count_accumulator(NULL)
    {
        bind();
    }

    void Thread::flush_cache(AllocationCache &cache) {
        usword_t count = 0, size = 0;
        for (usword_t i = 1; i < AllocationCache::cache_size; ++i) {
            FreeList &list = cache[i];
            const size_t blockSize = i * allocate_quantum_small;
            while (void *block = list.pop()->address()) {
                // mark the thread local block as global so that it can be collected
                assert(_zone->in_subzone_memory(block));
                Subzone *subzone = Subzone::subzone(block);
                subzone->admin()->mark_allocated(block, i, AUTO_MEMORY_UNSCANNED, false, false);
                ++count;
                size += blockSize;
            }
        }
        _zone->adjust_allocation_counter(size);
    }

    Thread::~Thread() {
        /* If any blocks remain in our local allocations list mark them as global. */
        /* We cannot reclaim them because we cannot finalize here. */
        if (_localAllocations.count() > 0) {
            for (uint32_t i=_localAllocations.firstOccupiedSlot(); i<=_localAllocations.lastOccupiedSlot(); i++) {
                void *block = _localAllocations[i];
                if (block) {
                    Subzone *subzone = Subzone::subzone(block);
                    subzone->make_global(subzone->quantum_index_unchecked(block));
                }
            }
        }

        // release the per-thread allocation cache items
        flush_cache(_allocation_cache[AUTO_MEMORY_SCANNED]);
        flush_cache(_allocation_cache[AUTO_MEMORY_UNSCANNED]);
    }

    //
    // bind
    //
    // Associate the Thread with the calling pthread.
    // This declares the Zone's interest in scanning the calling pthread's stack during collections.
    //
    void Thread::bind() {
        _pthread = pthread_self();
        _thread = pthread_mach_thread_np(_pthread);
        // The kernel stores the environment and command line arguments on the main thread's stack.
        // Skip that area to avoid false rooting from the character data.
        _stack_base = pthread_main_np() ? align_down(**(void***)_NSGetArgv(), pointer_alignment) : pthread_get_stackaddr_np(_pthread);
        _stack_scan_peak = _stack_base;
    }
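
    // Illustrative note (an addition, not from the original sources): on the main thread the
    // expression above chases pointers roughly like this, assuming the usual Darwin layout in
    // which the argv/envp character data sits at the very top of the main thread's stack:
    //
    //     char ***argvp = (char ***)_NSGetArgv();  // address of the saved argv pointer
    //     char **argv   = *argvp;                  // the argv array
    //     char *argv0   = *argv;                   // argv[0] string, stored near the stack top
    //     // align_down(argv0, pointer_alignment) then serves as a stack base that excludes
    //     // the argument/environment strings, which would otherwise look like false roots.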


    //
    // unbind
    //
    // Disassociate the Thread from the calling pthread.
    // May only be called from the same pthread that previously called bind().
    // unbind() synchronizes with stack scanning to ensure that if a stack scan is in progress
    // the stack will remain available until scanning is complete. Returns true if the thread
    // can be reclaimed immediately.
    //
    bool Thread::unbind() {
        SpinLock lock(&_scanning.lock);
        assert(!_scanning.state);
        assert(pthread_self() == _pthread);
        _pthread = NULL;
        _thread = MACH_PORT_NULL;
        _stack_base = NULL;
        _stack_scan_peak = NULL;
        return true;
    }

    bool Thread::lockForScanning() {
        spin_lock(&_scanning.lock);
        if (is_bound()) {
            _scanning.state = true;
            return true;
        }
        spin_unlock(&_scanning.lock);
        return false;
    }

    void Thread::unlockForScanning() {
        _scanning.state = false;
        spin_unlock(&_scanning.lock);
    }
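
    // A hedged sketch (not a call site in this file) of how the scanning lock is meant to be
    // used: a collector brackets its scan of another thread with lockForScanning() /
    // unlockForScanning(), and unbind() acquires the same _scanning.lock, so an exiting
    // pthread cannot tear down its stack while a scan of it is still in progress.
    //
    //     if (thread->lockForScanning()) {
    //         thread->scan_other_thread(scanner, true);   // see scan_other_thread() below
    //         thread->unlockForScanning();
    //     }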


    struct enliven_do {
        void operator ()(Subzone *subzone, usword_t q) {
            if (!subzone->test_and_set_mark(q) && subzone->is_scanned(q))
                subzone->test_and_set_pending(q, true);
        }

        void operator ()(Large *large) {
            if (!large->test_and_set_mark() && large->is_scanned())
                large->set_pending();
        }
    };

    void Thread::enliven_block(void *block) {
        enliven_do op;
        blockDo(_zone, block, op);
    }
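
    // Descriptive note: enliven_block() is the write-barrier-time analogue of the collector's
    // own marking. test_and_set_mark() keeps a newly stored block from being treated as garbage
    // by the in-flight collection, and the pending bit queues scanned blocks so their contents
    // are still traced. A hedged sketch of a caller (illustration only, not a real call site):
    //
    //     // if (needs_enlivening().state) enliven_block(newly_stored_block);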


    //
    // flush_local_blocks
    //
    // empties the local allocations hash, making all blocks global
    //
    void Thread::flush_local_blocks()
    {
        Sentinel::assert_guarded(_localsGuard);
        // This only gets called if the local block set grows much larger than expected.
        uint32_t first = _localAllocations.firstOccupiedSlot();
        uint32_t last = _localAllocations.lastOccupiedSlot();
        for (uint32_t i = first; i <= last; i++) {
            void *block = _localAllocations[i];
            if (block) {
                Subzone *subzone = Subzone::subzone(block);
                subzone->make_global(subzone->quantum_index(block));
                _localAllocations.remove(i);
            }
        }
        // this will cause _localAllocations to resize down its internal pointer buffer
        _localAllocations.grow();
    }


    //
    // reap_all_local_blocks
    //
    // finalize and free all local blocks without doing any scanning
    // should only be called when it is known the stack is shallow and cannot root anything
    //
    void Thread::reap_all_local_blocks()
    {
        Sentinel guard(_localsGuard);
        if (_localAllocations.count() > 0) {
            ThreadLocalCollector tlc(_zone, NULL, *this);
            tlc.reap_all();
            // this will cause _localAllocations to resize down its internal pointer buffer
            _localAllocations.grow();
        }
    }


    // BlockRef FIXME: temporary glue code until all call sites convert to BlockRef.
    template <> void Thread::block_escaped<void *>(void *block) {
        Subzone *subzone;
        if (!_zone->in_subzone_memory(block))
            return;
        subzone = Subzone::subzone(block);
        usword_t q;
        if (!subzone->block_is_start(block, &q)) return; // we are not interested in non-block pointers
        SubzoneBlockRef ref(subzone, q);
        if (ref.is_thread_local()) block_escaped(ref);
    }

    //
    // block_escaped
    //
    // a block is escaping the stack; remove it from local set (cheaply)
    //
    template <class BlockRef> void Thread::block_escaped_internal(BlockRef block)
    {
        assert(block.is_thread_local());
        void *addr = block.address();
        /*
         It is possible that a thread might construct a pointer to a block which is local to another thread.
         If that pointer gets stored through a write barrier then we wind up here.
         It would be an error for the thread to dereference that pointer, but just storing it is technically ok.
         We must be careful to validate that the block is local to *this* thread.
         */
        if (auto_expect_false(block.is_local_garbage())) {
            /*
             If we see a local garbage block we must first ensure that it is local to the current thread.
             If it is then we must evict any non-garbage blocks which are reachable from that block.
             However, we don't currently have a way to discover when one thread local garbage block is
             reachable from another thread local garbage block. The scanner is not equipped to handle that.
             So we just escape all blocks reachable from the entire garbage list. This should be very rare.
             Note that the garbage blocks themselves remain thread local garbage. Only reachable non-garbage
             blocks are made global.
             */

            // verify the block is in this thread's garbage list
            if (_tlc && _tlc->block_in_garbage_list(addr)) {
                _tlc->evict_local_garbage();
            }
        } else {
            Sentinel guard(_localsGuard);
            // verify the block is local to this thread
            if (_localAllocations.contains(addr)) {
                if (block.should_scan_local_block()) {
                    ThreadLocalCollector scanner(_zone, NULL, *this);
                    scanner.eject_local_block(addr);
                }
                else {  // just do the one
                    block.make_global();
                    _localAllocations.remove(addr);
                    usword_t size = block.size();
                    _zone->adjust_allocation_counter(size);
                }
            }
        }
    }
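
    // A hedged sketch of the typical path into block_escaped_internal(): a write barrier
    // observes a thread-local block being stored into global memory and calls the void*
    // glue above, which resolves the subzone/quantum and dispatches here. Illustration only;
    // the real barrier lives in the zone's write-barrier code, and example_store is hypothetical.
    //
    //     void example_store(Thread &thread, void **global_slot, void *value) {
    //         *global_slot = value;
    //         thread.block_escaped(value);   // made global if it was local to this thread
    //     }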

#ifdef DEBUG
    // In release builds the optimizer knows this never gets called. But we need it to link a debug build.
    template <> void Thread::block_escaped_internal<class LargeBlockRef>(LargeBlockRef block) {
        __builtin_trap();
    }
#endif

    //
    // track_local_memcopy
    //
    // If dst lies within a live, local, scanned block and src contains pointers to local blocks,
    // mark dst as needing local scanning (the referenced blocks stay local).
    // Otherwise, if dst is not such a block, mark all local blocks referenced from src as escaped.
    // src might be the stack.
    void Thread::track_local_memcopy(const void *src, void *dst, size_t size) {
        Subzone *dstSubzone = NULL;
        void *dstBase = NULL;
        bool should_track_local = false;
        if (_zone->in_subzone_memory((void *)dst)) {
            dstSubzone = Subzone::subzone((void *)dst);
            usword_t dst_q;
            dstBase = dstSubzone->block_start((void *)dst, dst_q);
            if (dstBase) {
                // if memmoving within the same block, bail early
                size_t dstSize = dstSubzone->size(dst_q);
                if (src > dstBase && src < ((char *)dstBase + dstSize))
                    return;
                if (dstSubzone->is_live_thread_local(dst_q)
                    && (dstSubzone->should_scan_local_block(dst_q) || dstSubzone->is_scanned(dst_q))) {
                    should_track_local = true;
                }
            }
        }
        void **start = (void **)src;
        void **end = start + size/sizeof(void *);
        bool dstStoredInto = false;
        while (start < end) {
            void *candidate = *start;
            if (candidate) {
                if (_zone->in_subzone_memory(candidate)) {
                    Subzone *candidateSubzone = Subzone::subzone(candidate);
                    usword_t q = candidateSubzone->quantum_index_unchecked(candidate);
                    if (q < candidateSubzone->allocation_limit() && candidateSubzone->is_live_thread_local(q)) {// && thread->_localAllocations.contains(candidate))
                        if (should_track_local) {
                            dstStoredInto = true;
                            break;
                        }
                        else {
                            SubzoneBlockRef candidateRef(candidateSubzone, q);
                            block_escaped(candidateRef);
                        }
                    }
                }
            }
            start++;
        }
        if (dstStoredInto) {
            // we can only get here if dstBase is a valid block
            dstSubzone->set_scan_local_block(dstSubzone->quantum_index_unchecked(dstBase));
        }
    }
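
    // A hedged usage sketch: the zone's memmove/object-copy support is expected to call
    // track_local_memcopy() when raw memory is copied, so that pointers to thread-local
    // blocks flowing into dst are either tracked for local scanning or escaped.
    // Illustration only; example_memmove is not part of this file.
    //
    //     void example_memmove(Thread &thread, void *dst, const void *src, size_t size) {
    //         thread.track_local_memcopy(src, dst, size);
    //         memmove(dst, src, size);
    //     }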

    bool Thread::thread_cache_add(void *block, Subzone *subzone, usword_t q) {
        // don't cache medium subzone blocks.
        bool cached = false;
        if (subzone->is_small()) {
            usword_t n = subzone->length(q);
            if (n <= max_cached_small_multiple) {
                Admin *admin = subzone->admin();
                admin->mark_cached(subzone, q, n);
                FreeList &list = allocation_cache(admin->layout())[n];
                list.push(block, (n << allocate_quantum_small_log2));
                cached = true;
            }
        }
        return cached;
    }
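
    // A hedged sketch of the matching allocation fast path: the per-thread cache is an array
    // of FreeLists indexed by quantum count, so a small request of n quanta can be satisfied
    // by popping allocation_cache(layout)[n] without taking zone locks; flush_cache() above
    // is the inverse, returning cached blocks to the global heap. Illustration only.
    //
    //     // FreeList &list = allocation_cache(layout)[n];
    //     // void *block = list.pop()->address();   // a NULL address means the cache is empty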

    //
    // scan_current_thread
    //
    // Scan the current thread stack and registers for block references.
    //
    void Thread::scan_current_thread(thread_scanner_t scanner, void *stack_bottom) {
        // capture non-volatile registers
        NonVolatileRegisters registers;

        // scan the registers
        Range range = registers.buffer_range();
        scanner(this, range);

        // scan the stack
        range.set_range(stack_bottom, _stack_base);
        if (_stack_scan_peak > range.address()) {
            _stack_scan_peak = range.address();
        }
        scanner(this, range);
    }

#ifndef __BLOCKS__
    class thread_scanner_helper : public Thread::thread_scanner {
        void (*_scanner) (Thread*, const Range&, void*);
        void *_arg;
    public:
        thread_scanner_helper(void (*scanner) (Thread*, const Range&, void*), void *arg) : _scanner(scanner), _arg(arg) {}
        virtual void operator() (Thread *thread, const Range &range) { _scanner(thread, range, _arg); }
    };
#endif

    void Thread::scan_current_thread(void (*scanner) (Thread*, const Range&, void*), void *arg, void *stack_bottom) {
#ifdef __BLOCKS__
        scan_current_thread(^(Thread *thread, const Range &range) { scanner(thread, range, arg); }, stack_bottom);
#else
        thread_scanner_helper helper(scanner, arg);
        scan_current_thread(helper, stack_bottom);
#endif
    }
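
    // A hedged usage sketch of the Blocks variant, assuming the caller passes a conservative
    // stack bottom such as its own frame address (illustration only, not a call site here):
    //
    //     // thread->scan_current_thread(^(Thread *t, const Range &range) {
    //     //     /* conservatively examine range for block references */
    //     // }, __builtin_frame_address(0));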

    union ThreadState {
#if defined(__i386__)
        i386_thread_state_t  regs;
#define THREAD_STATE_COUNT i386_THREAD_STATE_COUNT
#define THREAD_STATE_FLAVOR i386_THREAD_STATE
#define THREAD_STATE_SP __esp
#elif defined(__x86_64__)
        x86_thread_state64_t regs;
#define THREAD_STATE_COUNT x86_THREAD_STATE64_COUNT
#define THREAD_STATE_FLAVOR x86_THREAD_STATE64
#define THREAD_STATE_SP __rsp
#elif defined(__arm__)
        arm_thread_state_t regs;
#define THREAD_STATE_COUNT ARM_THREAD_STATE_COUNT
#define THREAD_STATE_FLAVOR ARM_THREAD_STATE
#define THREAD_STATE_SP __sp
#else
#error Unknown Architecture
#endif
        thread_state_data_t  data;

        void* get_stack_pointer() {
            // <rdar://problem/6453396> always align the stack address to a pointer boundary.
            return align_down(reinterpret_cast<void*>(regs.THREAD_STATE_SP - C_RED_ZONE), pointer_alignment);
        }
    };


    //
    // get_register_state
    //
    // read another thread's registers
    //
    void Thread::get_register_state(ThreadState &state, unsigned &user_count) {
        // select the register capture flavor
        user_count = THREAD_STATE_COUNT;
        thread_state_flavor_t flavor = THREAD_STATE_FLAVOR;

        // get the thread register state
        kern_return_t err = thread_get_state(_thread, flavor, state.data, &user_count);
        uint64_t retryDelay = 1;

        // We sometimes see KERN_ABORTED in conjunction with fork(). Typically a single retry succeeds in that case.
        // We also see various other error codes during thread exit/teardown. Retry generously until the port is dead (MACH_SEND_INVALID_DEST)
        // because otherwise we have a fatal error. The delay between iterations grows by a factor of 10 each time, which
        // results in a TOTAL sleep time of 1.111111 seconds to let the dying thread settle before we give up.
        while ((err != KERN_SUCCESS) && (err != MACH_SEND_INVALID_DEST) && (retryDelay < 10 * NSEC_PER_SEC)) {
            //malloc_printf("*** %s: unable to get thread state %d. Retrying (retry count: %d)\n", prelude(), err, retryCount);
            struct timespec sleeptime;
            sleeptime.tv_sec = retryDelay / NSEC_PER_SEC;
            sleeptime.tv_nsec = retryDelay % NSEC_PER_SEC;
            nanosleep(&sleeptime, NULL);
            retryDelay *= 10;
            err = thread_get_state(_thread, flavor, state.data, &user_count);
        }

        if (err) {
            // this is a fatal error. the program will crash if we can't scan this thread's state.
            char thread_description[256];
            description(thread_description, sizeof(thread_description));
            auto_fatal("get_register_state():  unable to get thread state:  err = %d, %s\n", err, thread_description);
        }
    }
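
    // Worked example of the retry budget above: retryDelay takes the values 1, 10, 100, ...,
    // 10^9 nanoseconds before reaching the 10 * NSEC_PER_SEC cutoff, so the total time slept
    // is (10^10 - 1) / 9 ns, roughly the 1.111111 seconds quoted in the comment.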


    //
    // scan_other_thread
    //
    // Scan the stack and registers of a thread other than the current thread for block references.
    //
    void Thread::scan_other_thread(thread_scanner_t scanner, bool withSuspend) {
        // <rdar://problem/6398665&6456504> can only safely scan if this thread was locked.
        assert(_scanning.state);

        // suspend the thread while scanning its registers and stack.
        if (withSuspend) suspend();

        unsigned user_count;
        ThreadState state;
        get_register_state(state, user_count);

        // scan the registers
        Range register_range((void *)state.data, user_count * sizeof(natural_t));
        scanner(this, register_range);

        // scan the stack
        Range stack_range(state.get_stack_pointer(), _stack_base);
        if (_stack_scan_peak > stack_range.address()) {
            _stack_scan_peak = stack_range.address();
        }
        scanner(this, stack_range);

        if (withSuspend) {
            if (ThreadLocalCollector::should_collect_suspended(*this)) {
                // Perform a TLC and pull the resulting garbage list into global garbage
                ThreadLocalCollector tlc(_zone, state.get_stack_pointer(), *this);
                // Blocks in the garbage list have already been marked by the roots scan.
                // Since these blocks are known to be garbage, explicitly unmark them now to collect them in this cycle.
                tlc.collect_suspended(register_range, stack_range);
            }
            resume();
        }
    }

    void Thread::scan_other_thread(void (*scanner) (Thread*, const Range&, void*), void *arg, bool withSuspend) {
#ifdef __BLOCKS__
        scan_other_thread(^(Thread *thread, const Range &range) { scanner(thread, range, arg); }, withSuspend);
#else
        thread_scanner_helper helper(scanner, arg);
        scan_other_thread(helper, withSuspend);
#endif
    }

    //
    // suspend
    //
    // Temporarily suspend the thread from further execution. Does nothing if the thread
    // is the current thread or is no longer bound to a pthread.
    //
    void Thread::suspend()  {
        // do not suspend this thread
        if (is_current_thread() || !is_bound()) return;

        if (_suspended == 0) {
            // request thread suspension
            kern_return_t err = thread_suspend(_thread);

            if (err != KERN_SUCCESS) {
                char thread_description[256];
                description(thread_description, sizeof(thread_description));
                auto_fatal("Thread::suspend():  unable to suspend a thread:  err = %d, %s\n", err, thread_description);
            }
        }
        _suspended++;
    }


    //
    // resume
    //
    // Resume a suspended thread.
    //
    void Thread::resume() {
        // do not resume this thread
        if (is_current_thread() || !is_bound()) return;

        if (_suspended == 1) {
            // request thread resumption
            kern_return_t err = thread_resume(_thread);

            if (err != KERN_SUCCESS) {
                char thread_description[256];
                description(thread_description, sizeof(thread_description));
                auto_fatal("Thread::resume():  unable to resume a thread:  err = %d, %s\n", err, thread_description);
            }
        }
        _suspended--;
    }
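
    // Note on nesting: _suspended counts suspend()/resume() pairs, and only the outermost
    // pair actually touches the Mach thread. A small illustration:
    //
    //     // thread.suspend();   // _suspended 0 -> 1, thread_suspend() is called
    //     // thread.suspend();   // _suspended 1 -> 2, no kernel call
    //     // thread.resume();    // _suspended 2 -> 1, no kernel call
    //     // thread.resume();    // _suspended 1 -> 0, thread_resume() is called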


    char *Thread::description(char *buf, size_t bufsz) {
        if (_pthread == NULL) {
            snprintf(buf, bufsz, "Thread %p: unbound", this);
        } else {
            snprintf(buf, bufsz, "Thread %p: _pthread = %p, _thread = 0x%x, _stack_base = %p, enlivening %s, %d local blocks",
                     this, _pthread, _thread, _stack_base,
                     needs_enlivening().state ? " on" : "off", _localAllocations.count());
        }
        return buf;
    }


    extern "C" void auto_print_registered_threads() {
        Zone *zone = Zone::zone();
        Mutex lock(zone->threads_mutex());
        Thread *thread = zone->threads();
        while (thread != NULL) {
            char thread_description[256];
            thread->description(thread_description, sizeof(thread_description));
            malloc_printf("%s\n", thread_description);
            thread = thread->next();
        }
    }


#ifdef __BLOCKS__
    //
    // dump
    //
    void Thread::dump(auto_zone_stack_dump stack_dump, auto_zone_register_dump register_dump, auto_zone_node_dump dump_local_block) {
        Range stack_range;
        // dump the registers.
        if (!is_bound()) return;
        if (register_dump) {
            if (is_current_thread()) {
                NonVolatileRegisters registers;

                // scan the registers
                Range range = registers.buffer_range();
                //scanner.scan_range_from_registers(range, *this, 0);
                register_dump(range.address(), range.size());
                stack_range.set_range(__builtin_frame_address(0), _stack_base);
            }
            else {
                unsigned user_count;
                ThreadState state;
                get_register_state(state, user_count);
                register_dump(&state.data, user_count * sizeof(void *));
                stack_range.set_range(state.get_stack_pointer(), _stack_base);
            }
        }
        // dump the stack
        if (stack_dump) stack_dump(stack_range.address(), stack_range.size());
#if 0
unsafe; thread might be in the middle of an STL set grow; need to put new locks into a tracing build to get this info safely
        // dump the locals
        if (!dump_local_block) return;
        for (uint32_t i=_localAllocations.firstOccupiedSlot(); i<=_localAllocations.lastOccupiedSlot(); i++) {
            void *block = _localAllocations[i];
            if (block) {
                Subzone *subzone = Subzone::subzone(block);
                dump_local_block(block, subzone->size(block), subzone->layout(block), subzone->refcount(block));
            }
        }
#endif
    }

    void Thread::visit(auto_zone_visitor_t *visitor) {
        // dump the registers.
        if (!is_bound()) return;
        if (is_current_thread()) {
            // snapshot the stack range.
            auto_address_range_t stack_range = { (void *)auto_get_sp(), _stack_base };

            // snapshot the registers.
            NonVolatileRegisters registers;
            Range range = registers.buffer_range();
            auto_address_range_t registers_range = { range.address(), range.end() };
            visitor->visit_thread(_pthread, stack_range, registers_range);
        } else {
            unsigned user_count;
            ThreadState state;
            get_register_state(state, user_count);
            auto_address_range_t stack_range = { state.get_stack_pointer(), _stack_base };
            auto_address_range_t registers_range = { &state.data, &state.data[user_count] };
            visitor->visit_thread(_pthread, stack_range, registers_range);
        }
    }

#endif /* __BLOCKS__ */

};