/*
 * Copyright (c) 2011 Apple Inc. All rights reserved.
 *
 * @APPLE_APACHE_LICENSE_HEADER_START@
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @APPLE_APACHE_LICENSE_HEADER_END@
 */
/*
    Thread.cpp
    Registered Thread Management
    Copyright (c) 2004-2011 Apple Inc. All rights reserved.
 */

#include "Definitions.h"
#include "Thread.h"
#include "Zone.h"
#include "ThreadLocalCollector.h"
#include "BlockIterator.h"
#include <crt_externs.h>

#if defined(__i386__) || defined(__arm__)
// 32-bit x86/arm use no red zone.
#define C_RED_ZONE 0
#elif defined(__x86_64__)
// according to http://www.x86-64.org/documentation/abi.pdf (page 15)
#define C_RED_ZONE 128
#else
#error Unknown Architecture
#endif


namespace Auto {


    //----- Thread -----//

    Thread::Thread(Zone *zone)
        : _next(NULL), _zone(zone), _pthread(NULL), _thread(MACH_PORT_NULL), _stack_base(NULL),
          _scanning(), _suspended(0), _stack_scan_peak(NULL), _tlc(NULL), _localAllocations(64), _localsGuard(SENTINEL_T_INITIALIZER),
          _destructor_count(0), _in_collector(false), _tlc_watchdog_counter(0), _pending_count_accumulator(NULL)
    {
        bind();
    }

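    //
    // flush_cache
    //
    // Empties one per-thread allocation cache (used by the destructor). Each cached block is
    // handed back to its admin and marked as an ordinary global allocation so the collector can
    // reclaim it; the zone's allocation counter is adjusted by the total size returned.
    //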
    void Thread::flush_cache(AllocationCache &cache) {
        usword_t count = 0, size = 0;
        for (usword_t i = 1; i < AllocationCache::cache_size; ++i) {
            FreeList &list = cache[i];
            const size_t blockSize = i * allocate_quantum_small;
            while (void *block = list.pop()->address()) {
                // mark the thread local block as global so that it can be collected
                assert(_zone->in_subzone_memory(block));
                Subzone *subzone = Subzone::subzone(block);
                subzone->admin()->mark_allocated(block, i, AUTO_MEMORY_UNSCANNED, false, false);
                ++count;
                size += blockSize;
            }
        }
        _zone->adjust_allocation_counter(size);
    }

    Thread::~Thread() {
        /* If any blocks remain in our local allocations list mark them as global. */
        /* We cannot reclaim them because we cannot finalize here. */
        if (_localAllocations.count() > 0) {
            for (uint32_t i=_localAllocations.firstOccupiedSlot(); i<=_localAllocations.lastOccupiedSlot(); i++) {
                void *block = _localAllocations[i];
                if (block) {
                    Subzone *subzone = Subzone::subzone(block);
                    subzone->make_global(subzone->quantum_index_unchecked(block));
                }
            }
        }

        // release the per-thread allocation cache items
        flush_cache(_allocation_cache[AUTO_MEMORY_SCANNED]);
        flush_cache(_allocation_cache[AUTO_MEMORY_UNSCANNED]);
    }

    //
    // bind
    //
    // Associate the Thread with the calling pthread.
    // This declares the Zone's interest in scanning the calling pthread's stack during collections.
    //
    void Thread::bind() {
        _pthread = pthread_self();
        _thread = pthread_mach_thread_np(_pthread);
        // The kernel stores the environment and command line arguments on the main thread's stack.
        // Skip that area to avoid false rooting from the character data.
        _stack_base = pthread_main_np() ? align_down(**(void***)_NSGetArgv(), pointer_alignment) : pthread_get_stackaddr_np(_pthread);
        _stack_scan_peak = _stack_base;
    }


    //
    // unbind
    //
    // Disassociate the Thread from the calling pthread.
    // May only be called from the same pthread that previously called bind().
    // unbind() synchronizes with stack scanning to ensure that if a stack scan is in progress
    // the stack will remain available until scanning is complete. Returns true if the thread
    // can be reclaimed immediately.
    //
    bool Thread::unbind() {
        SpinLock lock(&_scanning.lock);
        assert(!_scanning.state);
        assert(pthread_self() == _pthread);
        _pthread = NULL;
        _thread = MACH_PORT_NULL;
        _stack_base = NULL;
        _stack_scan_peak = NULL;
        return true;
    }

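    //
    // lockForScanning / unlockForScanning
    //
    // lockForScanning() acquires the scanning lock and, if this thread is still bound, marks it as
    // being scanned and returns true with the lock held; otherwise it drops the lock and returns
    // false. unlockForScanning() clears the flag and releases the lock. unbind() takes the same
    // lock, so a bound thread's stack cannot disappear while a scan holds it.
    //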
    bool Thread::lockForScanning() {
        spin_lock(&_scanning.lock);
        if (is_bound()) {
            _scanning.state = true;
            return true;
        }
        spin_unlock(&_scanning.lock);
        return false;
    }

    void Thread::unlockForScanning() {
        _scanning.state = false;
        spin_unlock(&_scanning.lock);
    }

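    //
    // enliven_do
    //
    // Functor applied to a block being enlivened: set the block's mark bit, and if the block was
    // not already marked and is scanned memory, mark it pending so the collector will scan it.
    //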
    struct enliven_do {
        void operator ()(Subzone *subzone, usword_t q) {
            if (!subzone->test_and_set_mark(q) && subzone->is_scanned(q))
                subzone->test_and_set_pending(q, true);
        }

        void operator ()(Large *large) {
            if (!large->test_and_set_mark() && large->is_scanned())
                large->set_pending();
        }
    };

    void Thread::enliven_block(void *block) {
        enliven_do op;
        blockDo(_zone, block, op);
    }


    //
    // clear_stack
    //
    // clears stack memory from the current sp to the depth that was scanned by the last collection
    //
    void Thread::clear_stack() {
        // We need to be careful about calling functions during stack clearing.
        // We can't use bzero or the like to do the zeroing because we don't know how much stack they use.
        // The amount to clear is typically small so just use a simple loop writing pointer sized NULL values.
        void **sp = (void **)auto_get_sp();
        void **zero_addr = (void **)_stack_scan_peak;
        _stack_scan_peak = sp;
        while (zero_addr < sp) {
            *zero_addr = NULL;
            zero_addr++;
        }
    }


    //
    // flush_local_blocks
    //
    // empties the local allocations hash, making all blocks global
    //
    void Thread::flush_local_blocks()
    {
        Sentinel::assert_guarded(_localsGuard);
        // This only gets called if the local block set grows much larger than expected.
        uint32_t first = _localAllocations.firstOccupiedSlot();
        uint32_t last = _localAllocations.lastOccupiedSlot();
        for (uint32_t i = first; i <= last; i++) {
            void *block = _localAllocations[i];
            if (block) {
                Subzone *subzone = Subzone::subzone(block);
                subzone->make_global(subzone->quantum_index(block));
                _localAllocations.remove(i);
            }
        }
        // this will cause _localAllocations to resize down its internal pointer buffer
        _localAllocations.grow();
    }


    //
    // reap_all_local_blocks
    //
    // finalize and free all local blocks without doing any scanning
    // should only be called when it is known the stack is shallow and cannot root anything
    //
    void Thread::reap_all_local_blocks()
    {
        Sentinel guard(_localsGuard);
        if (_localAllocations.count() > 0) {
            ThreadLocalCollector tlc(_zone, NULL, *this);
            tlc.reap_all();
            // this will cause _localAllocations to resize down its internal pointer buffer
            _localAllocations.grow();
        }
    }



    // BlockRef FIXME: temporary glue code until all call sites convert to BlockRef.
    template <> void Thread::block_escaped<void *>(void *block) {
        Subzone *subzone;
        if (!_zone->in_subzone_memory(block))
            return;
        subzone = Subzone::subzone(block);
        usword_t q;
        if (!subzone->block_is_start(block, &q)) return; // we are not interested in non-block pointers
        SubzoneBlockRef ref(subzone, q);
        if (ref.is_thread_local()) block_escaped(ref);
    }

    //
    // block_escaped
    //
    // a block is escaping the stack; remove it from local set (cheaply)
    //
    template <class BlockRef> void Thread::block_escaped_internal(BlockRef block)
    {
        assert(block.is_thread_local());
        void *addr = block.address();
        /*
         It is possible that a thread might construct a pointer to a block which is local to another thread.
         If that pointer gets stored through a write barrier then we wind up here.
         It would be an error for the thread to dereference that pointer, but just storing it is technically ok.
         We must be careful to validate that the block is local to *this* thread.
         */
        if (auto_expect_false(block.is_local_garbage())) {
            /*
             If we see a local garbage block we must first ensure that it is local to the current thread.
             If it is then we must evict any non-garbage blocks which are reachable from that block.
             However, we don't currently have a way to discover when one thread local garbage block is
             reachable from another thread local garbage block. The scanner is not equipped to handle that.
             So we just escape all blocks reachable from the entire garbage list. This should be very rare.
             Note that the garbage blocks themselves remain thread local garbage. Only reachable non-garbage
             blocks are made global.
             */

            // verify the block is in this thread's garbage list
            if (_tlc && _tlc->block_in_garbage_list(addr)) {
                _tlc->evict_local_garbage();
            }
        } else {
            Sentinel guard(_localsGuard);
            // verify the block is local to this thread
            if (_localAllocations.contains(addr)) {
                if (block.should_scan_local_block()) {
                    ThreadLocalCollector scanner(_zone, NULL, *this);
                    scanner.eject_local_block(addr);
                }
                else {	// just do the one
                    block.make_global();
                    _localAllocations.remove(addr);
                    usword_t size = block.size();
                    _zone->adjust_allocation_counter(size);
                }
            }
        }
    }

#ifdef DEBUG
    // In release builds the optimizer knows this never gets called. But we need it to link a debug build.
    template <> void Thread::block_escaped_internal<class LargeBlockRef>(LargeBlockRef block) {
        __builtin_trap();
    }
#endif

    //
    // track_local_memcopy
    //
    // If dst lies within a live, scanned thread-local block, and the copied data contains pointers
    // to other thread-local blocks, mark the destination block as needing a local scan.
    // Otherwise (dst is not a trackable local block), treat every thread-local block referenced by
    // the copied data as escaped. src might be the stack.
    void Thread::track_local_memcopy(const void *src, void *dst, size_t size) {
        Subzone *dstSubzone = NULL;
        void *dstBase = NULL;
        bool should_track_local = false;
        if (_zone->in_subzone_memory((void *)dst)) {
            dstSubzone = Subzone::subzone((void *)dst);
            usword_t dst_q;
            dstBase = dstSubzone->block_start((void *)dst, dst_q);
            if (dstBase) {
                // if memmoving within block bail early
                size_t dstSize = dstSubzone->size(dst_q);
                if (src > dstBase && src < ((char *)dstBase + dstSize))
                    return;
                if (dstSubzone->is_live_thread_local(dst_q)
                    && (dstSubzone->should_scan_local_block(dst_q) || dstSubzone->is_scanned(dst_q))) {
                    should_track_local = true;
                }
            }
        }
        void **start = (void **)src;
        void **end = start + size/sizeof(void *);
        bool dstStoredInto = false;
        while (start < end) {
            void *candidate = *start;
            if (candidate) {
                if (_zone->in_subzone_memory(candidate)) {
                    Subzone *candidateSubzone = Subzone::subzone(candidate);
                    usword_t q = candidateSubzone->quantum_index_unchecked(candidate);
                    if (q < candidateSubzone->allocation_limit() && candidateSubzone->is_live_thread_local(q)) {// && thread->_localAllocations.contains(candidate))
                        if (should_track_local) {
                            dstStoredInto = true;
                            break;
                        }
                        else {
                            SubzoneBlockRef candidateRef(candidateSubzone, q);
                            block_escaped(candidateRef);
                        }
                    }
                }
            }
            start++;
        }
        if (dstStoredInto) {
            // we can only get here if dstBase is a valid block
            dstSubzone->set_scan_local_block(dstSubzone->quantum_index_unchecked(dstBase));
        }
    }

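    //
    // thread_cache_add
    //
    // Try to place a freed block on the per-thread allocation cache. Only small-subzone blocks of
    // at most max_cached_small_multiple quanta are cached; the block is marked cached in its admin
    // and pushed on the free list for its quantum count. Returns true if the block was cached.
    //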
    bool Thread::thread_cache_add(void *block, Subzone *subzone, usword_t q) {
        // don't cache medium subzone blocks.
        bool cached = false;
        if (subzone->is_small()) {
            usword_t n = subzone->length(q);
            if (n <= max_cached_small_multiple) {
                Admin *admin = subzone->admin();
                admin->mark_cached(subzone, q, n);
                FreeList &list = allocation_cache(admin->layout())[n];
                list.push(block, (n << allocate_quantum_small_log2));
                cached = true;
            }
        }
        return cached;
    }

    //
    // scan_current_thread
    //
    // Scan the current thread stack and registers for block references.
    //
    void Thread::scan_current_thread(thread_scanner_t scanner, void *stack_bottom) {
        // capture non-volatile registers
        NonVolatileRegisters registers;

        // scan the registers
        Range range = registers.buffer_range();
        scanner(this, range);

        // scan the stack
        range.set_range(stack_bottom, _stack_base);
        if (_stack_scan_peak > range.address()) {
            _stack_scan_peak = range.address();
        }
        scanner(this, range);
    }

#ifndef __BLOCKS__
    class thread_scanner_helper : public Thread::thread_scanner {
        void (*_scanner) (Thread*, const Range&, void*);
        void *_arg;
    public:
        thread_scanner_helper(void (*scanner) (Thread*, const Range&, void*), void *arg) : _scanner(scanner), _arg(arg) {}
        virtual void operator() (Thread *thread, const Range &range) { _scanner(thread, range, _arg); }
    };
#endif

    void Thread::scan_current_thread(void (*scanner) (Thread*, const Range&, void*), void *arg, void *stack_bottom) {
#ifdef __BLOCKS__
        scan_current_thread(^(Thread *thread, const Range &range) { scanner(thread, range, arg); }, stack_bottom);
#else
        thread_scanner_helper helper(scanner, arg);
        scan_current_thread(helper, stack_bottom);
#endif
    }

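    //
    // ThreadState
    //
    // Architecture-specific register state captured via thread_get_state() when examining another
    // thread. get_stack_pointer() returns that thread's stack pointer, backed off by the C red zone
    // and aligned down to a pointer boundary, so the red zone below the stack pointer is included
    // in the stack scan.
    //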
    union ThreadState {
#if defined(__i386__)
        i386_thread_state_t  regs;
#define THREAD_STATE_COUNT i386_THREAD_STATE_COUNT
#define THREAD_STATE_FLAVOR i386_THREAD_STATE
#define THREAD_STATE_SP __esp
#elif defined(__x86_64__)
        x86_thread_state64_t regs;
#define THREAD_STATE_COUNT x86_THREAD_STATE64_COUNT
#define THREAD_STATE_FLAVOR x86_THREAD_STATE64
#define THREAD_STATE_SP __rsp
#elif defined(__arm__)
        arm_thread_state_t regs;
#define THREAD_STATE_COUNT ARM_THREAD_STATE_COUNT
#define THREAD_STATE_FLAVOR ARM_THREAD_STATE
#define THREAD_STATE_SP __sp
#else
#error Unknown Architecture
#endif
        thread_state_data_t  data;

        void* get_stack_pointer() {
            // <rdar://problem/6453396> always align the stack address to a pointer boundary.
            return align_down(reinterpret_cast<void*>(regs.THREAD_STATE_SP - C_RED_ZONE), pointer_alignment);
        }
    };


    //
    // get_register_state
    //
    // read another thread's registers
    //
    void Thread::get_register_state(ThreadState &state, unsigned &user_count) {
        // select the register capture flavor
        user_count = THREAD_STATE_COUNT;
        thread_state_flavor_t flavor = THREAD_STATE_FLAVOR;

        // get the thread register state
        kern_return_t err = thread_get_state(_thread, flavor, state.data, &user_count);
        uint64_t retryDelay = 1;

        // We sometimes see KERN_ABORTED in conjunction with fork(); a single retry typically succeeds in that case.
        // Various other error codes can show up during thread exit/teardown (the port eventually dies with
        // MACH_SEND_INVALID_DEST); only KERN_ABORTED is retried here, and anything else is treated as fatal below.
        // The retry delay grows geometrically (1ns, 10ns, ... 1s), for a TOTAL sleep time of about 1.111 seconds,
        // to let the dying thread settle before we give up.
        while (err == KERN_ABORTED && retryDelay < 10 * NSEC_PER_SEC) {
            //malloc_printf("*** %s: unable to get thread state %d. Retrying (retry count: %d)\n", prelude(), err, retryCount);
            struct timespec sleeptime;
            sleeptime.tv_sec = retryDelay / NSEC_PER_SEC;
            sleeptime.tv_nsec = retryDelay % NSEC_PER_SEC;
            nanosleep(&sleeptime, NULL);
            retryDelay *= 10;
            err = thread_get_state(_thread, flavor, state.data, &user_count);
        }

        if (err) {
            // this is a fatal error. the program will crash if we can't scan this thread's state.
            char thread_description[256];
            description(thread_description, sizeof(thread_description));
            auto_fatal("get_register_state():  unable to get thread state:  err = %d, %s\n", err, thread_description);
        }
    }


    //
    // scan_other_thread
    //
    // Scan the stack and registers of a thread other than the current thread for block references.
    //
    void Thread::scan_other_thread(thread_scanner_t scanner, bool withSuspend) {
        // <rdar://problem/6398665&6456504> can only safely scan if this thread was locked.
        assert(_scanning.state);

        // suspend the thread while scanning its registers and stack.
        if (withSuspend) suspend();

        unsigned user_count;
        ThreadState state;
        get_register_state(state, user_count);

        // scan the registers
        Range register_range((void *)state.data, user_count * sizeof(natural_t));
        scanner(this, register_range);

        // scan the stack
        Range stack_range(state.get_stack_pointer(), _stack_base);
        if (_stack_scan_peak > stack_range.address()) {
            _stack_scan_peak = stack_range.address();
        }
        scanner(this, stack_range);

        if (withSuspend) {
            if (ThreadLocalCollector::should_collect_suspended(*this)) {
                // Perform a TLC and pull the resulting garbage list into global garbage
                ThreadLocalCollector tlc(_zone, state.get_stack_pointer(), *this);
                // Blocks in the garbage list have already been marked by the roots scan.
                // Since these blocks are known to be garbage, explicitly unmark them now to collect them in this cycle.
                tlc.collect_suspended(register_range, stack_range);
            }
            resume();
        }
    }

    void Thread::scan_other_thread(void (*scanner) (Thread*, const Range&, void*), void *arg, bool withSuspend) {
#ifdef __BLOCKS__
        scan_other_thread(^(Thread *thread, const Range &range) { scanner(thread, range, arg); }, withSuspend);
#else
        thread_scanner_helper helper(scanner, arg);
        scan_other_thread(helper, withSuspend);
#endif
    }

    //
    // suspend
    //
    // Temporarily suspend the thread from further execution. Calls nest: the thread is only
    // suspended on the first call, and resume() must be called once for each call to suspend().
    // Does nothing for the current thread or an unbound thread.
    //
    void Thread::suspend()  {
        // do not suspend this thread
        if (is_current_thread() || !is_bound()) return;

        if (_suspended == 0) {
            // request thread suspension
            kern_return_t err = thread_suspend(_thread);

            if (err != KERN_SUCCESS) {
                char thread_description[256];
                description(thread_description, sizeof(thread_description));
                auto_fatal("Thread::suspend():  unable to suspend a thread:  err = %d, %s\n", err, thread_description);
            }
        }
        _suspended++;
    }


    //
    // resume
    //
    // Resume a suspended thread.
    //
    void Thread::resume() {
        // do not resume this thread
        if (is_current_thread() || !is_bound()) return;

        if (_suspended == 1) {
            // request thread resumption
            kern_return_t err = thread_resume(_thread);

            if (err != KERN_SUCCESS) {
                char thread_description[256];
                description(thread_description, sizeof(thread_description));
                auto_fatal("Thread::resume():  unable to resume a thread:  err = %d, %s\n", err, thread_description);
            }
        }
        _suspended--;
    }


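    //
    // description
    //
    // Formats a human-readable, one-line summary of this thread's state into buf and returns buf.
    // Used in fatal error messages and by auto_print_registered_threads().
    //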
    char *Thread::description(char *buf, size_t bufsz) {
        if (_pthread == NULL) {
            snprintf(buf, bufsz, "Thread %p: unbound", this);
        } else {
            snprintf(buf, bufsz, "Thread %p: _pthread = %p, _thread = 0x%x, _stack_base = %p, enlivening %s, %d local blocks",
                     this,_pthread, _thread, _stack_base,
                     needs_enlivening().state ? " on" : "off", _localAllocations.count());
        }
        return buf;
    }


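    //
    // auto_print_registered_threads
    //
    // Debugging aid: walks the zone's registered thread list (under the threads mutex) and prints
    // each thread's description via malloc_printf.
    //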
    extern "C" void auto_print_registered_threads() {
        Zone *zone = Zone::zone();
        Mutex lock(zone->threads_mutex());
        Thread *thread = zone->threads();
        while (thread != NULL) {
            char thread_description[256];
            thread->description(thread_description, sizeof(thread_description));
            malloc_printf("%s\n", thread_description);
            thread = thread->next();
        }
    }


#ifdef __BLOCKS__
    //
    // dump
    //
    void Thread::dump(auto_zone_stack_dump stack_dump, auto_zone_register_dump register_dump, auto_zone_node_dump dump_local_block) {
        Range stack_range;
        // dump the registers.
        if (!is_bound()) return;
        if (register_dump) {
            if (is_current_thread()) {
                NonVolatileRegisters registers;

                // scan the registers
                Range range = registers.buffer_range();
                //scanner.scan_range_from_registers(range, *this, 0);
                register_dump(range.address(), range.size());
                stack_range.set_range(__builtin_frame_address(0), _stack_base);
            }
            else {
                unsigned user_count;
                ThreadState state;
                get_register_state(state, user_count);
                register_dump(&state.data, user_count * sizeof(void *));
                stack_range.set_range(state.get_stack_pointer(), _stack_base);
            }
        }
        // dump the stack
        if (stack_dump) stack_dump(stack_range.address(), stack_range.size());
#if 0
unsafe; thread might be in the middle of an STL set grow; need to put new locks into a tracing build to get this info safely
        // dump the locals
        if (!dump_local_block) return;
        for (uint32_t i=_localAllocations.firstOccupiedSlot(); i<=_localAllocations.lastOccupiedSlot(); i++) {
            void *block = _localAllocations[i];
            if (block) {
                Subzone *subzone = Subzone::subzone(block);
                dump_local_block(block, subzone->size(block), subzone->layout(block), subzone->refcount(block));
            }
        }
#endif
    }

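    //
    // visit
    //
    // Passes this thread's stack and register ranges to the visitor. For the current thread the
    // stack snapshot starts at the current stack pointer and the non-volatile registers are used;
    // for other threads the register state is fetched with get_register_state(). The caller is
    // presumably responsible for ensuring the thread cannot exit while it is being visited.
    //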
    void Thread::visit(auto_zone_visitor_t *visitor) {
        // dump the registers.
        if (!is_bound()) return;
        if (is_current_thread()) {
            // snapshot the stack range.
            auto_address_range_t stack_range = { (void *)auto_get_sp(), _stack_base };

            // snapshot the registers.
            NonVolatileRegisters registers;
            Range range = registers.buffer_range();
            auto_address_range_t registers_range = { range.address(), range.end() };
            visitor->visit_thread(_pthread, stack_range, registers_range);
        } else {
            unsigned user_count;
            ThreadState state;
            get_register_state(state, user_count);
            auto_address_range_t stack_range = { state.get_stack_pointer(), _stack_base };
            auto_address_range_t registers_range = { &state.data, &state.data[user_count] };
            visitor->visit_thread(_pthread, stack_range, registers_range);
        }
    }

#endif /* __BLOCKS__ */

};