/*
 * Copyright (c) 2011 Apple Inc. All rights reserved.
 *
 * @APPLE_APACHE_LICENSE_HEADER_START@
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @APPLE_APACHE_LICENSE_HEADER_END@
 */
/*
    Thread.h
    Registered Thread Management
    Copyright (c) 2004-2011 Apple Inc. All rights reserved.
 */

#pragma once
#ifndef __AUTO_THREAD__
#define __AUTO_THREAD__


#include "Definitions.h"
#include "PointerHash.h"
#include "Locks.h"
#include "Subzone.h"
#include "AllocationCache.h"

namespace Auto {

    //
    // Forward declarations
    //
    class Zone;
    class ThreadLocalCollector;

    //
    // LocalBlocksHash
    // Hash set of a thread's local blocks; each entry carries two flag bits (scanned and marked).
    // XXX todo: simplify PointerHash to be/use standard C++ hash table
    //
    class LocalBlocksHash : public PointerHash {
    public:
        enum {
            FlagScanned = 0x1,
            FlagMarked = 0x2,
        };

        LocalBlocksHash(int initialCapacity) : PointerHash(initialCapacity) {}

        inline void setScanned(uint32_t index) { setFlag(index, FlagScanned); }
        inline void setScanned(void *p) { int32_t i = slotIndex(p); if (i != -1) setScanned(i); }
        inline bool wasScanned(uint32_t index) { return flagSet(index, FlagScanned); }

        inline void setMarked(uint32_t index) { setFlag(index, FlagMarked); }
        inline void setMarked(void *p) { int32_t i = slotIndex(p); if (i != -1) setMarked(i); }
        inline bool wasMarked(uint32_t index) { return flagSet(index, FlagMarked); }

        inline bool testAndSetMarked(uint32_t index) {
            bool old = wasMarked(index);
            if (!old) setMarked(index);
            return old;
        }

        // Shark says all these loads are expensive.
        inline void *markedPointerAtIndex(uint32_t index) {
            vm_address_t value = _pointers[index];
            void *pointer = (void *) (value & ~FlagsMask);
            return ((value & FlagMarked) ? pointer : NULL);
        }

        inline void *unmarkedPointerAtIndex(uint32_t index) {
            vm_address_t value = _pointers[index];
            void *pointer = (void *) (value & ~FlagsMask);
            return ((value & FlagMarked) ? NULL : ((value == (vm_address_t)RemovedEntry) ? NULL : pointer));
        }

        inline void *markedUnscannedPointerAtIndex(uint32_t index) {
            vm_address_t value = _pointers[index];
            void *pointer = (void *) (value & ~FlagsMask);
            return ((value & (FlagMarked|FlagScanned)) == FlagMarked ? pointer : NULL);
        }

        inline void clearFlagsRehash()  { rehash(FlagScanned | FlagMarked); }
        inline void clearFlagsCompact() { compact(FlagScanned | FlagMarked); }
        inline bool isFull() { return count() >= local_allocations_size_limit; }

        inline size_t localsSize() {
            size_t size = 0;
            for (uint32_t i = firstOccupiedSlot(), last = lastOccupiedSlot(); i <= last; i++) {
                void *block = (*this)[i];
                if (block) {
                    Subzone *subzone = Subzone::subzone(block);
                    usword_t q = subzone->quantum_index_unchecked(block);
                    size += subzone->size(q);
                }
            }
            return size;
        }
    };
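
    //
    // Illustrative sketch (not part of the original header): a thread-local
    // collection might use the two flag bits roughly as follows, assuming
    // `locals` is the owning thread's LocalBlocksHash and that PointerHash's
    // firstOccupiedSlot()/lastOccupiedSlot() are visible to the caller.
    //
    //     locals.clearFlagsRehash();                       // start with clean scanned/marked bits
    //     locals.setMarked(p);                             // p was reached from a root, keep it
    //     for (uint32_t i = locals.firstOccupiedSlot(), last = locals.lastOccupiedSlot(); i <= last; i++) {
    //         if (void *garbage = locals.unmarkedPointerAtIndex(i)) {
    //             // never marked: the local block is unreachable and can be finalized/freed
    //         }
    //     }
    //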



    //----- NonVolatileRegisters -----//

    //
    // Used to capture the register state of the current thread.
    //



    class NonVolatileRegisters {
      private:
#if defined(__i386__)
        // Non-volatile registers are: ebx, ebp, esp, esi, edi
        usword_t _registers[5];  // buffer for capturing registers

        //
        // capture_registers
        //
        // Capture the state of the non-volatile registers.
        //
        static inline void capture_registers(register usword_t *registers) {
            __asm__ volatile ("mov %%ebx,  0(%[registers]) \n"
                              "mov %%ebp,  4(%[registers]) \n"
                              "mov %%esp,  8(%[registers]) \n"
                              "mov %%esi, 12(%[registers]) \n"
                              "mov %%edi, 16(%[registers]) \n"
                              : : [registers] "a" (registers) : "memory");
        }
#elif defined(__x86_64__)
        // Non-volatile registers are: rbx rsp rbp r12-r15
        usword_t _registers[7];  // buffer for capturing registers

        //
        // capture_registers
        //
        // Capture the state of the non-volatile registers.
        //
        static inline void capture_registers(register usword_t *registers) {
            __asm__ volatile ("movq %%rbx,  0(%[registers]) \n"
                              "movq %%rsp,  8(%[registers]) \n"
                              "movq %%rbp, 16(%[registers]) \n"
                              "movq %%r12, 24(%[registers]) \n"
                              "movq %%r13, 32(%[registers]) \n"
                              "movq %%r14, 40(%[registers]) \n"
                              "movq %%r15, 48(%[registers]) \n"
                              : : [registers] "a" (registers) : "memory");
        }
#elif defined(__arm__)
        // Non-volatile registers are: r4..r8, r10, r11
        // r9 is saved for simplicity.
        usword_t _registers[8];  // buffer for capturing registers

        //
        // capture_registers
        //
        // Capture the state of the non-volatile registers.
        //
        static inline void capture_registers(register usword_t *registers) {
            __asm__ volatile ("stmia %[registers], {r4-r11}"
                              : : [registers] "r" (registers) : "memory");
        }

#else
#error Unknown Architecture
#endif

      public:

        //
        // Constructor
        //
        NonVolatileRegisters() { capture_registers(_registers); }


        //
        // buffer_range
        //
        // Returns the range of the captured registers buffer.
        //
        inline Range buffer_range() { return Range(_registers, sizeof(_registers)); }

    };
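
    //
    // Illustrative sketch (an assumption, not taken from this header): when the
    // current thread scans itself conservatively, constructing a NonVolatileRegisters
    // on the stack spills the callee-saved registers into its buffer, and
    // buffer_range() exposes that buffer as one more root range for a scanner
    // callback (the sketch imagines a Thread member with a thread_scanner_t
    // named `scanner` in scope).
    //
    //     NonVolatileRegisters registers;              // capture happens in the constructor
    //     scanner(this, registers.buffer_range());     // scan the saved registers like stack memory
    //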

    //----- Thread -----//

    //
    // Track threads that will be scanned during gc.
    //

    union ThreadState;

    class Thread : public AuxAllocated {

      private:

        Thread      *_next;                                 // next thread in linked list
        Zone        *_zone;                                 // managing zone
        pthread_t   _pthread;                               // posix thread
        mach_port_t _thread;                                // mach thread
        void        *_stack_base;                           // cached thread stack base (pthread_get_stackaddr_np(_pthread)).
        LockedBoolean _scanning;                            // if state is true, collector is scanning, unbind will block.
        uint32_t    _suspended;                             // records suspend count.
        void        *_stack_scan_peak;                      // lowest scanned stack address, for stack clearing
        ThreadLocalCollector *_tlc;                         // if a TLC is in progress, this is the collector. Otherwise NULL.
        AllocationCache _allocation_cache[2];               // caches[scanned/unscanned], one for each quanta size, slot 0 is for large clumps

        LocalBlocksHash _localAllocations;                  // holds blocks local to this thread
        sentinel_t   _localsGuard;

        LockedBoolean   _needs_enlivening;                  // per-thread support for Zone::enlivening_barrier().
        int32_t     _destructor_count;                      // tracks the number of times the pthread's key destructor has been called

        bool        _in_collector;                          // used to indicate that a thread is running inside the collector itself
        uint32_t    _tlc_watchdog_counter;                  // used to detect when the thread is idle so the heap collector can run a TLC
        LockedBoolean _in_compaction;                       // per-thread support for compaction read-barrier.
        Subzone::PendingCountAccumulator *_pending_count_accumulator; // buffers adjustments to subzone pending count

        // Buffer used by thread local collector. Enough space to hold max possible local blocks. Last so we don't touch as many pages if it doesn't fill up.
        void        *_tlc_buffer[local_allocations_size_limit];

        void get_register_state(ThreadState &state, unsigned &user_count);

        //
        // remove_local
        //
        // remove block from local set.  Assumes it's there.
        //
        inline void remove_local(void *block) {
            Sentinel guard(_localsGuard);
            _localAllocations.remove(block);
        }

        void flush_cache(AllocationCache &cache);

        //
        // block_escaped_internal
        //
        // a block is escaping the stack; remove it from local set (cheaply)
        //
        template <class BlockRef> void block_escaped_internal(BlockRef block);

      public:

        //
        // Constructor. Makes a Thread which is bound to the calling pthread.
        //
        Thread(Zone *zone);
        ~Thread();

        //
        // bind
        //
        // Associate the Thread with the calling pthread.
        // This declares the Zone's interest in scanning the calling pthread's stack during collections.
        //
        void bind();

        //
        // unbind
        //
        // Disassociate the Thread from the calling pthread.
        // May only be called from the same pthread that previously called bind().
        // unbind() synchronizes with stack scanning to ensure that if a stack scan is in progress
        // the stack will remain available until scanning is complete. Returns true if the thread
        // object can be immediately deleted.
        //
        bool unbind();

        //
        // lockForScanning
        //
        // Locks down a thread before concurrent scanning. This blocks a concurrent call to
        // unbind(), so a pthread cannot exit while its stack is being concurrently scanned.
        // Returns true if the thread is currently bound, and thus is known to have a valid stack.
        //
        bool lockForScanning();

        //
        // unlockForScanning
        //
        // Relinquishes the scanning lock, which unblocks a concurrent call to unbind().
        //
        void unlockForScanning();
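
        //
        // Illustrative sketch (not part of the original interface): the handshake a
        // collector thread might use so that the target pthread cannot exit while
        // its stack is being scanned concurrently.
        //
        //     if (thread->lockForScanning()) {                         // blocks a racing unbind()
        //         thread->scan_other_thread(scanner, /*withSuspend*/ true);
        //         thread->unlockForScanning();                         // lets a pending unbind() finish
        //     }
        //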

        //
        // Accessors
        //
        inline Thread      *next()                { return _next; }
        inline Zone        *zone()                { return _zone; }
        inline pthread_t   pthread()              { return _pthread; }
        inline mach_port_t thread()               { return _thread; }
        inline void        set_next(Thread *next) { _next = next; }
        inline AllocationCache &allocation_cache(const usword_t layout) { return _allocation_cache[layout & AUTO_UNSCANNED]; }
        inline void        *stack_base()          { return _stack_base; }
        inline LocalBlocksHash &locals()          { return _localAllocations; }
        inline sentinel_t &localsGuard()          { return _localsGuard; }
        inline bool       is_bound()              { return _pthread != NULL; }
        inline int32_t    increment_tsd_count()   { return ++_destructor_count; }
        inline void       set_in_collector(bool value) { _in_collector = value; }
        inline bool       in_collector() const    { return _in_collector; }
        inline void       set_thread_local_collector(ThreadLocalCollector *c) { _tlc = c; }
        inline ThreadLocalCollector *thread_local_collector() { return _tlc; }
        inline void       **tlc_buffer()          { return _tlc_buffer; }

        inline bool       tlc_watchdog_should_trigger() { return _tlc_watchdog_counter == 4; }
        inline void       tlc_watchdog_disable()  { _tlc_watchdog_counter = 5; }
        inline void       tlc_watchdog_reset()    { _tlc_watchdog_counter = 0; }
        inline void       tlc_watchdog_tickle()   { if (_tlc_watchdog_counter < 4) _tlc_watchdog_counter++; }
        inline void       set_pending_count_accumulator(Subzone::PendingCountAccumulator *a) { _pending_count_accumulator = a; }
        inline Subzone::PendingCountAccumulator   *pending_count_accumulator() const { return _pending_count_accumulator; }

        //
        // Per-thread enlivening, to reduce lock contention across threads while scanning.
        // These are manipulated by Zone::set_needs_enlivening() / clear_needs_enlivening().
        //
        // FIXME:  can we make this lockless altogether?
        //
        LockedBoolean     &needs_enlivening()     { return _needs_enlivening; }

        // BlockRef FIXME: retire
        void enliven_block(void *block);

        //
        // Per-thread compaction condition.
        //
        LockedBoolean     &in_compaction()        { return _in_compaction; }

        //
        // clear_stack
        //
        // clears stack memory from the current sp to the depth that was scanned by the last collection
        //
        void clear_stack();

        //
        // is_stack_address
        //
        // If the current thread is registered with the collector, returns true if the given address is within the address
        // range of the current thread's stack. This code assumes that calling pthread_getspecific() is faster than calling
        // pthread_get_stackaddr_np() followed by pthread_get_stacksize_np().
        //
        inline bool is_stack_address(void *address) {
            Range stack(__builtin_frame_address(0), _stack_base);
            return (stack.in_range(address));
        }

        //
        // block_escaped
        //
        // inline wrapper around block_escaped_internal to catch the non-local case without making a function call
        //
        template <class BlockRef> inline void block_escaped(BlockRef block) {
            if (block.is_thread_local()) block_escaped_internal(block);
        }

        //
        // track_local_assignment
        //
        // an assignment is happening.  Check for an escape, e.g. global = local
        //
        template <class DestBlock, class ValueBlock> void track_local_assignment(DestBlock dst, ValueBlock value)
        {
            bool blockStayedLocal = false;
            if (value.is_thread_local()) {
                if (dst.is_live_thread_local()) {
                    dst.set_scan_local_block();
                    //valueSubzone->set_stored_in_heap(value);
                    blockStayedLocal = true;
                }
                if (!blockStayedLocal) {
                    block_escaped_internal(value);
                }
            }
        }
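
        //
        // Illustrative sketch (an assumption about how callers use this): a store
        // barrier for `destination->slot = value` could route through
        // track_local_assignment first, so a thread-local value either stays local
        // (and its destination is flagged for scanning) or is escaped when it is
        // stored somewhere the thread-local collector cannot see, e.g. a global.
        // dstRef/valueRef stand in for whatever BlockRef-style handles the caller has.
        //
        //     thread.track_local_assignment(dstRef, valueRef);
        //     *slot = value;                                  // then perform the actual store
        //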


        //
        // track_local_memcopy
        //
        // make sure that dst is marked if any srcs are local,
        // otherwise escape all srcs that are local
        //
        void track_local_memcopy(const void *src, void *dst, size_t size);

        //
        // add_local_allocation
        //
        // add a block to this thread's set of tracked local allocations
        //
        void add_local_allocation(void *block) {
            // Limit the size of local block set. This should only trigger rarely.
            Sentinel guard(_localsGuard);
            if (_localAllocations.isFull())
                flush_local_blocks();
            _localAllocations.add(block);
        }

        //
        // flush_local_blocks
        //
        // empties the local allocations hash, making all blocks global
        //
        void flush_local_blocks();

        //
        // reap_all_local_blocks
        //
        // finalize and free all local blocks without doing any scanning
        // should only be called when it is known the stack cannot root anything, such as thread exit
        //
        void reap_all_local_blocks();

        //
        // scan_current_thread
        //
        // Scan the current thread stack and registers for block references.
        //
#ifdef __BLOCKS__
        typedef void (^thread_scanner_t) (Thread *thread, const Range &range);
#else
        class thread_scanner {
        public:
            virtual void operator() (Thread *thread, const Range &range) = 0;
        };
        typedef thread_scanner &thread_scanner_t;
#endif

        void scan_current_thread(thread_scanner_t scanner, void *stack_bottom);
        void scan_current_thread(void (*scanner) (Thread*, const Range&, void*), void *arg, void *stack_bottom);
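
        //
        // Illustrative sketch (not from this header; assumes Range exposes address()
        // and end()): with __BLOCKS__ enabled, a caller can hand scan_current_thread
        // a block that conservatively examines every word of each reported range.
        //
        //     scan_current_thread(^(Thread *thread, const Range &range) {
        //         void **cursor = (void **)range.address();
        //         void **limit  = (void **)range.end();
        //         for ( ; cursor < limit; ++cursor) {
        //             // look *cursor up in the zone and mark it pending if it is a block
        //         }
        //     }, __builtin_frame_address(0));
        //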

        //
        // scan_other_thread
        //
        // Scan a thread other than the current thread stack and registers for block references.
        //
        void scan_other_thread(thread_scanner_t scanner, bool withSuspend);
        void scan_other_thread(void (*scanner) (Thread*, const Range&, void*), void *arg, bool withSuspend);

#ifdef __BLOCKS__
        //
        // dump local objects
        //
        // use callout to dump local objects
        //
        void dump(auto_zone_stack_dump stack_dump, auto_zone_register_dump register_dump, auto_zone_node_dump dump_local_block);

        //
        // visit
        //
        // visits the thread's stack and registers.
        //
        void visit(auto_zone_visitor_t *visitor);
#endif

        //
        // is_current_thread
        //
        // Returns true if this thread is the current thread.
        //
        inline bool is_current_thread() const {
            return pthread_self() == _pthread;
        }


        //
        // thread_cache_add
        //
        // return memory to the thread local cache
        // returns true if the block was cached, false if it could not be cached
        //
        bool thread_cache_add(void *block, Subzone *subzone, usword_t q);
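
        //
        // Illustrative sketch (an assumption): a small-block deallocation path might
        // try the per-thread cache first and only fall back to the zone's shared
        // free lists when the block cannot be cached.
        //
        //     if (!thread.thread_cache_add(block, subzone, q)) {
        //         // hand the block back to the zone's shared free path instead
        //     }
        //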


        //
        // unlink
        //
        // Unlink the thread from the list of threads.
        //
        inline void unlink(Thread **link) {
            for (Thread *t = *link; t; link = &t->_next, t = *link) {
                // if found
                if (t == this) {
                    // mend the link
                    *link = t->_next;
                    break;
                }
            }
        }


        //
        // scavenge_threads
        //
        // Walks the list of threads, looking for unbound threads.
        // These are no longer in use, and can be safely deleted.
        //
        static void scavenge_threads(Thread **active_link, Thread **inactive_link) {
            while (Thread *thread = *active_link) {
                SpinLock lock(&thread->_scanning.lock);
                if (!thread->is_bound()) {
                    // remove thread from the active list.
                    *active_link = thread->_next;
                    // put thread on the inactive list.
                    thread->_next = *inactive_link;
                    *inactive_link = thread;
                } else {
                    active_link = &thread->_next;
                }
            }
        }
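
        //
        // Illustrative sketch (an assumption; `registered_threads` is hypothetical):
        // the zone's registry maintenance could move unbound threads onto a scratch
        // list and dispose of them once no lock is held.
        //
        //     Thread *inactive = NULL;
        //     Thread::scavenge_threads(&registered_threads, &inactive);
        //     while (Thread *t = inactive) { inactive = t->next(); delete t; }
        //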

        //
        // suspend
        //
        // Temporarily suspend the thread from further execution. Logs and terminates process on failure.
        //
        void suspend();

        //
        // resume
        //
        // Resume a suspended thread. Logs and terminates process on failure.
        //
        void resume();

        bool suspended() { return _suspended != 0; }

        //
        // description
        //
        // fills in buf with a textual description of the Thread, for debugging
        // returns buf
        //
        char *description(char *buf, size_t bufsz);

    };

    // BlockRef FIXME: temporary glue code until all call sites convert to BlockRef.
    template <> void Thread::block_escaped<void *>(void *block);
};

#endif // __AUTO_THREAD__