/*
 * Copyright 2008, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Distributed under the terms of the MIT License.
 */


#include <scheduling_analysis.h>

#include <condition_variable.h>
#include <elf.h>
#include <kernel.h>
#include <scheduler_defs.h>
#include <tracing.h>
#include <util/AutoLock.h>

#include "scheduler_tracing.h"


#if SCHEDULER_TRACING

namespace SchedulingAnalysis {

using namespace SchedulerTracing;

#if SCHEDULING_ANALYSIS_TRACING
using namespace SchedulingAnalysisTracing;
#endif

struct ThreadWaitObject;


struct HashObjectKey {
	virtual ~HashObjectKey()
	{
	}

	virtual uint32 HashKey() const = 0;
};


struct HashObject {
	HashObject*	next;

	virtual ~HashObject()
	{
	}

	virtual uint32 HashKey() const = 0;
	virtual bool Equals(const HashObjectKey* key) const = 0;
};


struct ThreadKey : HashObjectKey {
	thread_id	id;

	ThreadKey(thread_id id)
		:
		id(id)
	{
	}

	virtual uint32 HashKey() const
	{
		return id;
	}
};


struct Thread : HashObject, scheduling_analysis_thread {
	ScheduleState state;
	bigtime_t lastTime;

	ThreadWaitObject* waitObject;

	Thread(thread_id id)
		:
		state(UNKNOWN),
		lastTime(0),
		waitObject(NULL)
	{
		this->id = id;
		name[0] = '\0';

		runs = 0;
		total_run_time = 0;
		min_run_time = -1;
		max_run_time = -1;

		latencies = 0;
		total_latency = 0;
		min_latency = -1;
		max_latency = -1;

		reruns = 0;
		total_rerun_time = 0;
		min_rerun_time = -1;
		max_rerun_time = -1;

		unspecified_wait_time = 0;

		preemptions = 0;

		wait_objects = NULL;
	}

	virtual uint32 HashKey() const
	{
		return id;
	}

	virtual bool Equals(const HashObjectKey* _key) const
	{
		const ThreadKey* key = dynamic_cast<const ThreadKey*>(_key);
		if (key == NULL)
			return false;
		return key->id == id;
	}
};


struct WaitObjectKey : HashObjectKey {
	uint32	type;
	void*	object;

	WaitObjectKey(uint32 type, void* object)
		:
		type(type),
		object(object)
	{
	}

	virtual uint32 HashKey() const
	{
		return type ^ (uint32)(addr_t)object;
	}
};


struct WaitObject : HashObject, scheduling_analysis_wait_object {
	WaitObject(uint32 type, void* object)
	{
		this->type = type;
		this->object = object;
		name[0] = '\0';
		referenced_object = NULL;
	}

	virtual uint32 HashKey() const
	{
		return type ^ (uint32)(addr_t)object;
	}

	virtual bool Equals(const HashObjectKey* _key) const
	{
		const WaitObjectKey* key = dynamic_cast<const WaitObjectKey*>(_key);
		if (key == NULL)
			return false;
		return key->type == type && key->object == object;
	}
};


struct ThreadWaitObjectKey : HashObjectKey {
	thread_id	thread;
	uint32		type;
	void*		object;

	ThreadWaitObjectKey(thread_id thread, uint32 type, void* object)
		:
		thread(thread),
		type(type),
		object(object)
	{
	}

	virtual uint32 HashKey() const
	{
		return thread ^ type ^ (uint32)(addr_t)object;
	}
};


struct ThreadWaitObject : HashObject, scheduling_analysis_thread_wait_object {
	ThreadWaitObject(thread_id thread, WaitObject* waitObject)
	{
		this->thread = thread;
		wait_object = waitObject;
		wait_time = 0;
		waits = 0;
		next_in_list = NULL;
	}

	virtual uint32 HashKey() const
	{
		return thread ^ wait_object->type
			^ (uint32)(addr_t)wait_object->object;
	}

	virtual bool Equals(const HashObjectKey* _key) const
	{
		const ThreadWaitObjectKey* key
			= dynamic_cast<const ThreadWaitObjectKey*>(_key);
		if (key == NULL)
			return false;
		return key->thread == thread && key->type == wait_object->type
			&& key->object == wait_object->object;
	}
};

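
// Per-run bookkeeping for the analysis. All data lives in the buffer supplied
// by the caller: the hash table is carved out of the end of the buffer, and
// Thread, WaitObject, and ThreadWaitObject records are bump-allocated from
// the front. The kernel image range is remembered so _PolishWaitObject() can
// later decide whether a wait object pointer is still safe to dereference.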
class SchedulingAnalysisManager {
public:
	SchedulingAnalysisManager(void* buffer, size_t size)
		:
		fBuffer(buffer),
		fSize(size),
		fHashTable(),
		fHashTableSize(0)
	{
		fAnalysis.thread_count = 0;
		fAnalysis.threads = 0;
		fAnalysis.wait_object_count = 0;
		fAnalysis.thread_wait_object_count = 0;

		size_t maxObjectSize = max_c(max_c(sizeof(Thread), sizeof(WaitObject)),
			sizeof(ThreadWaitObject));
		fHashTableSize = size / (maxObjectSize + sizeof(HashObject*));
		fHashTable = (HashObject**)((uint8*)fBuffer + fSize) - fHashTableSize;
		fNextAllocation = (uint8*)fBuffer;
		fRemainingBytes = (addr_t)fHashTable - (addr_t)fBuffer;

		image_info info;
		if (elf_get_image_info_for_address((addr_t)&scheduler_init, &info)
				== B_OK) {
			fKernelStart = (addr_t)info.text;
			fKernelEnd = (addr_t)info.data + info.data_size;
		} else {
			fKernelStart = 0;
			fKernelEnd = 0;
		}
	}

	const scheduling_analysis* Analysis() const
	{
		return &fAnalysis;
	}

	void* Allocate(size_t size)
	{
		size = (size + 7) & ~(size_t)7;

		if (size > fRemainingBytes)
			return NULL;

		void* address = fNextAllocation;
		fNextAllocation += size;
		fRemainingBytes -= size;
		return address;
	}

	void Insert(HashObject* object)
	{
		uint32 index = object->HashKey() % fHashTableSize;
		object->next = fHashTable[index];
		fHashTable[index] = object;
	}

	void Remove(HashObject* object)
	{
		uint32 index = object->HashKey() % fHashTableSize;
		HashObject** slot = &fHashTable[index];
		while (*slot != object)
			slot = &(*slot)->next;

		*slot = object->next;
	}

	HashObject* Lookup(const HashObjectKey& key) const
	{
		uint32 index = key.HashKey() % fHashTableSize;
		HashObject* object = fHashTable[index];
		while (object != NULL && !object->Equals(&key))
			object = object->next;
		return object;
	}

	Thread* ThreadFor(thread_id id) const
	{
		return dynamic_cast<Thread*>(Lookup(ThreadKey(id)));
	}

	WaitObject* WaitObjectFor(uint32 type, void* object) const
	{
		return dynamic_cast<WaitObject*>(Lookup(WaitObjectKey(type, object)));
	}

	ThreadWaitObject* ThreadWaitObjectFor(thread_id thread, uint32 type,
		void* object) const
	{
		return dynamic_cast<ThreadWaitObject*>(
			Lookup(ThreadWaitObjectKey(thread, type, object)));
	}

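	// The methods below create and update the per-thread, per-wait-object,
	// and per-(thread, wait object) records while the trace is scanned.
	// Records are created lazily; names that only become known later are
	// filled in through the Update*() methods.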
	status_t AddThread(thread_id id, const char* name)
	{
		Thread* thread = ThreadFor(id);
		if (thread == NULL) {
			void* memory = Allocate(sizeof(Thread));
			if (memory == NULL)
				return B_NO_MEMORY;

			thread = new(memory) Thread(id);
			Insert(thread);
			fAnalysis.thread_count++;
		}

		if (name != NULL && thread->name[0] == '\0')
			strlcpy(thread->name, name, sizeof(thread->name));

		return B_OK;
	}

	status_t AddWaitObject(uint32 type, void* object,
		WaitObject** _waitObject = NULL)
	{
		if (WaitObjectFor(type, object) != NULL)
			return B_OK;

		void* memory = Allocate(sizeof(WaitObject));
		if (memory == NULL)
			return B_NO_MEMORY;

		WaitObject* waitObject = new(memory) WaitObject(type, object);
		Insert(waitObject);
		fAnalysis.wait_object_count++;

		// Set a dummy name for snooze() and waiting for signals, so we don't
		// try to update them later on.
		if (type == THREAD_BLOCK_TYPE_SNOOZE
			|| type == THREAD_BLOCK_TYPE_SIGNAL) {
			strcpy(waitObject->name, "?");
		}

		if (_waitObject != NULL)
			*_waitObject = waitObject;

		return B_OK;
	}

	status_t UpdateWaitObject(uint32 type, void* object, const char* name,
		void* referencedObject)
	{
		WaitObject* waitObject = WaitObjectFor(type, object);
		if (waitObject == NULL)
			return B_OK;

		if (waitObject->name[0] != '\0') {
			// This is a new object at the same address. Replace the old one.
			Remove(waitObject);
			status_t error = AddWaitObject(type, object, &waitObject);
			if (error != B_OK)
				return error;
		}

		if (name == NULL)
			name = "?";

		strlcpy(waitObject->name, name, sizeof(waitObject->name));
		waitObject->referenced_object = referencedObject;

		return B_OK;
	}

	bool UpdateWaitObjectDontAdd(uint32 type, void* object, const char* name,
		void* referencedObject)
	{
		WaitObject* waitObject = WaitObjectFor(type, object);
		if (waitObject == NULL || waitObject->name[0] != '\0')
			return false;

		if (name == NULL)
			name = "?";

		strlcpy(waitObject->name, name, sizeof(waitObject->name));
		waitObject->referenced_object = referencedObject;

		return true;
	}

	status_t AddThreadWaitObject(Thread* thread, uint32 type, void* object)
	{
		WaitObject* waitObject = WaitObjectFor(type, object);
		if (waitObject == NULL) {
			// The algorithm should prevent this case.
			return B_ERROR;
		}

		ThreadWaitObject* threadWaitObject = ThreadWaitObjectFor(thread->id,
			type, object);
		if (threadWaitObject == NULL
			|| threadWaitObject->wait_object != waitObject) {
			if (threadWaitObject != NULL)
				Remove(threadWaitObject);

			void* memory = Allocate(sizeof(ThreadWaitObject));
			if (memory == NULL)
				return B_NO_MEMORY;

			threadWaitObject = new(memory) ThreadWaitObject(thread->id,
				waitObject);
			Insert(threadWaitObject);
			fAnalysis.thread_wait_object_count++;

			threadWaitObject->next_in_list = thread->wait_objects;
			thread->wait_objects = threadWaitObject;
		}

		thread->waitObject = threadWaitObject;

		return B_OK;
	}

	int32 MissingWaitObjects() const
	{
		// Iterate through the hash table and count the wait objects that
		// don't have a name yet.
		int32 count = 0;
		for (uint32 i = 0; i < fHashTableSize; i++) {
			HashObject* object = fHashTable[i];
			while (object != NULL) {
				WaitObject* waitObject = dynamic_cast<WaitObject*>(object);
				if (waitObject != NULL && waitObject->name[0] == '\0')
					count++;

				object = object->next;
			}
		}

		return count;
	}

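	// Called after the trace has been processed: collects all Thread records
	// into the result's thread array and gives still-unnamed wait objects a
	// best-effort name by inspecting the live kernel objects.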
	status_t FinishAnalysis()
	{
		// allocate the thread array
		scheduling_analysis_thread** threads
			= (scheduling_analysis_thread**)Allocate(
				sizeof(Thread*) * fAnalysis.thread_count);
		if (threads == NULL)
			return B_NO_MEMORY;

		// Iterate through the hash table and collect all threads. Also polish
		// all wait objects that haven't been updated yet.
		int32 index = 0;
		for (uint32 i = 0; i < fHashTableSize; i++) {
			HashObject* object = fHashTable[i];
			while (object != NULL) {
				Thread* thread = dynamic_cast<Thread*>(object);
				if (thread != NULL) {
					threads[index++] = thread;
				} else if (WaitObject* waitObject
						= dynamic_cast<WaitObject*>(object)) {
					_PolishWaitObject(waitObject);
				}

				object = object->next;
			}
		}

		fAnalysis.threads = threads;

		dprintf("scheduling analysis: free bytes: %lu/%lu\n", fRemainingBytes,
			fSize);
		return B_OK;
	}

private:
	void _PolishWaitObject(WaitObject* waitObject)
	{
		if (waitObject->name[0] != '\0')
			return;

		switch (waitObject->type) {
			case THREAD_BLOCK_TYPE_SEMAPHORE:
			{
				sem_info info;
				if (get_sem_info((sem_id)(addr_t)waitObject->object, &info)
						== B_OK) {
					strlcpy(waitObject->name, info.name,
						sizeof(waitObject->name));
				}
				break;
			}

			case THREAD_BLOCK_TYPE_CONDITION_VARIABLE:
			{
				// If the condition variable object is in the kernel image,
				// assume it is still initialized.
				ConditionVariable* variable
					= (ConditionVariable*)waitObject->object;
				if (!_IsInKernelImage(variable))
					break;

				waitObject->referenced_object = (void*)variable->Object();
				strlcpy(waitObject->name, variable->ObjectType(),
					sizeof(waitObject->name));
				break;
			}

			case THREAD_BLOCK_TYPE_MUTEX:
			{
				// If the mutex object is in the kernel image, assume it is
				// still initialized.
				mutex* lock = (mutex*)waitObject->object;
				if (!_IsInKernelImage(lock))
					break;

				strlcpy(waitObject->name, lock->name,
					sizeof(waitObject->name));
				break;
			}

			case THREAD_BLOCK_TYPE_RW_LOCK:
			{
				// If the rw_lock object is in the kernel image, assume it is
				// still initialized.
				rw_lock* lock = (rw_lock*)waitObject->object;
				if (!_IsInKernelImage(lock))
					break;

				strlcpy(waitObject->name, lock->name,
					sizeof(waitObject->name));
				break;
			}

			case THREAD_BLOCK_TYPE_OTHER:
			{
				const char* name = (const char*)waitObject->object;
				if (name == NULL || _IsInKernelImage(name))
					return;

				strlcpy(waitObject->name, name, sizeof(waitObject->name));
				break;
			}

			case THREAD_BLOCK_TYPE_OTHER_OBJECT:
			case THREAD_BLOCK_TYPE_SNOOZE:
			case THREAD_BLOCK_TYPE_SIGNAL:
			default:
				break;
		}

		if (waitObject->name[0] != '\0')
			return;

		strcpy(waitObject->name, "?");
	}

	bool _IsInKernelImage(const void* _address)
	{
		addr_t address = (addr_t)_address;
		return address >= fKernelStart && address < fKernelEnd;
	}

private:
	scheduling_analysis	fAnalysis;
	void*				fBuffer;
	size_t				fSize;
	HashObject**		fHashTable;
	uint32				fHashTableSize;
	uint8*				fNextAllocation;
	size_t				fRemainingBytes;
	addr_t				fKernelStart;
	addr_t				fKernelEnd;
};

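
// The analysis runs in two passes over the trace buffer. The first pass walks
// backwards from the newest entry down to `from' and registers every thread
// and wait object that occurs, so that all records exist before statistics
// are computed. The second pass walks forward and drives a per-thread state
// machine (UNKNOWN, RUNNING, STILL_RUNNING, PREEMPTED, READY, WAITING) that
// accumulates run times, latencies, rerun times, and wait times from the
// schedule, enqueue, and remove-from-run-queue events.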
static status_t
analyze_scheduling(bigtime_t from, bigtime_t until,
	SchedulingAnalysisManager& manager)
{
	// first pass: get an overview of how many threads and locking primitives
	// we're talking about
	TraceEntryIterator iterator;
	iterator.MoveTo(INT_MAX);
	while (TraceEntry* _entry = iterator.Previous()) {
		SchedulerTraceEntry* baseEntry
			= dynamic_cast<SchedulerTraceEntry*>(_entry);
		if (baseEntry == NULL || baseEntry->Time() >= until)
			continue;
		if (baseEntry->Time() < from)
			break;

		status_t error = manager.AddThread(baseEntry->ThreadID(),
			baseEntry->Name());
		if (error != B_OK)
			return error;

		if (ScheduleThread* entry = dynamic_cast<ScheduleThread*>(_entry)) {
			error = manager.AddThread(entry->PreviousThreadID(), NULL);
			if (error != B_OK)
				return error;

			if (entry->PreviousState() == B_THREAD_WAITING) {
				void* waitObject = (void*)entry->PreviousWaitObject();
				switch (entry->PreviousWaitObjectType()) {
					case THREAD_BLOCK_TYPE_SNOOZE:
					case THREAD_BLOCK_TYPE_SIGNAL:
						waitObject = NULL;
						break;
					case THREAD_BLOCK_TYPE_SEMAPHORE:
					case THREAD_BLOCK_TYPE_CONDITION_VARIABLE:
					case THREAD_BLOCK_TYPE_MUTEX:
					case THREAD_BLOCK_TYPE_RW_LOCK:
					case THREAD_BLOCK_TYPE_OTHER:
					default:
						break;
				}

				error = manager.AddWaitObject(entry->PreviousWaitObjectType(),
					waitObject);
				if (error != B_OK)
					return error;
			}
		}
	}

#if SCHEDULING_ANALYSIS_TRACING
	int32 startEntryIndex = iterator.Index();
#endif

	// second pass: compute the statistics
	while (TraceEntry* _entry = iterator.Next()) {
#if SCHEDULING_ANALYSIS_TRACING
		// might be info on a wait object
		if (WaitObjectTraceEntry* waitObjectEntry
				= dynamic_cast<WaitObjectTraceEntry*>(_entry)) {
			status_t error = manager.UpdateWaitObject(waitObjectEntry->Type(),
				waitObjectEntry->Object(), waitObjectEntry->Name(),
				waitObjectEntry->ReferencedObject());
			if (error != B_OK)
				return error;
			continue;
		}
#endif

		SchedulerTraceEntry* baseEntry
			= dynamic_cast<SchedulerTraceEntry*>(_entry);
		if (baseEntry == NULL)
			continue;
		if (baseEntry->Time() >= until)
			break;

		if (ScheduleThread* entry = dynamic_cast<ScheduleThread*>(_entry)) {
			// scheduled thread
			Thread* thread = manager.ThreadFor(entry->ThreadID());

			bigtime_t diffTime = entry->Time() - thread->lastTime;

			if (thread->state == READY) {
				// thread scheduled after having been woken up
				thread->latencies++;
				thread->total_latency += diffTime;
				if (thread->min_latency < 0 || diffTime < thread->min_latency)
					thread->min_latency = diffTime;
				if (diffTime > thread->max_latency)
					thread->max_latency = diffTime;
			} else if (thread->state == PREEMPTED) {
				// thread scheduled after having been preempted before
				thread->reruns++;
				thread->total_rerun_time += diffTime;
				if (thread->min_rerun_time < 0
						|| diffTime < thread->min_rerun_time) {
					thread->min_rerun_time = diffTime;
				}
				if (diffTime > thread->max_rerun_time)
					thread->max_rerun_time = diffTime;
			}

			if (thread->state == STILL_RUNNING) {
				// Thread was running and continues to run.
				thread->state = RUNNING;
			}

			if (thread->state != RUNNING) {
				thread->lastTime = entry->Time();
				thread->state = RUNNING;
			}

			// unscheduled thread

			if (entry->ThreadID() == entry->PreviousThreadID())
				continue;

			thread = manager.ThreadFor(entry->PreviousThreadID());

			diffTime = entry->Time() - thread->lastTime;

			if (thread->state == STILL_RUNNING) {
				// thread preempted
				thread->runs++;
				thread->preemptions++;
				thread->total_run_time += diffTime;
				if (thread->min_run_time < 0
						|| diffTime < thread->min_run_time)
					thread->min_run_time = diffTime;
				if (diffTime > thread->max_run_time)
					thread->max_run_time = diffTime;

				thread->lastTime = entry->Time();
				thread->state = PREEMPTED;
			} else if (thread->state == RUNNING) {
				// thread starts waiting (it hadn't been added to the run
				// queue before being unscheduled)
				thread->runs++;
				thread->total_run_time += diffTime;
				if (thread->min_run_time < 0
						|| diffTime < thread->min_run_time)
					thread->min_run_time = diffTime;
				if (diffTime > thread->max_run_time)
					thread->max_run_time = diffTime;

				if (entry->PreviousState() == B_THREAD_WAITING) {
					void* waitObject = (void*)entry->PreviousWaitObject();
					switch (entry->PreviousWaitObjectType()) {
						case THREAD_BLOCK_TYPE_SNOOZE:
						case THREAD_BLOCK_TYPE_SIGNAL:
							waitObject = NULL;
							break;
						case THREAD_BLOCK_TYPE_SEMAPHORE:
						case THREAD_BLOCK_TYPE_CONDITION_VARIABLE:
						case THREAD_BLOCK_TYPE_MUTEX:
						case THREAD_BLOCK_TYPE_RW_LOCK:
						case THREAD_BLOCK_TYPE_OTHER:
						default:
							break;
					}

					status_t error = manager.AddThreadWaitObject(thread,
						entry->PreviousWaitObjectType(), waitObject);
					if (error != B_OK)
						return error;
				}

				thread->lastTime = entry->Time();
				thread->state = WAITING;
			} else if (thread->state == UNKNOWN) {
				uint32 threadState = entry->PreviousState();
				if (threadState == B_THREAD_WAITING
					|| threadState == B_THREAD_SUSPENDED) {
					thread->lastTime = entry->Time();
					thread->state = WAITING;
				} else if (threadState == B_THREAD_READY) {
					thread->lastTime = entry->Time();
					thread->state = PREEMPTED;
				}
			}
		} else if (EnqueueThread* entry
				= dynamic_cast<EnqueueThread*>(_entry)) {
			// thread enqueued in run queue
			Thread* thread = manager.ThreadFor(entry->ThreadID());

			if (thread->state == RUNNING || thread->state == STILL_RUNNING) {
				// Thread was running and is reentered into the run queue.
				// This is done by the scheduler, if the thread remains ready.
				thread->state = STILL_RUNNING;
			} else {
				// Thread was waiting and is ready now.
				bigtime_t diffTime = entry->Time() - thread->lastTime;
				if (thread->waitObject != NULL) {
					thread->waitObject->wait_time += diffTime;
					thread->waitObject->waits++;
					thread->waitObject = NULL;
				} else if (thread->state != UNKNOWN)
					thread->unspecified_wait_time += diffTime;

				thread->lastTime = entry->Time();
				thread->state = READY;
			}
		} else if (RemoveThread* entry = dynamic_cast<RemoveThread*>(_entry)) {
			// thread removed from run queue
			Thread* thread = manager.ThreadFor(entry->ThreadID());

			// This really only happens when the thread priority is changed
			// while the thread is ready.

			bigtime_t diffTime = entry->Time() - thread->lastTime;
			if (thread->state == RUNNING) {
				// This should never happen.
				thread->runs++;
				thread->total_run_time += diffTime;
				if (thread->min_run_time < 0
						|| diffTime < thread->min_run_time)
					thread->min_run_time = diffTime;
				if (diffTime > thread->max_run_time)
					thread->max_run_time = diffTime;
			} else if (thread->state == READY || thread->state == PREEMPTED) {
				// Not really correct, but the case is rare and we keep it
				// simple.
				thread->unspecified_wait_time += diffTime;
			}

			thread->lastTime = entry->Time();
			thread->state = WAITING;
		}
	}

#if SCHEDULING_ANALYSIS_TRACING
	// If wait objects are still missing a name, walk backwards from the start
	// of the analyzed range and pick up earlier wait object info entries,
	// without creating new wait objects.
	int32 missingWaitObjects = manager.MissingWaitObjects();
	if (missingWaitObjects > 0) {
		iterator.MoveTo(startEntryIndex + 1);
		while (TraceEntry* _entry = iterator.Previous()) {
			if (WaitObjectTraceEntry* waitObjectEntry
					= dynamic_cast<WaitObjectTraceEntry*>(_entry)) {
				if (manager.UpdateWaitObjectDontAdd(
						waitObjectEntry->Type(), waitObjectEntry->Object(),
						waitObjectEntry->Name(),
						waitObjectEntry->ReferencedObject())) {
					if (--missingWaitObjects == 0)
						break;
				}
			}
		}
	}
#endif

	return B_OK;
}

}	// namespace SchedulingAnalysis

#endif	// SCHEDULER_TRACING

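
// Syscall backend: the userland caller provides a scratch buffer that
// receives the per-thread and per-wait-object records, as well as a
// scheduling_analysis structure that receives the summary. A minimal sketch
// of a userland invocation (the wrapper name and the buffer size are
// assumptions for illustration, not defined by this file) might look like:
//
//	const size_t kBufferSize = 10 * 1024 * 1024;
//	void* buffer = malloc(kBufferSize);
//	scheduling_analysis analysis;
//	status_t status = _kern_analyze_scheduling(startTime, endTime, buffer,
//		kBufferSize, &analysis);
//	if (status == B_OK) {
//		for (uint32 i = 0; i < analysis.thread_count; i++)
//			printf("%s\n", analysis.threads[i]->name);
//	}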
status_t
_user_analyze_scheduling(bigtime_t from, bigtime_t until, void* buffer,
	size_t size, scheduling_analysis* analysis)
{
#if SCHEDULER_TRACING
	using namespace SchedulingAnalysis;

	if ((addr_t)buffer & 0x7) {
		addr_t diff = (addr_t)buffer & 0x7;
		buffer = (void*)((addr_t)buffer + 8 - diff);
		size -= 8 - diff;
	}
	size &= ~(size_t)0x7;

	if (buffer == NULL || !IS_USER_ADDRESS(buffer) || size == 0)
		return B_BAD_VALUE;

	status_t error = lock_memory(buffer, size, B_READ_DEVICE);
	if (error != B_OK)
		return error;

	SchedulingAnalysisManager manager(buffer, size);

	InterruptsLocker locker;
	lock_tracing_buffer();

	error = analyze_scheduling(from, until, manager);

	unlock_tracing_buffer();
	locker.Unlock();

	if (error == B_OK)
		error = manager.FinishAnalysis();

	unlock_memory(buffer, size, B_READ_DEVICE);

	if (error == B_OK) {
		error = user_memcpy(analysis, manager.Analysis(),
			sizeof(scheduling_analysis));
	}

	return error;
#else
	return B_BAD_VALUE;
#endif
}