// xray_profiling.cpp revision 360784
1//===-- xray_profiling.cpp --------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file is a part of XRay, a dynamic runtime instrumentation system.
10//
11// This is the implementation of a profiling handler.
12//
13//===----------------------------------------------------------------------===//
14#include <memory>
15#include <time.h>
16
17#include "sanitizer_common/sanitizer_atomic.h"
18#include "sanitizer_common/sanitizer_flags.h"
19#include "xray/xray_interface.h"
20#include "xray/xray_log_interface.h"
21#include "xray_buffer_queue.h"
22#include "xray_flags.h"
23#include "xray_profile_collector.h"
24#include "xray_profiling_flags.h"
25#include "xray_recursion_guard.h"
26#include "xray_tsc.h"
27#include "xray_utils.h"
28#include <pthread.h>
29
30namespace __xray {
31
32namespace {
33
// Flush status of the profiling implementation; transitions between
// XRAY_LOG_NOT_FLUSHING, XRAY_LOG_FLUSHING, and XRAY_LOG_FLUSHED (see
// profilingFlush()).
static atomic_sint32_t ProfilerLogFlushStatus = {
    XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};

// Lifecycle status of the profiling implementation: uninitialized ->
// initializing -> initialized -> finalizing -> finalized, and back to
// uninitialized after a flush.
static atomic_sint32_t ProfilerLogStatus = {
    XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};

// Serializes mutation of the profiling flags (used by profilingLoggingInit
// and profilingDynamicInitializer).
static SpinMutex ProfilerOptionsMutex;

// Per-thread state published through atomics so signal handlers and the
// pthread TSD destructor can safely observe partially-initialised state.
// Each field follows a 0 -> 1 -> pointer protocol (see getThreadLocalData):
// 0 = uninitialised, 1 = initialisation in progress, otherwise the address
// of the thread's constructed object.
struct ProfilingData {
  atomic_uintptr_t Allocators;
  atomic_uintptr_t FCT;
};

// TSD key whose destructor posts a thread's profile data at thread exit
// (registered lazily in profilingLoggingInit via pthread_once).
static pthread_key_t ProfilingKey;
48
// We use a global buffer queue, which gets initialized once at initialisation
// time, and gets reset when profiling is "done". The queue lives in static
// aligned storage and is placement-constructed in profilingLoggingInit.
static std::aligned_storage<sizeof(BufferQueue), alignof(BufferQueue)>::type
    BufferQueueStorage;
static BufferQueue *BQ = nullptr;

// The raw buffers (node, roots, shadow-stack, node-id-pair) acquired from BQ
// for this thread; handed off wholesale to the profile collector when the
// thread's data is posted.
thread_local FunctionCallTrie::Allocators::Buffers ThreadBuffers;
// Backing storage for this thread's allocators and trie; both are lazily
// placement-constructed in getThreadLocalData().
thread_local std::aligned_storage<sizeof(FunctionCallTrie::Allocators),
                                  alignof(FunctionCallTrie::Allocators)>::type
    AllocatorsStorage;
thread_local std::aligned_storage<sizeof(FunctionCallTrie),
                                  alignof(FunctionCallTrie)>::type
    FunctionCallTrieStorage;
// Published pointers (as atomic uintptrs) into the storage above; see the
// protocol documented on ProfilingData.
thread_local ProfilingData TLD{{0}, {0}};
// Guards against re-entering the handlers on the same thread (e.g. a signal
// handler interrupting an XRay handler).
thread_local atomic_uint8_t ReentranceGuard{0};

// We use a separate guard for ensuring that for this thread, if we're already
// cleaning up, that any signal handlers don't attempt to cleanup nor
// initialise.
thread_local atomic_uint8_t TLDInitGuard{0};

// We also use a separate latch to signal that the thread is exiting, and
// non-essential work should be ignored (things like recording events, etc.).
thread_local atomic_uint8_t ThreadExitingLatch{0};
73
// Lazily sets up (and returns) the calling thread's ProfilingData, acquiring
// buffers from the global BufferQueue and placement-constructing the thread's
// Allocators and FunctionCallTrie on first use. Returns nullptr whenever the
// thread-local state cannot (or should not) be used: a re-entrant call, an
// exiting thread, a missing/exhausted buffer queue, or an initialisation
// already in flight on this TLD.
static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT {
  // Register TLD with the pthread key exactly once per thread so the TSD
  // destructor can post this thread's data at thread exit.
  thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT {
    pthread_setspecific(ProfilingKey, &TLD);
    return false;
  }();
  (void)ThreadOnce;

  RecursionGuard TLDInit(TLDInitGuard);
  if (!TLDInit)
    return nullptr;

  if (atomic_load_relaxed(&ThreadExitingLatch))
    return nullptr;

  // TLD.Allocators follows a 0 -> 1 -> pointer protocol: 0 means
  // uninitialised, 1 means initialisation in progress, anything else is the
  // address of the constructed Allocators. Winning the 0 -> 1 CAS makes this
  // call responsible for constructing them.
  uptr Allocators = 0;
  if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1,
                                     memory_order_acq_rel)) {
    bool Success = false;
    // If anything below fails, roll the sentinel back to 0 so a later call
    // may retry initialisation.
    auto AllocatorsUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        atomic_store(&TLD.Allocators, 0, memory_order_release);
    });

    // Acquire a set of buffers for this thread.
    if (BQ == nullptr)
      return nullptr;

    if (BQ->getBuffer(ThreadBuffers.NodeBuffer) != BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto NodeBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.NodeBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.RootsBuffer) != BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto RootsBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.RootsBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.ShadowStackBuffer) !=
        BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto ShadowStackBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.ShadowStackBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.NodeIdPairBuffer) !=
        BufferQueue::ErrorCode::Ok)
      return nullptr;

    // All four buffers acquired: construct the allocators in place and
    // publish their address, disarming the undo handlers above.
    Success = true;
    new (&AllocatorsStorage) FunctionCallTrie::Allocators(
        FunctionCallTrie::InitAllocatorsFromBuffers(ThreadBuffers));
    Allocators = reinterpret_cast<uptr>(
        reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage));
    atomic_store(&TLD.Allocators, Allocators, memory_order_release);
  }

  // A failed CAS that observed the sentinel means initialisation is already
  // in progress elsewhere on this thread (e.g. interrupted by a signal
  // handler); don't wait, just bail.
  if (Allocators == 1)
    return nullptr;

  // TLD.FCT uses the same 0 -> 1 -> pointer protocol for the trie itself,
  // which is constructed from the now-published allocators.
  uptr FCT = 0;
  if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1, memory_order_acq_rel)) {
    new (&FunctionCallTrieStorage)
        FunctionCallTrie(*reinterpret_cast<FunctionCallTrie::Allocators *>(
            atomic_load_relaxed(&TLD.Allocators)));
    FCT = reinterpret_cast<uptr>(
        reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage));
    atomic_store(&TLD.FCT, FCT, memory_order_release);
  }

  if (FCT == 1)
    return nullptr;

  // Both slots hold real pointers; callers may use TLD.FCT safely.
  return &TLD;
}
153
154static void cleanupTLD() XRAY_NEVER_INSTRUMENT {
155  auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel);
156  if (FCT == reinterpret_cast<uptr>(reinterpret_cast<FunctionCallTrie *>(
157                 &FunctionCallTrieStorage)))
158    reinterpret_cast<FunctionCallTrie *>(FCT)->~FunctionCallTrie();
159
160  auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel);
161  if (Allocators ==
162      reinterpret_cast<uptr>(
163          reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
164    reinterpret_cast<FunctionCallTrie::Allocators *>(Allocators)->~Allocators();
165}
166
// Hands the thread's trie, allocators, and buffers over to the profile
// collector service (which takes ownership via moves), then resets the
// thread-local slots so later handler calls re-initialise. Returns early
// (posting nothing) unless both FCT and Allocators hold the fully-constructed
// storage addresses.
static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT {
  // Don't race with an in-flight TLD initialisation/cleanup on this thread.
  RecursionGuard TLDInit(TLDInitGuard);
  if (!TLDInit)
    return;

  // Claim the trie pointer; if it isn't the constructed storage address
  // (i.e. it was 0 or the in-progress sentinel), there is nothing to post.
  uptr P = atomic_exchange(&T.FCT, 0, memory_order_acq_rel);
  if (P != reinterpret_cast<uptr>(
               reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage)))
    return;

  auto FCT = reinterpret_cast<FunctionCallTrie *>(P);
  DCHECK_NE(FCT, nullptr);

  // Same claim-and-check for the allocators.
  uptr A = atomic_exchange(&T.Allocators, 0, memory_order_acq_rel);
  if (A !=
      reinterpret_cast<uptr>(
          reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
    return;

  auto Allocators = reinterpret_cast<FunctionCallTrie::Allocators *>(A);
  DCHECK_NE(Allocators, nullptr);

  // Always move the data into the profile collector.
  profileCollectorService::post(BQ, std::move(*FCT), std::move(*Allocators),
                                std::move(ThreadBuffers), GetTid());

  // Re-initialize the ThreadBuffers object to a known "default" state.
  ThreadBuffers = FunctionCallTrie::Allocators::Buffers{};
}
196
197} // namespace
198
// Returns the profiling options baked in at compile time through the
// XRAY_PROFILER_DEFAULT_OPTIONS macro, or an empty string when none were
// provided. Parsed first (lowest precedence) by profilingLoggingInit.
const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT {
#ifdef XRAY_PROFILER_DEFAULT_OPTIONS
  return SANITIZER_STRINGIFY(XRAY_PROFILER_DEFAULT_OPTIONS);
#else
  return "";
#endif
}
206
207XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
208  if (atomic_load(&ProfilerLogStatus, memory_order_acquire) !=
209      XRayLogInitStatus::XRAY_LOG_FINALIZED) {
210    if (Verbosity())
211      Report("Not flushing profiles, profiling not been finalized.\n");
212    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
213  }
214
215  RecursionGuard SignalGuard(ReentranceGuard);
216  if (!SignalGuard) {
217    if (Verbosity())
218      Report("Cannot finalize properly inside a signal handler!\n");
219    atomic_store(&ProfilerLogFlushStatus,
220                 XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,
221                 memory_order_release);
222    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
223  }
224
225  s32 Previous = atomic_exchange(&ProfilerLogFlushStatus,
226                                 XRayLogFlushStatus::XRAY_LOG_FLUSHING,
227                                 memory_order_acq_rel);
228  if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
229    if (Verbosity())
230      Report("Not flushing profiles, implementation still flushing.\n");
231    return XRayLogFlushStatus::XRAY_LOG_FLUSHING;
232  }
233
234  // At this point, we'll create the file that will contain the profile, but
235  // only if the options say so.
236  if (!profilingFlags()->no_flush) {
237    // First check whether we have data in the profile collector service
238    // before we try and write anything down.
239    XRayBuffer B = profileCollectorService::nextBuffer({nullptr, 0});
240    if (B.Data == nullptr) {
241      if (Verbosity())
242        Report("profiling: No data to flush.\n");
243    } else {
244      LogWriter *LW = LogWriter::Open();
245      if (LW == nullptr) {
246        if (Verbosity())
247          Report("profiling: Failed to flush to file, dropping data.\n");
248      } else {
249        // Now for each of the buffers, write out the profile data as we would
250        // see it in memory, verbatim.
251        while (B.Data != nullptr && B.Size != 0) {
252          LW->WriteAll(reinterpret_cast<const char *>(B.Data),
253                       reinterpret_cast<const char *>(B.Data) + B.Size);
254          B = profileCollectorService::nextBuffer(B);
255        }
256      }
257      LogWriter::Close(LW);
258    }
259  }
260
261  profileCollectorService::reset();
262
263  atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
264               memory_order_release);
265  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
266               memory_order_release);
267
268  return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
269}
270
// The entry/exit handler installed via __xray_set_handler. Records a function
// enter/exit event into the calling thread's FunctionCallTrie, lazily setting
// up the thread-local state on first use.
void profilingHandleArg0(int32_t FuncId,
                         XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
  // Read the TSC before anything else so the timestamp reflects the
  // instrumented function rather than handler overhead.
  unsigned char CPU;
  auto TSC = readTSC(CPU);
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return;

  auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire);
  if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED ||
               Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING))
    return;

  // If profiling is winding down, post this thread's accumulated data
  // instead of recording new events.
  if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED ||
               Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) {
    postCurrentThreadFCT(TLD);
    return;
  }

  auto T = getThreadLocalData();
  if (T == nullptr)
    return;

  // T->FCT holds a real trie pointer here: getThreadLocalData() only returns
  // non-null once both slots are past their 0/1 sentinel states.
  auto FCT = reinterpret_cast<FunctionCallTrie *>(atomic_load_relaxed(&T->FCT));
  switch (Entry) {
  case XRayEntryType::ENTRY:
  case XRayEntryType::LOG_ARGS_ENTRY:
    FCT->enterFunction(FuncId, TSC, CPU);
    break;
  case XRayEntryType::EXIT:
  case XRayEntryType::TAIL:
    FCT->exitFunction(FuncId, TSC, CPU);
    break;
  default:
    // FIXME: Handle bugs.
    break;
  }
}
309
310void profilingHandleArg1(int32_t FuncId, XRayEntryType Entry,
311                         uint64_t) XRAY_NEVER_INSTRUMENT {
312  return profilingHandleArg0(FuncId, Entry);
313}
314
// Transitions the profiler from INITIALIZED to FINALIZING, finalizes the
// buffer queue, posts the calling thread's data, and serializes everything
// held by the collector. Returns XRAY_LOG_FINALIZED on success, or the
// observed status when the transition could not be made.
XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT {
  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
  if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
                                      XRayLogInitStatus::XRAY_LOG_FINALIZING,
                                      memory_order_release)) {
    if (Verbosity())
      Report("Cannot finalize profile, the profiling is not initialized.\n");
    // On CAS failure, CurrentStatus holds the actual status that was seen.
    return static_cast<XRayLogInitStatus>(CurrentStatus);
  }

  // Mark then finalize the current generation of buffers. This allows us to let
  // the threads currently holding onto new buffers still use them, but let the
  // last reference do the memory cleanup.
  DCHECK_NE(BQ, nullptr);
  BQ->finalize();

  // Wait a grace period to allow threads to see that we're finalizing.
  SleepForMillis(profilingFlags()->grace_period_ms);

  // If we for some reason are entering this function from an instrumented
  // handler, we bail out.
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return static_cast<XRayLogInitStatus>(CurrentStatus);

  // Post the current thread's data if we have any.
  postCurrentThreadFCT(TLD);

  // Then we force serialize the log data.
  profileCollectorService::serialize();

  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
               memory_order_release);
  return XRayLogInitStatus::XRAY_LOG_FINALIZED;
}
350
// Implementation of the `log_init` entry point for the "xray-profiling" mode.
// Parses options (compiler defaults, then XRAY_PROFILING_OPTIONS from the
// environment, then the caller-provided string), (re)initialises the global
// buffer queue, registers thread-exit and process-exit handlers once, and
// installs the XRay event handlers. The first two size parameters are unused.
XRayLogInitStatus
profilingLoggingInit(size_t, size_t, void *Options,
                     size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;

  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
  if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
                                      XRayLogInitStatus::XRAY_LOG_INITIALIZING,
                                      memory_order_acq_rel)) {
    if (Verbosity())
      Report("Cannot initialize already initialised profiling "
             "implementation.\n");
    return static_cast<XRayLogInitStatus>(CurrentStatus);
  }

  // Parse flags in increasing order of precedence: compiler-defined defaults,
  // then the environment, then the string the caller passed in.
  {
    SpinMutexLock Lock(&ProfilerOptionsMutex);
    FlagParser ConfigParser;
    ProfilerFlags Flags;
    Flags.setDefaults();
    registerProfilerFlags(&ConfigParser, &Flags);
    ConfigParser.ParseString(profilingCompilerDefinedFlags());
    const char *Env = GetEnv("XRAY_PROFILING_OPTIONS");
    if (Env == nullptr)
      Env = "";
    ConfigParser.ParseString(Env);

    // Then parse the configuration string provided.
    // NOTE(review): OptionsSize is not consulted here; Options appears to be
    // assumed NUL-terminated — confirm against the log-interface contract.
    ConfigParser.ParseString(static_cast<const char *>(Options));
    if (Verbosity())
      ReportUnrecognizedFlags();
    *profilingFlags() = Flags;
  }

  // We need to reset the profile data collection implementation now.
  profileCollectorService::reset();

  // Then also reset the buffer queue implementation.
  if (BQ == nullptr) {
    // First-time initialisation: construct the queue in its static storage.
    bool Success = false;
    new (&BufferQueueStorage)
        BufferQueue(profilingFlags()->per_thread_allocator_max,
                    profilingFlags()->buffers_max, Success);
    if (!Success) {
      if (Verbosity())
        Report("Failed to initialize preallocated memory buffers!");
      atomic_store(&ProfilerLogStatus,
                   XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
                   memory_order_release);
      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    }

    // If we've succeeded, set the global pointer to the initialised storage.
    BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
  } else {
    // Re-initialisation: finalize the previous generation of buffers before
    // re-initialising the queue with the (possibly new) flag values.
    BQ->finalize();
    auto InitStatus = BQ->init(profilingFlags()->per_thread_allocator_max,
                               profilingFlags()->buffers_max);

    if (InitStatus != BufferQueue::ErrorCode::Ok) {
      if (Verbosity())
        Report("Failed to initialize preallocated memory buffers; error: %s",
               BufferQueue::getErrorString(InitStatus));
      atomic_store(&ProfilerLogStatus,
                   XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
                   memory_order_release);
      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    }

    DCHECK(!BQ->finalizing());
  }

  // We need to set up the exit handlers.
  static pthread_once_t Once = PTHREAD_ONCE_INIT;
  pthread_once(
      &Once, +[] {
        // TSD destructor: posts the exiting thread's data, guarded against
        // double-execution by the per-thread exiting latch.
        pthread_key_create(
            &ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT {
              if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
                return;

              if (P == nullptr)
                return;

              auto T = reinterpret_cast<ProfilingData *>(P);
              if (atomic_load_relaxed(&T->Allocators) == 0)
                return;

              {
                // If we're somehow executing this while inside a
                // non-reentrant-friendly context, we skip attempting to post
                // the current thread's data.
                RecursionGuard G(ReentranceGuard);
                if (!G)
                  return;

                postCurrentThreadFCT(*T);
              }
            });

        // We also need to set up an exit handler, so that we can get the
        // profile information at exit time. We use the C API to do this, to not
        // rely on C++ ABI functions for registering exit handlers.
        Atexit(+[]() XRAY_NEVER_INSTRUMENT {
          if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
            return;

          // Tear down this thread's trie/allocators no matter how we leave.
          auto Cleanup =
              at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); });

          // Finalize and flush.
          if (profilingFinalize() != XRAY_LOG_FINALIZED ||
              profilingFlush() != XRAY_LOG_FLUSHED)
            return;

          if (Verbosity())
            Report("XRay Profile flushed at exit.");
        });
      });

  // Hook up the buffer iterator and both handlers, then go live.
  __xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
  __xray_set_handler(profilingHandleArg0);
  __xray_set_handler_arg1(profilingHandleArg1);

  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
               memory_order_release);
  if (Verbosity())
    Report("XRay Profiling init successful.\n");

  return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
}
484
485bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT {
486  // Set up the flag defaults from the static defaults and the
487  // compiler-provided defaults.
488  {
489    SpinMutexLock Lock(&ProfilerOptionsMutex);
490    auto *F = profilingFlags();
491    F->setDefaults();
492    FlagParser ProfilingParser;
493    registerProfilerFlags(&ProfilingParser, F);
494    ProfilingParser.ParseString(profilingCompilerDefinedFlags());
495  }
496
497  XRayLogImpl Impl{
498      profilingLoggingInit,
499      profilingFinalize,
500      profilingHandleArg0,
501      profilingFlush,
502  };
503  auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl);
504  if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
505    if (Verbosity())
506      Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "
507             "%d\n",
508             RegistrationResult);
509    return false;
510  }
511
512  if (!internal_strcmp(flags()->xray_mode, "xray-profiling"))
513    __xray_log_select_mode("xray_profiling");
514  return true;
515}
516
517} // namespace __xray
518
// Dynamic initializer: registers the profiling mode when the runtime loads.
static auto UNUSED Unused = __xray::profilingDynamicInitializer();
520