PerfJITEventListener.cpp revision 360784
1//===-- PerfJITEventListener.cpp - Tell Linux's perf about JITted code ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a JITEventListener object that tells perf about JITted
10// functions, including source line information.
11//
12// Documentation for perf jit integration is available at:
13// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt
14// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/Twine.h"
19#include "llvm/Config/config.h"
20#include "llvm/DebugInfo/DWARF/DWARFContext.h"
21#include "llvm/ExecutionEngine/JITEventListener.h"
22#include "llvm/Object/ObjectFile.h"
23#include "llvm/Object/SymbolSize.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/Errno.h"
26#include "llvm/Support/FileSystem.h"
27#include "llvm/Support/ManagedStatic.h"
28#include "llvm/Support/MemoryBuffer.h"
29#include "llvm/Support/Mutex.h"
30#include "llvm/Support/Path.h"
31#include "llvm/Support/Process.h"
32#include "llvm/Support/Threading.h"
33#include "llvm/Support/raw_ostream.h"
34#include <mutex>
35
36#include <sys/mman.h>  // mmap()
37#include <sys/types.h> // getpid()
38#include <time.h>      // clock_gettime(), time(), localtime_r() */
39#include <unistd.h>    // for getpid(), read(), close()
40
41using namespace llvm;
42using namespace llvm::object;
43typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
44
45namespace {
46
// Language tag that becomes part of the per-process dump directory name
// (XXX: should something better be derived from debug info?). This has to
// remain a macro because it is pasted onto an adjacent string literal.
#define JIT_LANG "llvm-IR"

// Magic number opening the jitdump header: the characters 'J', 'i', 'T', 'D'
// packed from the most significant byte down to the least significant one.
#define LLVM_PERF_JIT_MAGIC                                                    \
  (static_cast<uint32_t>('J') << 24 | static_cast<uint32_t>('i') << 16 |       \
   static_cast<uint32_t>('T') << 8 | static_cast<uint32_t>('D'))

// Version number stored in the jitdump header.
#define LLVM_PERF_JIT_VERSION 1

// Header flag, bit 0: set if the jitdump file is using an
// architecture-specific timestamp clock source.
#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << 0)
58
59struct LLVMPerfJitHeader;
60
61class PerfJITEventListener : public JITEventListener {
62public:
63  PerfJITEventListener();
64  ~PerfJITEventListener() {
65    if (MarkerAddr)
66      CloseMarker();
67  }
68
69  void notifyObjectLoaded(ObjectKey K, const ObjectFile &Obj,
70                          const RuntimeDyld::LoadedObjectInfo &L) override;
71  void notifyFreeingObject(ObjectKey K) override;
72
73private:
74  bool InitDebuggingDir();
75  bool OpenMarker();
76  void CloseMarker();
77  static bool FillMachine(LLVMPerfJitHeader &hdr);
78
79  void NotifyCode(Expected<llvm::StringRef> &Symbol, uint64_t CodeAddr,
80                  uint64_t CodeSize);
81  void NotifyDebug(uint64_t CodeAddr, DILineInfoTable Lines);
82
83  // cache lookups
84  pid_t Pid;
85
86  // base directory for output data
87  std::string JitPath;
88
89  // output data stream, closed via Dumpstream
90  int DumpFd = -1;
91
92  // output data stream
93  std::unique_ptr<raw_fd_ostream> Dumpstream;
94
95  // prevent concurrent dumps from messing up the output file
96  sys::Mutex Mutex;
97
98  // perf mmap marker
99  void *MarkerAddr = NULL;
100
101  // perf support ready
102  bool SuccessfullyInitialized = false;
103
104  // identifier for functions, primarily to identify when moving them around
105  uint64_t CodeGeneration = 1;
106};
107
108// The following are POD struct definitions from the perf jit specification
109
// Values for LLVMPerfJitRecordPrefix::Id. This file writes only the code-load
// and debug-info record kinds.
enum LLVMPerfJitRecordType {
  // Written by NotifyCode().
  JIT_CODE_LOAD = 0,
  // Not emitted, code isn't moved.
  JIT_CODE_MOVE = 1,
  // Written by NotifyDebug().
  JIT_CODE_DEBUG_INFO = 2,
  // Not emitted, unnecessary.
  JIT_CODE_CLOSE = 3,
  // Not emitted.
  JIT_CODE_UNWINDING_INFO = 4,

  // One past the last record type listed above.
  JIT_CODE_MAX
};
119
// Header written once at the start of the dump file. The struct is written to
// the file byte-for-byte, so the order and width of the fields must not change.
struct LLVMPerfJitHeader {
  // Magic number, the characters "JiTD" (LLVM_PERF_JIT_MAGIC).
  uint32_t Magic;
  // Header version (LLVM_PERF_JIT_VERSION).
  uint32_t Version;
  // Total size of this header in bytes.
  uint32_t TotalSize;
  // ELF machine identifier of the target, filled in by FillMachine().
  uint32_t ElfMach;
  // Reserved.
  uint32_t Pad1;
  // Id of the process producing the dump.
  uint32_t Pid;
  // Timestamp taken when the header is written.
  uint64_t Timestamp;
  // Flag bits, e.g. JITDUMP_FLAGS_ARCH_TIMESTAMP.
  uint64_t Flags;
};
130
// Common prefix that is mandatory at the start of every record.
struct LLVMPerfJitRecordPrefix {
  // Record type identifier, one of LLVMPerfJitRecordType.
  uint32_t Id;
  // Size in bytes of the whole record, including this prefix and any
  // variable-length data that follows the fixed part.
  uint32_t TotalSize;
  // Timestamp taken when the record is built.
  uint64_t Timestamp;
};
137
138struct LLVMPerfJitRecordCodeLoad {
139  LLVMPerfJitRecordPrefix Prefix;
140
141  uint32_t Pid;
142  uint32_t Tid;
143  uint64_t Vma;
144  uint64_t CodeAddr;
145  uint64_t CodeSize;
146  uint64_t CodeIndex;
147};
148
// One source-line entry inside a JIT_CODE_DEBUG_INFO record. The struct is
// written to the dump file byte-for-byte, so the fields use fixed-width
// unsigned types: the record layout describes line and discriminator as
// uint32_t, and a plain `int` would only match where int happens to be 32 bits.
struct LLVMPerfJitDebugEntry {
  // Code address the entry applies to.
  uint64_t Addr;
  // Source line number starting at 1.
  uint32_t Lineno;
  // Column discriminator, 0 is default.
  uint32_t Discrim;
  // followed by null terminated filename, \xff\0 if same as previous entry
};
155
156struct LLVMPerfJitRecordDebugInfo {
157  LLVMPerfJitRecordPrefix Prefix;
158
159  uint64_t CodeAddr;
160  uint64_t NrEntry;
161  // followed by NrEntry LLVMPerfJitDebugEntry records
162};
163
// Collapse a timespec into a single nanosecond count.
static inline uint64_t timespec_to_ns(const struct timespec *ts) {
  const uint64_t NanosPerSecond = 1000000000;
  const uint64_t WholeSeconds = static_cast<uint64_t>(ts->tv_sec);
  return WholeSeconds * NanosPerSecond + ts->tv_nsec;
}
168
169static inline uint64_t perf_get_timestamp(void) {
170  struct timespec ts;
171  int ret;
172
173  ret = clock_gettime(CLOCK_MONOTONIC, &ts);
174  if (ret)
175    return 0;
176
177  return timespec_to_ns(&ts);
178}
179
180PerfJITEventListener::PerfJITEventListener() : Pid(::getpid()) {
181  // check if clock-source is supported
182  if (!perf_get_timestamp()) {
183    errs() << "kernel does not support CLOCK_MONOTONIC\n";
184    return;
185  }
186
187  if (!InitDebuggingDir()) {
188    errs() << "could not initialize debugging directory\n";
189    return;
190  }
191
192  std::string Filename;
193  raw_string_ostream FilenameBuf(Filename);
194  FilenameBuf << JitPath << "/jit-" << Pid << ".dump";
195
196  // Need to open ourselves, because we need to hand the FD to OpenMarker() and
197  // raw_fd_ostream doesn't expose the FD.
198  using sys::fs::openFileForWrite;
199  if (auto EC =
200          openFileForReadWrite(FilenameBuf.str(), DumpFd,
201			       sys::fs::CD_CreateNew, sys::fs::OF_None)) {
202    errs() << "could not open JIT dump file " << FilenameBuf.str() << ": "
203           << EC.message() << "\n";
204    return;
205  }
206
207  Dumpstream = std::make_unique<raw_fd_ostream>(DumpFd, true);
208
209  LLVMPerfJitHeader Header = {0};
210  if (!FillMachine(Header))
211    return;
212
213  // signal this process emits JIT information
214  if (!OpenMarker())
215    return;
216
217  // emit dumpstream header
218  Header.Magic = LLVM_PERF_JIT_MAGIC;
219  Header.Version = LLVM_PERF_JIT_VERSION;
220  Header.TotalSize = sizeof(Header);
221  Header.Pid = Pid;
222  Header.Timestamp = perf_get_timestamp();
223  Dumpstream->write(reinterpret_cast<const char *>(&Header), sizeof(Header));
224
225  // Everything initialized, can do profiling now.
226  if (!Dumpstream->has_error())
227    SuccessfullyInitialized = true;
228}
229
230void PerfJITEventListener::notifyObjectLoaded(
231    ObjectKey K, const ObjectFile &Obj,
232    const RuntimeDyld::LoadedObjectInfo &L) {
233
234  if (!SuccessfullyInitialized)
235    return;
236
237  OwningBinary<ObjectFile> DebugObjOwner = L.getObjectForDebug(Obj);
238  const ObjectFile &DebugObj = *DebugObjOwner.getBinary();
239
240  // Get the address of the object image for use as a unique identifier
241  std::unique_ptr<DIContext> Context = DWARFContext::create(DebugObj);
242
243  // Use symbol info to iterate over functions in the object.
244  for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) {
245    SymbolRef Sym = P.first;
246    std::string SourceFileName;
247
248    Expected<SymbolRef::Type> SymTypeOrErr = Sym.getType();
249    if (!SymTypeOrErr) {
250      // There's not much we can with errors here
251      consumeError(SymTypeOrErr.takeError());
252      continue;
253    }
254    SymbolRef::Type SymType = *SymTypeOrErr;
255    if (SymType != SymbolRef::ST_Function)
256      continue;
257
258    Expected<StringRef> Name = Sym.getName();
259    if (!Name) {
260      consumeError(Name.takeError());
261      continue;
262    }
263
264    Expected<uint64_t> AddrOrErr = Sym.getAddress();
265    if (!AddrOrErr) {
266      consumeError(AddrOrErr.takeError());
267      continue;
268    }
269    uint64_t Size = P.second;
270    object::SectionedAddress Address;
271    Address.Address = *AddrOrErr;
272
273    uint64_t SectionIndex = object::SectionedAddress::UndefSection;
274    if (auto SectOrErr = Sym.getSection())
275        if (*SectOrErr != Obj.section_end())
276            SectionIndex = SectOrErr.get()->getIndex();
277
278    // According to spec debugging info has to come before loading the
279    // corresonding code load.
280    DILineInfoTable Lines = Context->getLineInfoForAddressRange(
281        {*AddrOrErr, SectionIndex}, Size, FileLineInfoKind::AbsoluteFilePath);
282
283    NotifyDebug(*AddrOrErr, Lines);
284    NotifyCode(Name, *AddrOrErr, Size);
285  }
286
287  Dumpstream->flush();
288}
289
290void PerfJITEventListener::notifyFreeingObject(ObjectKey K) {
291  // perf currently doesn't have an interface for unloading. But munmap()ing the
292  // code section does, so that's ok.
293}
294
295bool PerfJITEventListener::InitDebuggingDir() {
296  time_t Time;
297  struct tm LocalTime;
298  char TimeBuffer[sizeof("YYYYMMDD")];
299  SmallString<64> Path;
300
301  // search for location to dump data to
302  if (const char *BaseDir = getenv("JITDUMPDIR"))
303    Path.append(BaseDir);
304  else if (!sys::path::home_directory(Path))
305    Path = ".";
306
307  // create debug directory
308  Path += "/.debug/jit/";
309  if (auto EC = sys::fs::create_directories(Path)) {
310    errs() << "could not create jit cache directory " << Path << ": "
311           << EC.message() << "\n";
312    return false;
313  }
314
315  // create unique directory for dump data related to this process
316  time(&Time);
317  localtime_r(&Time, &LocalTime);
318  strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime);
319  Path += JIT_LANG "-jit-";
320  Path += TimeBuffer;
321
322  SmallString<128> UniqueDebugDir;
323
324  using sys::fs::createUniqueDirectory;
325  if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) {
326    errs() << "could not create unique jit cache directory " << UniqueDebugDir
327           << ": " << EC.message() << "\n";
328    return false;
329  }
330
331  JitPath = UniqueDebugDir.str();
332
333  return true;
334}
335
336bool PerfJITEventListener::OpenMarker() {
337  // We mmap the jitdump to create an MMAP RECORD in perf.data file.  The mmap
338  // is captured either live (perf record running when we mmap) or in deferred
339  // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump
340  // file for more meta data info about the jitted code. Perf report/annotate
341  // detect this special filename and process the jitdump file.
342  //
343  // Mapping must be PROT_EXEC to ensure it is captured by perf record
344  // even when not using -d option.
345  MarkerAddr = ::mmap(NULL, sys::Process::getPageSizeEstimate(),
346                      PROT_READ | PROT_EXEC, MAP_PRIVATE, DumpFd, 0);
347
348  if (MarkerAddr == MAP_FAILED) {
349    errs() << "could not mmap JIT marker\n";
350    return false;
351  }
352  return true;
353}
354
355void PerfJITEventListener::CloseMarker() {
356  if (!MarkerAddr)
357    return;
358
359  munmap(MarkerAddr, sys::Process::getPageSizeEstimate());
360  MarkerAddr = nullptr;
361}
362
363bool PerfJITEventListener::FillMachine(LLVMPerfJitHeader &hdr) {
364  char id[16];
365  struct {
366    uint16_t e_type;
367    uint16_t e_machine;
368  } info;
369
370  size_t RequiredMemory = sizeof(id) + sizeof(info);
371
372  ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
373    MemoryBuffer::getFileSlice("/proc/self/exe",
374			       RequiredMemory,
375			       0);
376
377  // This'll not guarantee that enough data was actually read from the
378  // underlying file. Instead the trailing part of the buffer would be
379  // zeroed. Given the ELF signature check below that seems ok though,
380  // it's unlikely that the file ends just after that, and the
381  // consequence would just be that perf wouldn't recognize the
382  // signature.
383  if (auto EC = MB.getError()) {
384    errs() << "could not open /proc/self/exe: " << EC.message() << "\n";
385    return false;
386  }
387
388  memcpy(&id, (*MB)->getBufferStart(), sizeof(id));
389  memcpy(&info, (*MB)->getBufferStart() + sizeof(id), sizeof(info));
390
391  // check ELF signature
392  if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') {
393    errs() << "invalid elf signature\n";
394    return false;
395  }
396
397  hdr.ElfMach = info.e_machine;
398
399  return true;
400}
401
402void PerfJITEventListener::NotifyCode(Expected<llvm::StringRef> &Symbol,
403                                      uint64_t CodeAddr, uint64_t CodeSize) {
404  assert(SuccessfullyInitialized);
405
406  // 0 length functions can't have samples.
407  if (CodeSize == 0)
408    return;
409
410  LLVMPerfJitRecordCodeLoad rec;
411  rec.Prefix.Id = JIT_CODE_LOAD;
412  rec.Prefix.TotalSize = sizeof(rec) +        // debug record itself
413                         Symbol->size() + 1 + // symbol name
414                         CodeSize;            // and code
415  rec.Prefix.Timestamp = perf_get_timestamp();
416
417  rec.CodeSize = CodeSize;
418  rec.Vma = 0;
419  rec.CodeAddr = CodeAddr;
420  rec.Pid = Pid;
421  rec.Tid = get_threadid();
422
423  // avoid interspersing output
424  std::lock_guard<sys::Mutex> Guard(Mutex);
425
426  rec.CodeIndex = CodeGeneration++; // under lock!
427
428  Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
429  Dumpstream->write(Symbol->data(), Symbol->size() + 1);
430  Dumpstream->write(reinterpret_cast<const char *>(CodeAddr), CodeSize);
431}
432
433void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr,
434                                       DILineInfoTable Lines) {
435  assert(SuccessfullyInitialized);
436
437  // Didn't get useful debug info.
438  if (Lines.empty())
439    return;
440
441  LLVMPerfJitRecordDebugInfo rec;
442  rec.Prefix.Id = JIT_CODE_DEBUG_INFO;
443  rec.Prefix.TotalSize = sizeof(rec); // will be increased further
444  rec.Prefix.Timestamp = perf_get_timestamp();
445  rec.CodeAddr = CodeAddr;
446  rec.NrEntry = Lines.size();
447
448  // compute total size size of record (variable due to filenames)
449  DILineInfoTable::iterator Begin = Lines.begin();
450  DILineInfoTable::iterator End = Lines.end();
451  for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
452    DILineInfo &line = It->second;
453    rec.Prefix.TotalSize += sizeof(LLVMPerfJitDebugEntry);
454    rec.Prefix.TotalSize += line.FileName.size() + 1;
455  }
456
457  // The debug_entry describes the source line information. It is defined as
458  // follows in order:
459  // * uint64_t code_addr: address of function for which the debug information
460  // is generated
461  // * uint32_t line     : source file line number (starting at 1)
462  // * uint32_t discrim  : column discriminator, 0 is default
463  // * char name[n]      : source file name in ASCII, including null termination
464
465  // avoid interspersing output
466  std::lock_guard<sys::Mutex> Guard(Mutex);
467
468  Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
469
470  for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
471    LLVMPerfJitDebugEntry LineInfo;
472    DILineInfo &Line = It->second;
473
474    LineInfo.Addr = It->first;
475    // The function re-created by perf is preceded by a elf
476    // header. Need to adjust for that, otherwise the results are
477    // wrong.
478    LineInfo.Addr += 0x40;
479    LineInfo.Lineno = Line.Line;
480    LineInfo.Discrim = Line.Discriminator;
481
482    Dumpstream->write(reinterpret_cast<const char *>(&LineInfo),
483                      sizeof(LineInfo));
484    Dumpstream->write(Line.FileName.c_str(), Line.FileName.size() + 1);
485  }
486}
487
488// There should be only a single event listener per process, otherwise perf gets
489// confused.
490llvm::ManagedStatic<PerfJITEventListener> PerfListener;
491
492} // end anonymous namespace
493
494namespace llvm {
495JITEventListener *JITEventListener::createPerfJITEventListener() {
496  return &*PerfListener;
497}
498
499} // namespace llvm
500
501LLVMJITEventListenerRef LLVMCreatePerfJITEventListener(void)
502{
503  return wrap(JITEventListener::createPerfJITEventListener());
504}
505