SampleProfReader.h revision 360784
1//===- SampleProfReader.h - Read LLVM sample profile data -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains definitions needed for reading sample profiles.
10//
11// NOTE: If you are making changes to this file format, please remember
12//       to document them in the Clang documentation at
13//       tools/clang/docs/UsersManual.rst.
14//
15// Text format
16// -----------
17//
18// Sample profiles are written as ASCII text. The file is divided into
19// sections, which correspond to each of the functions executed at runtime.
20// Each section has the following format
21//
22//     function1:total_samples:total_head_samples
23//      offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ]
24//      offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ]
25//      ...
26//      offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ]
27//      offsetA[.discriminator]: fnA:num_of_total_samples
28//       offsetA1[.discriminator]: number_of_samples [fn7:num fn8:num ... ]
29//       ...
30//
// This is a nested tree in which the indentation represents the nesting level
// of the inline stack. There are no blank lines in the file, and the spacing
// within a single line is fixed. Additional spaces will result in an error
// while reading the file.
35//
36// Any line starting with the '#' character is completely ignored.
37//
38// Inlined calls are represented with indentation. The Inline stack is a
39// stack of source locations in which the top of the stack represents the
40// leaf function, and the bottom of the stack represents the actual
41// symbol to which the instruction belongs.
42//
43// Function names must be mangled in order for the profile loader to
44// match them in the current translation unit. The two numbers in the
45// function header specify how many total samples were accumulated in the
46// function (first number), and the total number of samples accumulated
47// in the prologue of the function (second number). This head sample
48// count provides an indicator of how frequently the function is invoked.
49//
50// There are two types of lines in the function body.
51//
52// * Sampled line represents the profile information of a source location.
53// * Callsite line represents the profile information of a callsite.
54//
55// Each sampled line may contain several items. Some are optional (marked
56// below):
57//
58// a. Source line offset. This number represents the line number
59//    in the function where the sample was collected. The line number is
60//    always relative to the line where symbol of the function is
61//    defined. So, if the function has its header at line 280, the offset
62//    13 is at line 293 in the file.
63//
//    Note that this offset should never be a negative number. A negative
//    value could otherwise arise in cases like macros: the debug machinery
//    registers the line number at the point of macro expansion, so if the
//    macro was expanded in a line before the start of the function, the
//    profile converter should emit a 0 as the offset (this means that the
//    optimizers will not be able to associate a meaningful weight to the
//    instructions in the macro).
71//
72// b. [OPTIONAL] Discriminator. This is used if the sampled program
73//    was compiled with DWARF discriminator support
74//    (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators).
75//    DWARF discriminators are unsigned integer values that allow the
76//    compiler to distinguish between multiple execution paths on the
77//    same source line location.
78//
79//    For example, consider the line of code ``if (cond) foo(); else bar();``.
80//    If the predicate ``cond`` is true 80% of the time, then the edge
81//    into function ``foo`` should be considered to be taken most of the
82//    time. But both calls to ``foo`` and ``bar`` are at the same source
83//    line, so a sample count at that line is not sufficient. The
84//    compiler needs to know which part of that line is taken more
85//    frequently.
86//
87//    This is what discriminators provide. In this case, the calls to
88//    ``foo`` and ``bar`` will be at the same line, but will have
89//    different discriminator values. This allows the compiler to correctly
90//    set edge weights into ``foo`` and ``bar``.
91//
92// c. Number of samples. This is an integer quantity representing the
93//    number of samples collected by the profiler at this source
94//    location.
95//
// d. [OPTIONAL] Potential call targets and samples. If present, this
//    line contains a call instruction. This models both direct and
//    indirect calls. Each called target is listed together with the
//    number of samples. For example,
99//
100//      130: 7  foo:3  bar:2  baz:7
101//
102//    The above means that at relative line offset 130 there is a call
103//    instruction that calls one of ``foo()``, ``bar()`` and ``baz()``,
104//    with ``baz()`` being the relatively more frequently called target.
105//
106// Each callsite line may contain several items. Some are optional.
107//
108// a. Source line offset. This number represents the line number of the
109//    callsite that is inlined in the profiled binary.
110//
111// b. [OPTIONAL] Discriminator. Same as the discriminator for sampled line.
112//
113// c. Number of samples. This is an integer quantity representing the
114//    total number of samples collected for the inlined instance at this
115//    callsite
116//
117//
118// Binary format
119// -------------
120//
121// This is a more compact encoding. Numbers are encoded as ULEB128 values
122// and all strings are encoded in a name table. The file is organized in
123// the following sections:
124//
125// MAGIC (uint64_t)
126//    File identifier computed by function SPMagic() (0x5350524f463432ff)
127//
128// VERSION (uint32_t)
129//    File format version number computed by SPVersion()
130//
131// SUMMARY
132//    TOTAL_COUNT (uint64_t)
133//        Total number of samples in the profile.
134//    MAX_COUNT (uint64_t)
135//        Maximum value of samples on a line.
136//    MAX_FUNCTION_COUNT (uint64_t)
137//        Maximum number of samples at function entry (head samples).
138//    NUM_COUNTS (uint64_t)
139//        Number of lines with samples.
140//    NUM_FUNCTIONS (uint64_t)
141//        Number of functions with samples.
142//    NUM_DETAILED_SUMMARY_ENTRIES (size_t)
143//        Number of entries in detailed summary
144//    DETAILED_SUMMARY
145//        A list of detailed summary entry. Each entry consists of
146//        CUTOFF (uint32_t)
147//            Required percentile of total sample count expressed as a fraction
148//            multiplied by 1000000.
149//        MIN_COUNT (uint64_t)
150//            The minimum number of samples required to reach the target
151//            CUTOFF.
152//        NUM_COUNTS (uint64_t)
//            Number of samples to get to the desired percentile.
154//
155// NAME TABLE
156//    SIZE (uint32_t)
157//        Number of entries in the name table.
158//    NAMES
159//        A NUL-separated list of SIZE strings.
160//
161// FUNCTION BODY (one for each uninlined function body present in the profile)
162//    HEAD_SAMPLES (uint64_t) [only for top-level functions]
163//        Total number of samples collected at the head (prologue) of the
164//        function.
165//        NOTE: This field should only be present for top-level functions
166//              (i.e., not inlined into any caller). Inlined function calls
167//              have no prologue, so they don't need this.
168//    NAME_IDX (uint32_t)
169//        Index into the name table indicating the function name.
170//    SAMPLES (uint64_t)
171//        Total number of samples collected in this function.
172//    NRECS (uint32_t)
//        Total number of sampling records in this function's profile.
174//    BODY RECORDS
175//        A list of NRECS entries. Each entry contains:
176//          OFFSET (uint32_t)
177//            Line offset from the start of the function.
178//          DISCRIMINATOR (uint32_t)
179//            Discriminator value (see description of discriminators
180//            in the text format documentation above).
181//          SAMPLES (uint64_t)
182//            Number of samples collected at this location.
183//          NUM_CALLS (uint32_t)
184//            Number of non-inlined function calls made at this location. In the
185//            case of direct calls, this number will always be 1. For indirect
186//            calls (virtual functions and function pointers) this will
187//            represent all the actual functions called at runtime.
188//          CALL_TARGETS
189//            A list of NUM_CALLS entries for each called function:
190//               NAME_IDX (uint32_t)
191//                  Index into the name table with the callee name.
192//               SAMPLES (uint64_t)
193//                  Number of samples collected at the call site.
194//    NUM_INLINED_FUNCTIONS (uint32_t)
195//      Number of callees inlined into this function.
196//    INLINED FUNCTION RECORDS
197//      A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined
198//      callees.
199//        OFFSET (uint32_t)
200//          Line offset from the start of the function.
201//        DISCRIMINATOR (uint32_t)
202//          Discriminator value (see description of discriminators
203//          in the text format documentation above).
204//        FUNCTION BODY
205//          A FUNCTION BODY entry describing the inlined function.
206//===----------------------------------------------------------------------===//
207
208#ifndef LLVM_PROFILEDATA_SAMPLEPROFREADER_H
209#define LLVM_PROFILEDATA_SAMPLEPROFREADER_H
210
211#include "llvm/ADT/SmallVector.h"
212#include "llvm/ADT/StringMap.h"
213#include "llvm/ADT/StringRef.h"
214#include "llvm/ADT/Twine.h"
215#include "llvm/IR/DiagnosticInfo.h"
216#include "llvm/IR/Function.h"
217#include "llvm/IR/LLVMContext.h"
218#include "llvm/IR/ProfileSummary.h"
219#include "llvm/ProfileData/GCOV.h"
220#include "llvm/ProfileData/SampleProf.h"
221#include "llvm/Support/Debug.h"
222#include "llvm/Support/ErrorOr.h"
223#include "llvm/Support/MemoryBuffer.h"
224#include "llvm/Support/SymbolRemappingReader.h"
225#include <algorithm>
226#include <cstdint>
227#include <memory>
228#include <string>
229#include <system_error>
230#include <vector>
231
232namespace llvm {
233
234class raw_ostream;
235
236namespace sampleprof {
237
238class SampleProfileReader;
239
240/// SampleProfileReaderItaniumRemapper remaps the profile data from a
241/// sample profile data reader, by applying a provided set of equivalences
242/// between components of the symbol names in the profile.
243class SampleProfileReaderItaniumRemapper {
244public:
245  SampleProfileReaderItaniumRemapper(std::unique_ptr<MemoryBuffer> B,
246                                     std::unique_ptr<SymbolRemappingReader> SRR,
247                                     SampleProfileReader &R)
248      : Buffer(std::move(B)), Remappings(std::move(SRR)), Reader(R) {
249    assert(Remappings && "Remappings cannot be nullptr");
250  }
251
252  /// Create a remapper from the given remapping file. The remapper will
253  /// be used for profile read in by Reader.
254  static ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
255  create(const std::string Filename, SampleProfileReader &Reader,
256         LLVMContext &C);
257
258  /// Create a remapper from the given Buffer. The remapper will
259  /// be used for profile read in by Reader.
260  static ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
261  create(std::unique_ptr<MemoryBuffer> &B, SampleProfileReader &Reader,
262         LLVMContext &C);
263
264  /// Apply remappings to the profile read by Reader.
265  void applyRemapping(LLVMContext &Ctx);
266
267  bool hasApplied() { return RemappingApplied; }
268
269  /// Insert function name into remapper.
270  void insert(StringRef FunctionName) { Remappings->insert(FunctionName); }
271
272  /// Query whether there is equivalent in the remapper which has been
273  /// inserted.
274  bool exist(StringRef FunctionName) {
275    return Remappings->lookup(FunctionName);
276  }
277
278  /// Return the samples collected for function \p F if remapper knows
279  /// it is present in SampleMap.
280  FunctionSamples *getSamplesFor(StringRef FunctionName);
281
282private:
283  // The buffer holding the content read from remapping file.
284  std::unique_ptr<MemoryBuffer> Buffer;
285  std::unique_ptr<SymbolRemappingReader> Remappings;
286  DenseMap<SymbolRemappingReader::Key, FunctionSamples *> SampleMap;
287  // The Reader the remapper is servicing.
288  SampleProfileReader &Reader;
289  // Indicate whether remapping has been applied to the profile read
290  // by Reader -- by calling applyRemapping.
291  bool RemappingApplied = false;
292};
293
294/// Sample-based profile reader.
295///
296/// Each profile contains sample counts for all the functions
297/// executed. Inside each function, statements are annotated with the
298/// collected samples on all the instructions associated with that
299/// statement.
300///
301/// For this to produce meaningful data, the program needs to be
302/// compiled with some debug information (at minimum, line numbers:
303/// -gline-tables-only). Otherwise, it will be impossible to match IR
304/// instructions to the line numbers collected by the profiler.
305///
306/// From the profile file, we are interested in collecting the
307/// following information:
308///
309/// * A list of functions included in the profile (mangled names).
310///
311/// * For each function F:
312///   1. The total number of samples collected in F.
313///
314///   2. The samples collected at each line in F. To provide some
315///      protection against source code shuffling, line numbers should
316///      be relative to the start of the function.
317///
318/// The reader supports two file formats: text and binary. The text format
319/// is useful for debugging and testing, while the binary format is more
320/// compact and I/O efficient. They can both be used interchangeably.
321class SampleProfileReader {
322public:
323  SampleProfileReader(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
324                      SampleProfileFormat Format = SPF_None)
325      : Profiles(0), Ctx(C), Buffer(std::move(B)), Format(Format) {}
326
327  virtual ~SampleProfileReader() = default;
328
329  /// Read and validate the file header.
330  virtual std::error_code readHeader() = 0;
331
332  /// The interface to read sample profiles from the associated file.
333  std::error_code read() {
334    if (std::error_code EC = readImpl())
335      return EC;
336    if (Remapper)
337      Remapper->applyRemapping(Ctx);
338    return sampleprof_error::success;
339  }
340
341  /// The implementaion to read sample profiles from the associated file.
342  virtual std::error_code readImpl() = 0;
343
344  /// Print the profile for \p FName on stream \p OS.
345  void dumpFunctionProfile(StringRef FName, raw_ostream &OS = dbgs());
346
347  virtual void collectFuncsFrom(const Module &M) {}
348
349  /// Print all the profiles on stream \p OS.
350  void dump(raw_ostream &OS = dbgs());
351
352  /// Return the samples collected for function \p F.
353  FunctionSamples *getSamplesFor(const Function &F) {
354    // The function name may have been updated by adding suffix. Call
355    // a helper to (optionally) strip off suffixes so that we can
356    // match against the original function name in the profile.
357    StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
358    return getSamplesFor(CanonName);
359  }
360
361  /// Return the samples collected for function \p F, create empty
362  /// FunctionSamples if it doesn't exist.
363  FunctionSamples *getOrCreateSamplesFor(const Function &F) {
364    std::string FGUID;
365    StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
366    CanonName = getRepInFormat(CanonName, getFormat(), FGUID);
367    return &Profiles[CanonName];
368  }
369
370  /// Return the samples collected for function \p F.
371  virtual FunctionSamples *getSamplesFor(StringRef Fname) {
372    if (Remapper) {
373      if (auto FS = Remapper->getSamplesFor(Fname))
374        return FS;
375    }
376    std::string FGUID;
377    Fname = getRepInFormat(Fname, getFormat(), FGUID);
378    auto It = Profiles.find(Fname);
379    if (It != Profiles.end())
380      return &It->second;
381    return nullptr;
382  }
383
384  /// Return all the profiles.
385  StringMap<FunctionSamples> &getProfiles() { return Profiles; }
386
387  /// Report a parse error message.
388  void reportError(int64_t LineNumber, Twine Msg) const {
389    Ctx.diagnose(DiagnosticInfoSampleProfile(Buffer->getBufferIdentifier(),
390                                             LineNumber, Msg));
391  }
392
393  /// Create a sample profile reader appropriate to the file format.
394  /// Create a remapper underlying if RemapFilename is not empty.
395  static ErrorOr<std::unique_ptr<SampleProfileReader>>
396  create(const std::string Filename, LLVMContext &C,
397         const std::string RemapFilename = "");
398
399  /// Create a sample profile reader from the supplied memory buffer.
400  /// Create a remapper underlying if RemapFilename is not empty.
401  static ErrorOr<std::unique_ptr<SampleProfileReader>>
402  create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
403         const std::string RemapFilename = "");
404
405  /// Return the profile summary.
406  ProfileSummary &getSummary() const { return *(Summary.get()); }
407
408  MemoryBuffer *getBuffer() const { return Buffer.get(); }
409
410  /// \brief Return the profile format.
411  SampleProfileFormat getFormat() const { return Format; }
412
413  virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() {
414    return nullptr;
415  };
416
417  /// It includes all the names that have samples either in outline instance
418  /// or inline instance.
419  virtual std::vector<StringRef> *getNameTable() { return nullptr; }
420  virtual bool dumpSectionInfo(raw_ostream &OS = dbgs()) { return false; };
421
422protected:
423  /// Map every function to its associated profile.
424  ///
425  /// The profile of every function executed at runtime is collected
426  /// in the structure FunctionSamples. This maps function objects
427  /// to their corresponding profiles.
428  StringMap<FunctionSamples> Profiles;
429
430  /// LLVM context used to emit diagnostics.
431  LLVMContext &Ctx;
432
433  /// Memory buffer holding the profile file.
434  std::unique_ptr<MemoryBuffer> Buffer;
435
436  /// Profile summary information.
437  std::unique_ptr<ProfileSummary> Summary;
438
439  /// Take ownership of the summary of this reader.
440  static std::unique_ptr<ProfileSummary>
441  takeSummary(SampleProfileReader &Reader) {
442    return std::move(Reader.Summary);
443  }
444
445  /// Compute summary for this profile.
446  void computeSummary();
447
448  std::unique_ptr<SampleProfileReaderItaniumRemapper> Remapper;
449
450  /// \brief The format of sample.
451  SampleProfileFormat Format = SPF_None;
452};
453
454class SampleProfileReaderText : public SampleProfileReader {
455public:
456  SampleProfileReaderText(std::unique_ptr<MemoryBuffer> B, LLVMContext &C)
457      : SampleProfileReader(std::move(B), C, SPF_Text) {}
458
459  /// Read and validate the file header.
460  std::error_code readHeader() override { return sampleprof_error::success; }
461
462  /// Read sample profiles from the associated file.
463  std::error_code readImpl() override;
464
465  /// Return true if \p Buffer is in the format supported by this class.
466  static bool hasFormat(const MemoryBuffer &Buffer);
467};
468
469class SampleProfileReaderBinary : public SampleProfileReader {
470public:
471  SampleProfileReaderBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
472                            SampleProfileFormat Format = SPF_None)
473      : SampleProfileReader(std::move(B), C, Format) {}
474
475  /// Read and validate the file header.
476  virtual std::error_code readHeader() override;
477
478  /// Read sample profiles from the associated file.
479  std::error_code readImpl() override;
480
481  /// It includes all the names that have samples either in outline instance
482  /// or inline instance.
483  virtual std::vector<StringRef> *getNameTable() override { return &NameTable; }
484
485protected:
486  /// Read a numeric value of type T from the profile.
487  ///
488  /// If an error occurs during decoding, a diagnostic message is emitted and
489  /// EC is set.
490  ///
491  /// \returns the read value.
492  template <typename T> ErrorOr<T> readNumber();
493
494  /// Read a numeric value of type T from the profile. The value is saved
495  /// without encoded.
496  template <typename T> ErrorOr<T> readUnencodedNumber();
497
498  /// Read a string from the profile.
499  ///
500  /// If an error occurs during decoding, a diagnostic message is emitted and
501  /// EC is set.
502  ///
503  /// \returns the read value.
504  ErrorOr<StringRef> readString();
505
506  /// Read the string index and check whether it overflows the table.
507  template <typename T> inline ErrorOr<uint32_t> readStringIndex(T &Table);
508
509  /// Return true if we've reached the end of file.
510  bool at_eof() const { return Data >= End; }
511
512  /// Read the next function profile instance.
513  std::error_code readFuncProfile(const uint8_t *Start);
514
515  /// Read the contents of the given profile instance.
516  std::error_code readProfile(FunctionSamples &FProfile);
517
518  /// Read the contents of Magic number and Version number.
519  std::error_code readMagicIdent();
520
521  /// Read profile summary.
522  std::error_code readSummary();
523
524  /// Read the whole name table.
525  virtual std::error_code readNameTable();
526
527  /// Points to the current location in the buffer.
528  const uint8_t *Data = nullptr;
529
530  /// Points to the end of the buffer.
531  const uint8_t *End = nullptr;
532
533  /// Function name table.
534  std::vector<StringRef> NameTable;
535
536  /// Read a string indirectly via the name table.
537  virtual ErrorOr<StringRef> readStringFromTable();
538
539private:
540  std::error_code readSummaryEntry(std::vector<ProfileSummaryEntry> &Entries);
541  virtual std::error_code verifySPMagic(uint64_t Magic) = 0;
542};
543
544class SampleProfileReaderRawBinary : public SampleProfileReaderBinary {
545private:
546  virtual std::error_code verifySPMagic(uint64_t Magic) override;
547
548public:
549  SampleProfileReaderRawBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
550                               SampleProfileFormat Format = SPF_Binary)
551      : SampleProfileReaderBinary(std::move(B), C, Format) {}
552
553  /// \brief Return true if \p Buffer is in the format supported by this class.
554  static bool hasFormat(const MemoryBuffer &Buffer);
555};
556
557/// SampleProfileReaderExtBinaryBase/SampleProfileWriterExtBinaryBase defines
558/// the basic structure of the extensible binary format.
559/// The format is organized in sections except the magic and version number
560/// at the beginning. There is a section table before all the sections, and
561/// each entry in the table describes the entry type, start, size and
562/// attributes. The format in each section is defined by the section itself.
563///
564/// It is easy to add a new section while maintaining the backward
565/// compatibility of the profile. Nothing extra needs to be done. If we want
566/// to extend an existing section, like add cache misses information in
567/// addition to the sample count in the profile body, we can add a new section
568/// with the extension and retire the existing section, and we could choose
569/// to keep the parser of the old section if we want the reader to be able
570/// to read both new and old format profile.
571///
572/// SampleProfileReaderExtBinary/SampleProfileWriterExtBinary define the
573/// commonly used sections of a profile in extensible binary format. It is
574/// possible to define other types of profile inherited from
575/// SampleProfileReaderExtBinaryBase/SampleProfileWriterExtBinaryBase.
576class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
577private:
578  std::error_code decompressSection(const uint8_t *SecStart,
579                                    const uint64_t SecSize,
580                                    const uint8_t *&DecompressBuf,
581                                    uint64_t &DecompressBufSize);
582
583  BumpPtrAllocator Allocator;
584
585protected:
586  std::vector<SecHdrTableEntry> SecHdrTable;
587  std::unique_ptr<ProfileSymbolList> ProfSymList;
588  std::error_code readSecHdrTableEntry();
589  std::error_code readSecHdrTable();
590  virtual std::error_code readHeader() override;
591  virtual std::error_code verifySPMagic(uint64_t Magic) override = 0;
592  virtual std::error_code readOneSection(const uint8_t *Start, uint64_t Size,
593                                         SecType Type) = 0;
594
595public:
596  SampleProfileReaderExtBinaryBase(std::unique_ptr<MemoryBuffer> B,
597                                   LLVMContext &C, SampleProfileFormat Format)
598      : SampleProfileReaderBinary(std::move(B), C, Format) {}
599
600  /// Read sample profiles in extensible format from the associated file.
601  std::error_code readImpl() override;
602
603  /// Get the total size of all \p Type sections.
604  uint64_t getSectionSize(SecType Type);
605  /// Get the total size of header and all sections.
606  uint64_t getFileSize();
607  virtual bool dumpSectionInfo(raw_ostream &OS = dbgs()) override;
608};
609
610class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase {
611private:
612  virtual std::error_code verifySPMagic(uint64_t Magic) override;
613  virtual std::error_code readOneSection(const uint8_t *Start, uint64_t Size,
614                                         SecType Type) override;
615  std::error_code readProfileSymbolList();
616  std::error_code readFuncOffsetTable();
617  std::error_code readFuncProfiles();
618
619  /// The table mapping from function name to the offset of its FunctionSample
620  /// towards file start.
621  DenseMap<StringRef, uint64_t> FuncOffsetTable;
622  /// The set containing the functions to use when compiling a module.
623  DenseSet<StringRef> FuncsToUse;
624  /// Use all functions from the input profile.
625  bool UseAllFuncs = true;
626
627public:
628  SampleProfileReaderExtBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
629                               SampleProfileFormat Format = SPF_Ext_Binary)
630      : SampleProfileReaderExtBinaryBase(std::move(B), C, Format) {}
631
632  /// \brief Return true if \p Buffer is in the format supported by this class.
633  static bool hasFormat(const MemoryBuffer &Buffer);
634
635  virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override {
636    return std::move(ProfSymList);
637  };
638
639  /// Collect functions with definitions in Module \p M.
640  void collectFuncsFrom(const Module &M) override;
641};
642
643class SampleProfileReaderCompactBinary : public SampleProfileReaderBinary {
644private:
645  /// Function name table.
646  std::vector<std::string> NameTable;
647  /// The table mapping from function name to the offset of its FunctionSample
648  /// towards file start.
649  DenseMap<StringRef, uint64_t> FuncOffsetTable;
650  /// The set containing the functions to use when compiling a module.
651  DenseSet<StringRef> FuncsToUse;
652  /// Use all functions from the input profile.
653  bool UseAllFuncs = true;
654  virtual std::error_code verifySPMagic(uint64_t Magic) override;
655  virtual std::error_code readNameTable() override;
656  /// Read a string indirectly via the name table.
657  virtual ErrorOr<StringRef> readStringFromTable() override;
658  virtual std::error_code readHeader() override;
659  std::error_code readFuncOffsetTable();
660
661public:
662  SampleProfileReaderCompactBinary(std::unique_ptr<MemoryBuffer> B,
663                                   LLVMContext &C)
664      : SampleProfileReaderBinary(std::move(B), C, SPF_Compact_Binary) {}
665
666  /// \brief Return true if \p Buffer is in the format supported by this class.
667  static bool hasFormat(const MemoryBuffer &Buffer);
668
669  /// Read samples only for functions to use.
670  std::error_code readImpl() override;
671
672  /// Collect functions to be used when compiling Module \p M.
673  void collectFuncsFrom(const Module &M) override;
674};
675
676using InlineCallStack = SmallVector<FunctionSamples *, 10>;
677
// Histogram kinds supported in GCC. Only call target histograms are needed
// at the moment. The enumerators are numbered consecutively starting at 0.
enum HistType {
  HIST_TYPE_INTERVAL = 0,
  HIST_TYPE_POW2 = 1,
  HIST_TYPE_SINGLE_VALUE = 2,
  HIST_TYPE_CONST_DELTA = 3,
  HIST_TYPE_INDIR_CALL = 4,
  HIST_TYPE_AVERAGE = 5,
  HIST_TYPE_IOR = 6,
  HIST_TYPE_INDIR_CALL_TOPN = 7
};
690
691class SampleProfileReaderGCC : public SampleProfileReader {
692public:
693  SampleProfileReaderGCC(std::unique_ptr<MemoryBuffer> B, LLVMContext &C)
694      : SampleProfileReader(std::move(B), C, SPF_GCC),
695        GcovBuffer(Buffer.get()) {}
696
697  /// Read and validate the file header.
698  std::error_code readHeader() override;
699
700  /// Read sample profiles from the associated file.
701  std::error_code readImpl() override;
702
703  /// Return true if \p Buffer is in the format supported by this class.
704  static bool hasFormat(const MemoryBuffer &Buffer);
705
706protected:
707  std::error_code readNameTable();
708  std::error_code readOneFunctionProfile(const InlineCallStack &InlineStack,
709                                         bool Update, uint32_t Offset);
710  std::error_code readFunctionProfiles();
711  std::error_code skipNextWord();
712  template <typename T> ErrorOr<T> readNumber();
713  ErrorOr<StringRef> readString();
714
715  /// Read the section tag and check that it's the same as \p Expected.
716  std::error_code readSectionTag(uint32_t Expected);
717
718  /// GCOV buffer containing the profile.
719  GCOVBuffer GcovBuffer;
720
721  /// Function names in this profile.
722  std::vector<std::string> Names;
723
724  /// GCOV tags used to separate sections in the profile file.
725  static const uint32_t GCOVTagAFDOFileNames = 0xaa000000;
726  static const uint32_t GCOVTagAFDOFunction = 0xac000000;
727};
728
729} // end namespace sampleprof
730
731} // end namespace llvm
732
733#endif // LLVM_PROFILEDATA_SAMPLEPROFREADER_H
734