InstrProfReader.h revision 360784
1219019Sgabor//===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===// 2219019Sgabor// 3219019Sgabor// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4219019Sgabor// See https://llvm.org/LICENSE.txt for license information. 5219019Sgabor// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6219019Sgabor// 7219019Sgabor//===----------------------------------------------------------------------===// 8219019Sgabor// 9219019Sgabor// This file contains support for reading profiling data for instrumentation 10219019Sgabor// based PGO and coverage. 11219019Sgabor// 12219019Sgabor//===----------------------------------------------------------------------===// 13219019Sgabor 14219019Sgabor#ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H 15219019Sgabor#define LLVM_PROFILEDATA_INSTRPROFREADER_H 16219019Sgabor 17219019Sgabor#include "llvm/ADT/ArrayRef.h" 18219019Sgabor#include "llvm/ADT/StringRef.h" 19219019Sgabor#include "llvm/IR/ProfileSummary.h" 20219019Sgabor#include "llvm/ProfileData/InstrProf.h" 21219019Sgabor#include "llvm/Support/Endian.h" 22219019Sgabor#include "llvm/Support/Error.h" 23219019Sgabor#include "llvm/Support/LineIterator.h" 24219019Sgabor#include "llvm/Support/MemoryBuffer.h" 25219019Sgabor#include "llvm/Support/OnDiskHashTable.h" 26219019Sgabor#include "llvm/Support/SwapByteOrder.h" 27219019Sgabor#include <algorithm> 28219019Sgabor#include <cassert> 29219019Sgabor#include <cstddef> 30219019Sgabor#include <cstdint> 31219019Sgabor#include <iterator> 32219019Sgabor#include <memory> 33219019Sgabor#include <utility> 34219019Sgabor#include <vector> 35219019Sgabor 36219019Sgabornamespace llvm { 37219019Sgabor 38219019Sgaborclass InstrProfReader; 39219019Sgabor 40219019Sgabor/// A file format agnostic iterator over profiling data. 41219019Sgaborclass InstrProfIterator : public std::iterator<std::input_iterator_tag, 42219019Sgabor NamedInstrProfRecord> { 43219019Sgabor InstrProfReader *Reader = nullptr; 44219019Sgabor value_type Record; 45219019Sgabor 46219019Sgabor void Increment(); 47219019Sgabor 48219019Sgaborpublic: 49219019Sgabor InstrProfIterator() = default; 50219019Sgabor InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); } 51219019Sgabor 52219019Sgabor InstrProfIterator &operator++() { Increment(); return *this; } 53219019Sgabor bool operator==(const InstrProfIterator &RHS) { return Reader == RHS.Reader; } 54219019Sgabor bool operator!=(const InstrProfIterator &RHS) { return Reader != RHS.Reader; } 55219019Sgabor value_type &operator*() { return Record; } 56219019Sgabor value_type *operator->() { return &Record; } 57219019Sgabor}; 58219019Sgabor 59219019Sgabor/// Base class and interface for reading profiling data of any known instrprof 60219019Sgabor/// format. Provides an iterator over NamedInstrProfRecords. 61219019Sgaborclass InstrProfReader { 62219019Sgabor instrprof_error LastError = instrprof_error::success; 63219019Sgabor 64219019Sgaborpublic: 65219019Sgabor InstrProfReader() = default; 66219019Sgabor virtual ~InstrProfReader() = default; 67219019Sgabor 68219019Sgabor /// Read the header. Required before reading first record. 69219019Sgabor virtual Error readHeader() = 0; 70219019Sgabor 71219019Sgabor /// Read a single record. 72219019Sgabor virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0; 73219019Sgabor 74219019Sgabor /// Iterator over profile data. 75219019Sgabor InstrProfIterator begin() { return InstrProfIterator(this); } 76219019Sgabor InstrProfIterator end() { return InstrProfIterator(); } 77219019Sgabor 78219019Sgabor virtual bool isIRLevelProfile() const = 0; 79219019Sgabor 80219019Sgabor virtual bool hasCSIRLevelProfile() const = 0; 81219019Sgabor 82219019Sgabor /// Return the PGO symtab. There are three different readers: 83219019Sgabor /// Raw, Text, and Indexed profile readers. The first two types 84219019Sgabor /// of readers are used only by llvm-profdata tool, while the indexed 85219019Sgabor /// profile reader is also used by llvm-cov tool and the compiler ( 86219019Sgabor /// backend or frontend). Since creating PGO symtab can create 87219019Sgabor /// significant runtime and memory overhead (as it touches data 88219019Sgabor /// for the whole program), InstrProfSymtab for the indexed profile 89219019Sgabor /// reader should be created on demand and it is recommended to be 90219019Sgabor /// only used for dumping purpose with llvm-proftool, not with the 91219019Sgabor /// compiler. 92219019Sgabor virtual InstrProfSymtab &getSymtab() = 0; 93219019Sgabor 94219019Sgabor /// Compute the sum of counts and return in Sum. 95219019Sgabor void accumulateCounts(CountSumOrPercent &Sum, bool IsCS); 96219019Sgabor 97219019Sgaborprotected: 98219019Sgabor std::unique_ptr<InstrProfSymtab> Symtab; 99219019Sgabor 100219019Sgabor /// Set the current error and return same. 101219019Sgabor Error error(instrprof_error Err) { 102219019Sgabor LastError = Err; 103219019Sgabor if (Err == instrprof_error::success) 104219019Sgabor return Error::success(); 105219019Sgabor return make_error<InstrProfError>(Err); 106219019Sgabor } 107219019Sgabor 108219019Sgabor Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); } 109219019Sgabor 110219019Sgabor /// Clear the current error and return a successful one. 111219019Sgabor Error success() { return error(instrprof_error::success); } 112219019Sgabor 113219019Sgaborpublic: 114219019Sgabor /// Return true if the reader has finished reading the profile data. 115219019Sgabor bool isEOF() { return LastError == instrprof_error::eof; } 116219019Sgabor 117219019Sgabor /// Return true if the reader encountered an error reading profiling data. 118219019Sgabor bool hasError() { return LastError != instrprof_error::success && !isEOF(); } 119219019Sgabor 120219019Sgabor /// Get the current error. 121219019Sgabor Error getError() { 122219019Sgabor if (hasError()) 123219019Sgabor return make_error<InstrProfError>(LastError); 124219019Sgabor return Error::success(); 125219019Sgabor } 126219019Sgabor 127219019Sgabor /// Factory method to create an appropriately typed reader for the given 128219019Sgabor /// instrprof file. 129219019Sgabor static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path); 130219019Sgabor 131219019Sgabor static Expected<std::unique_ptr<InstrProfReader>> 132219019Sgabor create(std::unique_ptr<MemoryBuffer> Buffer); 133219019Sgabor}; 134219019Sgabor 135219019Sgabor/// Reader for the simple text based instrprof format. 136219019Sgabor/// 137219019Sgabor/// This format is a simple text format that's suitable for test data. Records 138219019Sgabor/// are separated by one or more blank lines, and record fields are separated by 139219019Sgabor/// new lines. 140219019Sgabor/// 141219019Sgabor/// Each record consists of a function name, a function hash, a number of 142219019Sgabor/// counters, and then each counter value, in that order. 143219019Sgaborclass TextInstrProfReader : public InstrProfReader { 144219019Sgaborprivate: 145219019Sgabor /// The profile data file contents. 146219019Sgabor std::unique_ptr<MemoryBuffer> DataBuffer; 147219019Sgabor /// Iterator over the profile data. 148219019Sgabor line_iterator Line; 149219019Sgabor bool IsIRLevelProfile = false; 150219019Sgabor bool HasCSIRLevelProfile = false; 151219019Sgabor 152219019Sgabor Error readValueProfileData(InstrProfRecord &Record); 153219019Sgabor 154219019Sgaborpublic: 155219019Sgabor TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_) 156219019Sgabor : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {} 157219019Sgabor TextInstrProfReader(const TextInstrProfReader &) = delete; 158219019Sgabor TextInstrProfReader &operator=(const TextInstrProfReader &) = delete; 159219019Sgabor 160219019Sgabor /// Return true if the given buffer is in text instrprof format. 161219019Sgabor static bool hasFormat(const MemoryBuffer &Buffer); 162219019Sgabor 163219019Sgabor bool isIRLevelProfile() const override { return IsIRLevelProfile; } 164219019Sgabor 165219019Sgabor bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; } 166219019Sgabor 167219019Sgabor /// Read the header. 168219019Sgabor Error readHeader() override; 169219019Sgabor 170219019Sgabor /// Read a single record. 171219019Sgabor Error readNextRecord(NamedInstrProfRecord &Record) override; 172219019Sgabor 173219019Sgabor InstrProfSymtab &getSymtab() override { 174219019Sgabor assert(Symtab.get()); 175219019Sgabor return *Symtab.get(); 176219019Sgabor } 177219019Sgabor}; 178219019Sgabor 179219019Sgabor/// Reader for the raw instrprof binary format from runtime. 180219019Sgabor/// 181219019Sgabor/// This format is a raw memory dump of the instrumentation-baed profiling data 182219019Sgabor/// from the runtime. It has no index. 183219019Sgabor/// 184219019Sgabor/// Templated on the unsigned type whose size matches pointers on the platform 185219019Sgabor/// that wrote the profile. 186219019Sgabortemplate <class IntPtrT> 187219019Sgaborclass RawInstrProfReader : public InstrProfReader { 188219019Sgaborprivate: 189219019Sgabor /// The profile data file contents. 190219019Sgabor std::unique_ptr<MemoryBuffer> DataBuffer; 191219019Sgabor bool ShouldSwapBytes; 192219019Sgabor // The value of the version field of the raw profile data header. The lower 56 193219019Sgabor // bits specifies the format version and the most significant 8 bits specify 194219019Sgabor // the variant types of the profile. 195219019Sgabor uint64_t Version; 196219019Sgabor uint64_t CountersDelta; 197219019Sgabor uint64_t NamesDelta; 198219019Sgabor const RawInstrProf::ProfileData<IntPtrT> *Data; 199219019Sgabor const RawInstrProf::ProfileData<IntPtrT> *DataEnd; 200219019Sgabor const uint64_t *CountersStart; 201219019Sgabor const char *NamesStart; 202219019Sgabor uint64_t NamesSize; 203219019Sgabor // After value profile is all read, this pointer points to 204219019Sgabor // the header of next profile data (if exists) 205219019Sgabor const uint8_t *ValueDataStart; 206219019Sgabor uint32_t ValueKindLast; 207219019Sgabor uint32_t CurValueDataSize; 208219019Sgabor 209219019Sgaborpublic: 210219019Sgabor RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer) 211219019Sgabor : DataBuffer(std::move(DataBuffer)) {} 212219019Sgabor RawInstrProfReader(const RawInstrProfReader &) = delete; 213219019Sgabor RawInstrProfReader &operator=(const RawInstrProfReader &) = delete; 214219019Sgabor 215219019Sgabor static bool hasFormat(const MemoryBuffer &DataBuffer); 216219019Sgabor Error readHeader() override; 217219019Sgabor Error readNextRecord(NamedInstrProfRecord &Record) override; 218219019Sgabor 219219019Sgabor bool isIRLevelProfile() const override { 220219019Sgabor return (Version & VARIANT_MASK_IR_PROF) != 0; 221219019Sgabor } 222219019Sgabor 223219019Sgabor bool hasCSIRLevelProfile() const override { 224219019Sgabor return (Version & VARIANT_MASK_CSIR_PROF) != 0; 225219019Sgabor } 226219019Sgabor 227219019Sgabor InstrProfSymtab &getSymtab() override { 228219019Sgabor assert(Symtab.get()); 229219019Sgabor return *Symtab.get(); 230219019Sgabor } 231219019Sgabor 232219019Sgaborprivate: 233219019Sgabor Error createSymtab(InstrProfSymtab &Symtab); 234219019Sgabor Error readNextHeader(const char *CurrentPos); 235219019Sgabor Error readHeader(const RawInstrProf::Header &Header); 236219019Sgabor 237219019Sgabor template <class IntT> IntT swap(IntT Int) const { 238219019Sgabor return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int; 239219019Sgabor } 240219019Sgabor 241219019Sgabor support::endianness getDataEndianness() const { 242219019Sgabor support::endianness HostEndian = getHostEndianness(); 243219019Sgabor if (!ShouldSwapBytes) 244219019Sgabor return HostEndian; 245219019Sgabor if (HostEndian == support::little) 246219019Sgabor return support::big; 247219019Sgabor else 248219019Sgabor return support::little; 249219019Sgabor } 250 251 inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) { 252 return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t)); 253 } 254 255 Error readName(NamedInstrProfRecord &Record); 256 Error readFuncHash(NamedInstrProfRecord &Record); 257 Error readRawCounts(InstrProfRecord &Record); 258 Error readValueProfilingData(InstrProfRecord &Record); 259 bool atEnd() const { return Data == DataEnd; } 260 261 void advanceData() { 262 Data++; 263 ValueDataStart += CurValueDataSize; 264 } 265 266 const char *getNextHeaderPos() const { 267 assert(atEnd()); 268 return (const char *)ValueDataStart; 269 } 270 271 /// Get the offset of \p CounterPtr from the start of the counters section of 272 /// the profile. The offset has units of "number of counters", i.e. increasing 273 /// the offset by 1 corresponds to an increase in the *byte offset* by 8. 274 ptrdiff_t getCounterOffset(IntPtrT CounterPtr) const { 275 return (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t); 276 } 277 278 const uint64_t *getCounter(ptrdiff_t Offset) const { 279 return CountersStart + Offset; 280 } 281 282 StringRef getName(uint64_t NameRef) const { 283 return Symtab->getFuncName(swap(NameRef)); 284 } 285}; 286 287using RawInstrProfReader32 = RawInstrProfReader<uint32_t>; 288using RawInstrProfReader64 = RawInstrProfReader<uint64_t>; 289 290namespace IndexedInstrProf { 291 292enum class HashT : uint32_t; 293 294} // end namespace IndexedInstrProf 295 296/// Trait for lookups into the on-disk hash table for the binary instrprof 297/// format. 298class InstrProfLookupTrait { 299 std::vector<NamedInstrProfRecord> DataBuffer; 300 IndexedInstrProf::HashT HashType; 301 unsigned FormatVersion; 302 // Endianness of the input value profile data. 303 // It should be LE by default, but can be changed 304 // for testing purpose. 305 support::endianness ValueProfDataEndianness = support::little; 306 307public: 308 InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion) 309 : HashType(HashType), FormatVersion(FormatVersion) {} 310 311 using data_type = ArrayRef<NamedInstrProfRecord>; 312 313 using internal_key_type = StringRef; 314 using external_key_type = StringRef; 315 using hash_value_type = uint64_t; 316 using offset_type = uint64_t; 317 318 static bool EqualKey(StringRef A, StringRef B) { return A == B; } 319 static StringRef GetInternalKey(StringRef K) { return K; } 320 static StringRef GetExternalKey(StringRef K) { return K; } 321 322 hash_value_type ComputeHash(StringRef K); 323 324 static std::pair<offset_type, offset_type> 325 ReadKeyDataLength(const unsigned char *&D) { 326 using namespace support; 327 328 offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D); 329 offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D); 330 return std::make_pair(KeyLen, DataLen); 331 } 332 333 StringRef ReadKey(const unsigned char *D, offset_type N) { 334 return StringRef((const char *)D, N); 335 } 336 337 bool readValueProfilingData(const unsigned char *&D, 338 const unsigned char *const End); 339 data_type ReadData(StringRef K, const unsigned char *D, offset_type N); 340 341 // Used for testing purpose only. 342 void setValueProfDataEndianness(support::endianness Endianness) { 343 ValueProfDataEndianness = Endianness; 344 } 345}; 346 347struct InstrProfReaderIndexBase { 348 virtual ~InstrProfReaderIndexBase() = default; 349 350 // Read all the profile records with the same key pointed to the current 351 // iterator. 352 virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0; 353 354 // Read all the profile records with the key equal to FuncName 355 virtual Error getRecords(StringRef FuncName, 356 ArrayRef<NamedInstrProfRecord> &Data) = 0; 357 virtual void advanceToNextKey() = 0; 358 virtual bool atEnd() const = 0; 359 virtual void setValueProfDataEndianness(support::endianness Endianness) = 0; 360 virtual uint64_t getVersion() const = 0; 361 virtual bool isIRLevelProfile() const = 0; 362 virtual bool hasCSIRLevelProfile() const = 0; 363 virtual Error populateSymtab(InstrProfSymtab &) = 0; 364}; 365 366using OnDiskHashTableImplV3 = 367 OnDiskIterableChainedHashTable<InstrProfLookupTrait>; 368 369template <typename HashTableImpl> 370class InstrProfReaderItaniumRemapper; 371 372template <typename HashTableImpl> 373class InstrProfReaderIndex : public InstrProfReaderIndexBase { 374private: 375 std::unique_ptr<HashTableImpl> HashTable; 376 typename HashTableImpl::data_iterator RecordIterator; 377 uint64_t FormatVersion; 378 379 friend class InstrProfReaderItaniumRemapper<HashTableImpl>; 380 381public: 382 InstrProfReaderIndex(const unsigned char *Buckets, 383 const unsigned char *const Payload, 384 const unsigned char *const Base, 385 IndexedInstrProf::HashT HashType, uint64_t Version); 386 ~InstrProfReaderIndex() override = default; 387 388 Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override; 389 Error getRecords(StringRef FuncName, 390 ArrayRef<NamedInstrProfRecord> &Data) override; 391 void advanceToNextKey() override { RecordIterator++; } 392 393 bool atEnd() const override { 394 return RecordIterator == HashTable->data_end(); 395 } 396 397 void setValueProfDataEndianness(support::endianness Endianness) override { 398 HashTable->getInfoObj().setValueProfDataEndianness(Endianness); 399 } 400 401 uint64_t getVersion() const override { return GET_VERSION(FormatVersion); } 402 403 bool isIRLevelProfile() const override { 404 return (FormatVersion & VARIANT_MASK_IR_PROF) != 0; 405 } 406 407 bool hasCSIRLevelProfile() const override { 408 return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0; 409 } 410 411 Error populateSymtab(InstrProfSymtab &Symtab) override { 412 return Symtab.create(HashTable->keys()); 413 } 414}; 415 416/// Name matcher supporting fuzzy matching of symbol names to names in profiles. 417class InstrProfReaderRemapper { 418public: 419 virtual ~InstrProfReaderRemapper() {} 420 virtual Error populateRemappings() { return Error::success(); } 421 virtual Error getRecords(StringRef FuncName, 422 ArrayRef<NamedInstrProfRecord> &Data) = 0; 423}; 424 425/// Reader for the indexed binary instrprof format. 426class IndexedInstrProfReader : public InstrProfReader { 427private: 428 /// The profile data file contents. 429 std::unique_ptr<MemoryBuffer> DataBuffer; 430 /// The profile remapping file contents. 431 std::unique_ptr<MemoryBuffer> RemappingBuffer; 432 /// The index into the profile data. 433 std::unique_ptr<InstrProfReaderIndexBase> Index; 434 /// The profile remapping file contents. 435 std::unique_ptr<InstrProfReaderRemapper> Remapper; 436 /// Profile summary data. 437 std::unique_ptr<ProfileSummary> Summary; 438 /// Context sensitive profile summary data. 439 std::unique_ptr<ProfileSummary> CS_Summary; 440 // Index to the current record in the record array. 441 unsigned RecordIndex; 442 443 // Read the profile summary. Return a pointer pointing to one byte past the 444 // end of the summary data if it exists or the input \c Cur. 445 // \c UseCS indicates whether to use the context-sensitive profile summary. 446 const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version, 447 const unsigned char *Cur, bool UseCS); 448 449public: 450 IndexedInstrProfReader( 451 std::unique_ptr<MemoryBuffer> DataBuffer, 452 std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr) 453 : DataBuffer(std::move(DataBuffer)), 454 RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {} 455 IndexedInstrProfReader(const IndexedInstrProfReader &) = delete; 456 IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete; 457 458 /// Return the profile version. 459 uint64_t getVersion() const { return Index->getVersion(); } 460 bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); } 461 bool hasCSIRLevelProfile() const override { 462 return Index->hasCSIRLevelProfile(); 463 } 464 465 /// Return true if the given buffer is in an indexed instrprof format. 466 static bool hasFormat(const MemoryBuffer &DataBuffer); 467 468 /// Read the file header. 469 Error readHeader() override; 470 /// Read a single record. 471 Error readNextRecord(NamedInstrProfRecord &Record) override; 472 473 /// Return the NamedInstrProfRecord associated with FuncName and FuncHash 474 Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName, 475 uint64_t FuncHash); 476 477 /// Fill Counts with the profile data for the given function name. 478 Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash, 479 std::vector<uint64_t> &Counts); 480 481 /// Return the maximum of all known function counts. 482 /// \c UseCS indicates whether to use the context-sensitive count. 483 uint64_t getMaximumFunctionCount(bool UseCS) { 484 if (UseCS) { 485 assert(CS_Summary && "No context sensitive profile summary"); 486 return CS_Summary->getMaxFunctionCount(); 487 } else { 488 assert(Summary && "No profile summary"); 489 return Summary->getMaxFunctionCount(); 490 } 491 } 492 493 /// Factory method to create an indexed reader. 494 static Expected<std::unique_ptr<IndexedInstrProfReader>> 495 create(const Twine &Path, const Twine &RemappingPath = ""); 496 497 static Expected<std::unique_ptr<IndexedInstrProfReader>> 498 create(std::unique_ptr<MemoryBuffer> Buffer, 499 std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr); 500 501 // Used for testing purpose only. 502 void setValueProfDataEndianness(support::endianness Endianness) { 503 Index->setValueProfDataEndianness(Endianness); 504 } 505 506 // See description in the base class. This interface is designed 507 // to be used by llvm-profdata (for dumping). Avoid using this when 508 // the client is the compiler. 509 InstrProfSymtab &getSymtab() override; 510 511 /// Return the profile summary. 512 /// \c UseCS indicates whether to use the context-sensitive summary. 513 ProfileSummary &getSummary(bool UseCS) { 514 if (UseCS) { 515 assert(CS_Summary && "No context sensitive summary"); 516 return *(CS_Summary.get()); 517 } else { 518 assert(Summary && "No profile summary"); 519 return *(Summary.get()); 520 } 521 } 522}; 523 524} // end namespace llvm 525 526#endif // LLVM_PROFILEDATA_INSTRPROFREADER_H 527