MisExpect.cpp revision 360784
1//===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This contains code to emit warnings for potentially incorrect usage of the
// llvm.expect intrinsic. This utility extracts the threshold values from
// metadata associated with the instrumented Branch or Switch instruction. The
// threshold values are then used to determine if a warning should be emitted.
//
// MisExpect metadata is generated when llvm.expect intrinsics are lowered; see
// LowerExpectIntrinsic.cpp.
16//
17//===----------------------------------------------------------------------===//
18
19#include "llvm/Transforms/Utils/MisExpect.h"
20#include "llvm/ADT/Twine.h"
21#include "llvm/Analysis/OptimizationRemarkEmitter.h"
22#include "llvm/IR/Constants.h"
23#include "llvm/IR/DiagnosticInfo.h"
24#include "llvm/IR/Instruction.h"
25#include "llvm/IR/Instructions.h"
26#include "llvm/IR/LLVMContext.h"
27#include "llvm/Support/BranchProbability.h"
28#include "llvm/Support/CommandLine.h"
29#include "llvm/Support/Debug.h"
30#include "llvm/Support/FormatVariadic.h"
31#include <cstdint>
32#include <functional>
33#include <numeric>
34
35#define DEBUG_TYPE "misexpect"
36
37using namespace llvm;
38using namespace misexpect;
39
40namespace llvm {
41
42// Command line option to enable/disable the warning when profile data suggests
43// a mismatch with the use of the llvm.expect intrinsic
44static cl::opt<bool> PGOWarnMisExpect(
45    "pgo-warn-misexpect", cl::init(false), cl::Hidden,
46    cl::desc("Use this option to turn on/off "
47             "warnings about incorrect usage of llvm.expect intrinsics."));
48
49} // namespace llvm
50
51namespace {
52
53Instruction *getOprndOrInst(Instruction *I) {
54  assert(I != nullptr && "MisExpect target Instruction cannot be nullptr");
55  Instruction *Ret = nullptr;
56  if (auto *B = dyn_cast<BranchInst>(I)) {
57    Ret = dyn_cast<Instruction>(B->getCondition());
58  }
59  // TODO: Find a way to resolve condition location for switches
60  // Using the condition of the switch seems to often resolve to an earlier
61  // point in the program, i.e. the calculation of the switch condition, rather
62  // than the switches location in the source code. Thus, we should use the
63  // instruction to get source code locations rather than the condition to
64  // improve diagnostic output, such as the caret. If the same problem exists
65  // for branch instructions, then we should remove this function and directly
66  // use the instruction
67  //
68  // else if (auto S = dyn_cast<SwitchInst>(I)) {
69  // Ret = I;
70  //}
71  return Ret ? Ret : I;
72}
73
74void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx,
75                             uint64_t ProfCount, uint64_t TotalCount) {
76  double PercentageCorrect = (double)ProfCount / TotalCount;
77  auto PerString =
78      formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount);
79  auto RemStr = formatv(
80      "Potential performance regression from use of the llvm.expect intrinsic: "
81      "Annotation was correct on {0} of profiled executions.",
82      PerString);
83  Twine Msg(PerString);
84  Instruction *Cond = getOprndOrInst(I);
85  if (PGOWarnMisExpect)
86    Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Msg));
87  OptimizationRemarkEmitter ORE(I->getParent()->getParent());
88  ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str());
89}
90
91} // namespace
92
93namespace llvm {
94namespace misexpect {
95
96void verifyMisExpect(Instruction *I, const SmallVector<uint32_t, 4> &Weights,
97                     LLVMContext &Ctx) {
98  if (auto *MisExpectData = I->getMetadata(LLVMContext::MD_misexpect)) {
99    auto *MisExpectDataName = dyn_cast<MDString>(MisExpectData->getOperand(0));
100    if (MisExpectDataName &&
101        MisExpectDataName->getString().equals("misexpect")) {
102      LLVM_DEBUG(llvm::dbgs() << "------------------\n");
103      LLVM_DEBUG(llvm::dbgs()
104                 << "Function: " << I->getFunction()->getName() << "\n");
105      LLVM_DEBUG(llvm::dbgs() << "Instruction: " << *I << ":\n");
106      LLVM_DEBUG(for (int Idx = 0, Size = Weights.size(); Idx < Size; ++Idx) {
107        llvm::dbgs() << "Weights[" << Idx << "] = " << Weights[Idx] << "\n";
108      });
109
110      // extract values from misexpect metadata
111      const auto *IndexCint =
112          mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(1));
113      const auto *LikelyCInt =
114          mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(2));
115      const auto *UnlikelyCInt =
116          mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(3));
117
118      if (!IndexCint || !LikelyCInt || !UnlikelyCInt)
119        return;
120
121      const uint64_t Index = IndexCint->getZExtValue();
122      const uint64_t LikelyBranchWeight = LikelyCInt->getZExtValue();
123      const uint64_t UnlikelyBranchWeight = UnlikelyCInt->getZExtValue();
124      const uint64_t ProfileCount = Weights[Index];
125      const uint64_t CaseTotal = std::accumulate(
126          Weights.begin(), Weights.end(), (uint64_t)0, std::plus<uint64_t>());
127      const uint64_t NumUnlikelyTargets = Weights.size() - 1;
128
129      const uint64_t TotalBranchWeight =
130          LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets);
131
132      const llvm::BranchProbability LikelyThreshold(LikelyBranchWeight,
133                                                    TotalBranchWeight);
134      uint64_t ScaledThreshold = LikelyThreshold.scale(CaseTotal);
135
136      LLVM_DEBUG(llvm::dbgs()
137                 << "Unlikely Targets: " << NumUnlikelyTargets << ":\n");
138      LLVM_DEBUG(llvm::dbgs() << "Profile Count: " << ProfileCount << ":\n");
139      LLVM_DEBUG(llvm::dbgs()
140                 << "Scaled Threshold: " << ScaledThreshold << ":\n");
141      LLVM_DEBUG(llvm::dbgs() << "------------------\n");
142      if (ProfileCount < ScaledThreshold)
143        emitMisexpectDiagnostic(I, Ctx, ProfileCount, CaseTotal);
144    }
145  }
146}
147
148void checkFrontendInstrumentation(Instruction &I) {
149  if (auto *MD = I.getMetadata(LLVMContext::MD_prof)) {
150    unsigned NOps = MD->getNumOperands();
151
152    // Only emit misexpect diagnostics if at least 2 branch weights are present.
153    // Less than 2 branch weights means that the profiling metadata is:
154    //    1) incorrect/corrupted
155    //    2) not branch weight metadata
156    //    3) completely deterministic
157    // In these cases we should not emit any diagnostic related to misexpect.
158    if (NOps < 3)
159      return;
160
161    // Operand 0 is a string tag "branch_weights"
162    if (MDString *Tag = cast<MDString>(MD->getOperand(0))) {
163      if (Tag->getString().equals("branch_weights")) {
164        SmallVector<uint32_t, 4> RealWeights(NOps - 1);
165        for (unsigned i = 1; i < NOps; i++) {
166          ConstantInt *Value =
167              mdconst::dyn_extract<ConstantInt>(MD->getOperand(i));
168          RealWeights[i - 1] = Value->getZExtValue();
169        }
170        verifyMisExpect(&I, RealWeights, I.getContext());
171      }
172    }
173  }
174}
175
176} // namespace misexpect
177} // namespace llvm
178#undef DEBUG_TYPE
179