// BottleneckAnalysis.cpp revision 360784
1//===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file implements the functionalities used by the BottleneckAnalysis
11/// to report bottleneck info.
12///
13//===----------------------------------------------------------------------===//
14
15#include "Views/BottleneckAnalysis.h"
16#include "llvm/MC/MCInst.h"
17#include "llvm/MCA/Support.h"
18#include "llvm/Support/Format.h"
19#include "llvm/Support/FormattedStream.h"
20
21namespace llvm {
22namespace mca {
23
24#define DEBUG_TYPE "llvm-mca"
25
26PressureTracker::PressureTracker(const MCSchedModel &Model)
27    : SM(Model),
28      ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0),
29      ProcResID2Mask(Model.getNumProcResourceKinds(), 0),
30      ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0),
31      ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) {
32  computeProcResourceMasks(SM, ProcResID2Mask);
33
34  // Ignore the invalid resource at index zero.
35  unsigned NextResourceUsersIdx = 0;
36  for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) {
37    const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
38    ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx;
39    NextResourceUsersIdx += ProcResource.NumUnits;
40    uint64_t ResourceMask = ProcResID2Mask[I];
41    ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I;
42  }
43
44  ResourceUsers.resize(NextResourceUsersIdx);
45  std::fill(ResourceUsers.begin(), ResourceUsers.end(),
46            std::make_pair<unsigned, unsigned>(~0U, 0U));
47}
48
49void PressureTracker::getResourceUsers(uint64_t ResourceMask,
50                                       SmallVectorImpl<User> &Users) const {
51  unsigned Index = getResourceStateIndex(ResourceMask);
52  unsigned ProcResID = ResIdx2ProcResID[Index];
53  const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
54  for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
55    const User U = getResourceUser(ProcResID, I);
56    if (U.second && IPI.find(U.first) != IPI.end())
57      Users.emplace_back(U);
58  }
59}
60
61void PressureTracker::onInstructionDispatched(unsigned IID) {
62  IPI.insert(std::make_pair(IID, InstructionPressureInfo()));
63}
64
// Stop tracking pressure info for an instruction once it has fully executed.
void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); }
66
67void PressureTracker::handleInstructionIssuedEvent(
68    const HWInstructionIssuedEvent &Event) {
69  unsigned IID = Event.IR.getSourceIndex();
70  using ResourceRef = HWInstructionIssuedEvent::ResourceRef;
71  using ResourceUse = std::pair<ResourceRef, ResourceCycles>;
72  for (const ResourceUse &Use : Event.UsedResources) {
73    const ResourceRef &RR = Use.first;
74    unsigned Index = ProcResID2ResourceUsersIndex[RR.first];
75    Index += countTrailingZeros(RR.second);
76    ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator());
77  }
78}
79
80void PressureTracker::updateResourcePressureDistribution(
81    uint64_t CumulativeMask) {
82  while (CumulativeMask) {
83    uint64_t Current = CumulativeMask & (-CumulativeMask);
84    unsigned ResIdx = getResourceStateIndex(Current);
85    unsigned ProcResID = ResIdx2ProcResID[ResIdx];
86    uint64_t Mask = ProcResID2Mask[ProcResID];
87
88    if (Mask == Current) {
89      ResourcePressureDistribution[ProcResID]++;
90      CumulativeMask ^= Current;
91      continue;
92    }
93
94    Mask ^= Current;
95    while (Mask) {
96      uint64_t SubUnit = Mask & (-Mask);
97      ResIdx = getResourceStateIndex(SubUnit);
98      ProcResID = ResIdx2ProcResID[ResIdx];
99      ResourcePressureDistribution[ProcResID]++;
100      Mask ^= SubUnit;
101    }
102
103    CumulativeMask ^= Current;
104  }
105}
106
107void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {
108  assert(Event.Reason != HWPressureEvent::INVALID &&
109         "Unexpected invalid event!");
110
111  switch (Event.Reason) {
112  default:
113    break;
114
115  case HWPressureEvent::RESOURCES: {
116    const uint64_t ResourceMask = Event.ResourceMask;
117    updateResourcePressureDistribution(Event.ResourceMask);
118
119    for (const InstRef &IR : Event.AffectedInstructions) {
120      const Instruction &IS = *IR.getInstruction();
121      unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask;
122      if (!BusyResources)
123        continue;
124
125      unsigned IID = IR.getSourceIndex();
126      IPI[IID].ResourcePressureCycles++;
127    }
128    break;
129  }
130
131  case HWPressureEvent::REGISTER_DEPS:
132    for (const InstRef &IR : Event.AffectedInstructions) {
133      unsigned IID = IR.getSourceIndex();
134      IPI[IID].RegisterPressureCycles++;
135    }
136    break;
137
138  case HWPressureEvent::MEMORY_DEPS:
139    for (const InstRef &IR : Event.AffectedInstructions) {
140      unsigned IID = IR.getSourceIndex();
141      IPI[IID].MemoryPressureCycles++;
142    }
143  }
144}
145
146#ifndef NDEBUG
147void DependencyGraph::dumpDependencyEdge(raw_ostream &OS,
148                                         const DependencyEdge &DepEdge,
149                                         MCInstPrinter &MCIP) const {
150  unsigned FromIID = DepEdge.FromIID;
151  unsigned ToIID = DepEdge.ToIID;
152  assert(FromIID < ToIID && "Graph should be acyclic!");
153
154  const DependencyEdge::Dependency &DE = DepEdge.Dep;
155  assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!");
156
157  OS << " FROM: " << FromIID << " TO: " << ToIID << "             ";
158  if (DE.Type == DependencyEdge::DT_REGISTER) {
159    OS << " - REGISTER: ";
160    MCIP.printRegName(OS, DE.ResourceOrRegID);
161  } else if (DE.Type == DependencyEdge::DT_MEMORY) {
162    OS << " - MEMORY";
163  } else {
164    assert(DE.Type == DependencyEdge::DT_RESOURCE &&
165           "Unsupported dependency type!");
166    OS << " - RESOURCE MASK: " << DE.ResourceOrRegID;
167  }
168  OS << " - COST: " << DE.Cost << '\n';
169}
170#endif // NDEBUG
171
172void DependencyGraph::pruneEdges(unsigned Iterations) {
173  for (DGNode &N : Nodes) {
174    unsigned NumPruned = 0;
175    const unsigned Size = N.OutgoingEdges.size();
176    // Use a cut-off threshold to prune edges with a low frequency.
177    for (unsigned I = 0, E = Size; I < E; ++I) {
178      DependencyEdge &Edge = N.OutgoingEdges[I];
179      if (Edge.Frequency == Iterations)
180        continue;
181      double Factor = (double)Edge.Frequency / Iterations;
182      if (0.10 < Factor)
183        continue;
184      Nodes[Edge.ToIID].NumPredecessors--;
185      std::swap(Edge, N.OutgoingEdges[E - 1]);
186      --E;
187      ++NumPruned;
188    }
189
190    if (NumPruned)
191      N.OutgoingEdges.resize(Size - NumPruned);
192  }
193}
194
195void DependencyGraph::initializeRootSet(
196    SmallVectorImpl<unsigned> &RootSet) const {
197  for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
198    const DGNode &N = Nodes[I];
199    if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty())
200      RootSet.emplace_back(I);
201  }
202}
203
void DependencyGraph::propagateThroughEdges(
    SmallVectorImpl<unsigned> &RootSet, unsigned Iterations) {
  SmallVector<unsigned, 8> ToVisit;

  // A critical sequence is computed as the longest path from a node of the
  // RootSet to a leaf node (i.e. a node with no successors).  The RootSet is
  // composed of nodes with at least one successor, and no predecessors.
  //
  // Each node of the graph starts with an initial default cost of zero.  The
  // cost of a node is a measure of criticality: the higher the cost, the bigger
  // is the performance impact.
  // For register and memory dependencies, the cost is a function of the write
  // latency as well as the actual delay (in cycles) caused to users.
  // For processor resource dependencies, the cost is a function of the resource
  // pressure. Resource interferences with low frequency values are ignored.
  //
  // This algorithm is very similar to a (reverse) Dijkstra.  Every iteration of
  // the inner loop selects (i.e. visits) a node N from a set of `unvisited
  // nodes`, and then propagates the cost of N to all its neighbors.
  //
  // The `unvisited nodes` set initially contains all the nodes from the
  // RootSet.  A node N is added to the `unvisited nodes` if all its
  // predecessors have been visited already.
  //
  // For simplicity, every node tracks the number of unvisited incoming edges in
  // field `NumVisitedPredecessors`.  When the value of that field drops to
  // zero, then the corresponding node is added to a `ToVisit` set.
  //
  // At the end of every iteration of the outer loop, set `ToVisit` becomes our
  // new `unvisited nodes` set.
  //
  // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet)
  // is empty. This algorithm works under the assumption that the graph is
  // acyclic.
  do {
    for (unsigned IID : RootSet) {
      const DGNode &N = Nodes[IID];
      for (const DependencyEdge &DepEdge : N.OutgoingEdges) {
        unsigned ToIID = DepEdge.ToIID;
        DGNode &To = Nodes[ToIID];
        uint64_t Cost = N.Cost + DepEdge.Dep.Cost;
        // Check if this is the most expensive incoming edge seen so far.  In
        // that case, update the total cost of the destination node (ToIID), as
        // well as its field `CriticalPredecessor`.
        if (Cost > To.Cost) {
          To.CriticalPredecessor = DepEdge;
          To.Cost = Cost;
          To.Depth = N.Depth + 1;
        }
        To.NumVisitedPredecessors++;
        // Schedule ToIID for a visit only once all of its incoming edges have
        // been propagated.
        if (To.NumVisitedPredecessors == To.NumPredecessors)
          ToVisit.emplace_back(ToIID);
      }
    }

    // The freshly completed nodes become the next wave of unvisited nodes.
    std::swap(RootSet, ToVisit);
    ToVisit.clear();
  } while (!RootSet.empty());
}
263
264void DependencyGraph::getCriticalSequence(
265    SmallVectorImpl<const DependencyEdge *> &Seq) const {
266  // At this stage, nodes of the graph have been already visited, and costs have
267  // been propagated through the edges (see method `propagateThroughEdges()`).
268
269  // Identify the node N with the highest cost in the graph. By construction,
270  // that node is the last instruction of our critical sequence.
271  // Field N.Depth would tell us the total length of the sequence.
272  //
273  // To obtain the sequence of critical edges, we simply follow the chain of critical
274  // predecessors starting from node N (field DGNode::CriticalPredecessor).
275  const auto It = std::max_element(
276      Nodes.begin(), Nodes.end(),
277      [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; });
278  unsigned IID = std::distance(Nodes.begin(), It);
279  Seq.resize(Nodes[IID].Depth);
280  for (unsigned I = Seq.size(), E = 0; I > E; --I) {
281    const DGNode &N = Nodes[IID];
282    Seq[I - 1] = &N.CriticalPredecessor;
283    IID = N.CriticalPredecessor.FromIID;
284  }
285}
286
287static void printInstruction(formatted_raw_ostream &FOS,
288                             const MCSubtargetInfo &STI, MCInstPrinter &MCIP,
289                             const MCInst &MCI,
290                             bool UseDifferentColor = false) {
291  std::string Instruction;
292  raw_string_ostream InstrStream(Instruction);
293
294  FOS.PadToColumn(14);
295
296  MCIP.printInst(&MCI, 0, "", STI, InstrStream);
297  InstrStream.flush();
298
299  if (UseDifferentColor)
300    FOS.changeColor(raw_ostream::CYAN, true, false);
301  FOS << StringRef(Instruction).ltrim();
302  if (UseDifferentColor)
303    FOS.resetColor();
304}
305
void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
  // Early exit if no bottlenecks were found during the simulation.
  if (!SeenStallCycles || !BPI.PressureIncreaseCycles)
    return;

  SmallVector<const DependencyEdge *, 16> Seq;
  DG.getCriticalSequence(Seq);
  if (Seq.empty())
    return;

  OS << "\nCritical sequence based on the simulation:\n\n";

  // Sequence node IDs index an expanded multi-iteration graph; reduce them
  // modulo the source size to map back to instructions of the input sequence.
  const DependencyEdge &FirstEdge = *Seq[0];
  unsigned FromIID = FirstEdge.FromIID % Source.size();
  unsigned ToIID = FirstEdge.ToIID % Source.size();
  // A backwards edge (From >= To) means the first dependency is loop carried.
  bool IsLoopCarried = FromIID >= ToIID;

  formatted_raw_ostream FOS(OS);
  FOS.PadToColumn(14);
  FOS << "Instruction";
  FOS.PadToColumn(58);
  FOS << "Dependency Information";

  bool HasColors = FOS.has_colors();

  unsigned CurrentIID = 0;
  if (IsLoopCarried) {
    FOS << "\n +----< " << FromIID << ".";
    printInstruction(FOS, STI, MCIP, Source[FromIID], HasColors);
    FOS << "\n |\n |    < loop carried > \n |";
  } else {
    // Print the instructions that precede the start of the sequence.
    while (CurrentIID < FromIID) {
      FOS << "\n        " << CurrentIID << ".";
      printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
      CurrentIID++;
    }

    FOS << "\n +----< " << CurrentIID << ".";
    printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors);
    CurrentIID++;
  }

  for (const DependencyEdge *&DE : Seq) {
    ToIID = DE->ToIID % Source.size();
    // If the destination precedes the current position, the edge wraps around
    // the end of the block: print all remaining instructions first.
    unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID;

    while (CurrentIID < LastIID) {
      FOS << "\n |      " << CurrentIID << ".";
      printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
      CurrentIID++;
    }

    if (CurrentIID == ToIID) {
      FOS << "\n +----> " << ToIID << ".";
      printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors);
    } else {
      // Loop-carried edge: draw the wrap-around marker before the target.
      FOS << "\n |\n |    < loop carried > \n |"
          << "\n +----> " << ToIID << ".";
      printInstruction(FOS, STI, MCIP, Source[ToIID], HasColors);
    }
    FOS.PadToColumn(58);

    // Describe the dependency on the right-hand side of the arrow.
    const DependencyEdge::Dependency &Dep = DE->Dep;
    if (HasColors)
      FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);

    if (Dep.Type == DependencyEdge::DT_REGISTER) {
      FOS << "## REGISTER dependency:  ";
      if (HasColors)
        FOS.changeColor(raw_ostream::MAGENTA, true, false);
      MCIP.printRegName(FOS, Dep.ResourceOrRegID);
    } else if (Dep.Type == DependencyEdge::DT_MEMORY) {
      FOS << "## MEMORY dependency.";
    } else {
      assert(Dep.Type == DependencyEdge::DT_RESOURCE &&
             "Unsupported dependency type!");
      FOS << "## RESOURCE interference:  ";
      if (HasColors)
        FOS.changeColor(raw_ostream::MAGENTA, true, false);
      FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID);
      if (HasColors) {
        FOS.resetColor();
        FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
      }
      // Frequency out of Iterations expressed as an (integer) percentage.
      FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations)
          << "% ]";
    }
    if (HasColors)
      FOS.resetColor();
    ++CurrentIID;
  }

  // Print any trailing instructions that follow the end of the sequence.
  while (CurrentIID < Source.size()) {
    FOS << "\n        " << CurrentIID << ".";
    printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
    CurrentIID++;
  }

  FOS << '\n';
  FOS.flush();
}
407
408#ifndef NDEBUG
409void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const {
410  OS << "\nREG DEPS\n";
411  for (const DGNode &Node : Nodes)
412    for (const DependencyEdge &DE : Node.OutgoingEdges)
413      if (DE.Dep.Type == DependencyEdge::DT_REGISTER)
414        dumpDependencyEdge(OS, DE, MCIP);
415
416  OS << "\nMEM DEPS\n";
417  for (const DGNode &Node : Nodes)
418    for (const DependencyEdge &DE : Node.OutgoingEdges)
419      if (DE.Dep.Type == DependencyEdge::DT_MEMORY)
420        dumpDependencyEdge(OS, DE, MCIP);
421
422  OS << "\nRESOURCE DEPS\n";
423  for (const DGNode &Node : Nodes)
424    for (const DependencyEdge &DE : Node.OutgoingEdges)
425      if (DE.Dep.Type == DependencyEdge::DT_RESOURCE)
426        dumpDependencyEdge(OS, DE, MCIP);
427}
428#endif // NDEBUG
429
void DependencyGraph::addDependency(unsigned From, unsigned To,
                                    DependencyEdge::Dependency &&Dep) {
  DGNode &NodeFrom = Nodes[From];
  DGNode &NodeTo = Nodes[To];
  SmallVectorImpl<DependencyEdge> &Edges = NodeFrom.OutgoingEdges;

  // If an equivalent edge already exists (same destination, same register or
  // resource), merge this dependency into it instead of adding a duplicate:
  // accumulate the cost, and bump the observation frequency.
  auto It = find_if(Edges, [To, &Dep](const DependencyEdge &DE) {
    return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID;
  });

  if (It != Edges.end()) {
    It->Dep.Cost += Dep.Cost;
    It->Frequency++;
    return;
  }

  // First observation of this edge: seed it with frequency 1.
  Edges.emplace_back(DependencyEdge{Dep, From, To, 1});
  NodeTo.NumPredecessors++;
}
450
// Note: the dependency graph is sized at 3x the input sequence: the add*Dep
// helpers below place dependencies on a middle copy of the sequence, with
// loop-carried edges spilling into the first and last copies.
BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
                                       MCInstPrinter &Printer,
                                       ArrayRef<MCInst> S, unsigned NumIter)
    : STI(sti), MCIP(Printer), Tracker(STI.getSchedModel()), DG(S.size() * 3),
      Source(S), Iterations(NumIter), TotalCycles(0),
      PressureIncreasedBecauseOfResources(false),
      PressureIncreasedBecauseOfRegisterDependencies(false),
      PressureIncreasedBecauseOfMemoryDependencies(false),
      SeenStallCycles(false), BPI() {}
460
461void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To,
462                                        unsigned RegID, unsigned Cost) {
463  bool IsLoopCarried = From >= To;
464  unsigned SourceSize = Source.size();
465  if (IsLoopCarried) {
466    DG.addRegisterDep(From, To + SourceSize, RegID, Cost);
467    DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost);
468    return;
469  }
470  DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost);
471}
472
473void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To,
474                                      unsigned Cost) {
475  bool IsLoopCarried = From >= To;
476  unsigned SourceSize = Source.size();
477  if (IsLoopCarried) {
478    DG.addMemoryDep(From, To + SourceSize, Cost);
479    DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost);
480    return;
481  }
482  DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost);
483}
484
485void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To,
486                                        uint64_t Mask, unsigned Cost) {
487  bool IsLoopCarried = From >= To;
488  unsigned SourceSize = Source.size();
489  if (IsLoopCarried) {
490    DG.addResourceDep(From, To + SourceSize, Mask, Cost);
491    DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost);
492    return;
493  }
494  DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost);
495}
496
void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
  const unsigned IID = Event.IR.getSourceIndex();
  if (Event.Type == HWInstructionEvent::Dispatched) {
    Tracker.onInstructionDispatched(IID);
    return;
  }
  if (Event.Type == HWInstructionEvent::Executed) {
    Tracker.onInstructionExecuted(IID);
    return;
  }

  if (Event.Type != HWInstructionEvent::Issued)
    return;

  // From here on, the event is an `Issued` event: update the dependency
  // graph with the critical resource/register/memory dependencies of this
  // instruction.
  const Instruction &IS = *Event.IR.getInstruction();
  unsigned To = IID % Source.size();

  // Resource pressure cycles are weighted (x2) when computing edge costs.
  unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID);
  uint64_t ResourceMask = IS.getCriticalResourceMask();
  SmallVector<std::pair<unsigned, unsigned>, 4> Users;
  while (ResourceMask) {
    // Visit each resource in the critical mask; add one resource dependency
    // edge per (still in-flight) user of that resource.
    uint64_t Current = ResourceMask & (-ResourceMask);
    Tracker.getResourceUsers(Current, Users);
    for (const std::pair<unsigned, unsigned> &U : Users)
      addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles);
    Users.clear();
    ResourceMask ^= Current;
  }

  // Critical register dependency: cost is the dependency latency plus twice
  // the register pressure cycles attributed to this instruction.
  const CriticalDependency &RegDep = IS.getCriticalRegDep();
  if (RegDep.Cycles) {
    Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID);
    unsigned From = RegDep.IID % Source.size();
    addRegisterDep(From, To, RegDep.RegID, Cycles);
  }

  // Critical memory dependency: analogous cost computation.
  const CriticalDependency &MemDep = IS.getCriticalMemDep();
  if (MemDep.Cycles) {
    Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID);
    unsigned From = MemDep.IID % Source.size();
    addMemoryDep(From, To, Cycles);
  }

  // Record this instruction as the current user of the resource units it
  // consumed.
  Tracker.handleInstructionIssuedEvent(
      static_cast<const HWInstructionIssuedEvent &>(Event));

  // Check if this is the last simulated instruction.
  if (IID == ((Iterations * Source.size()) - 1))
    DG.finalizeGraph(Iterations);
}
547
548void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
549  assert(Event.Reason != HWPressureEvent::INVALID &&
550         "Unexpected invalid event!");
551
552  Tracker.handlePressureEvent(Event);
553
554  switch (Event.Reason) {
555  default:
556    break;
557
558  case HWPressureEvent::RESOURCES:
559    PressureIncreasedBecauseOfResources = true;
560    break;
561  case HWPressureEvent::REGISTER_DEPS:
562    PressureIncreasedBecauseOfRegisterDependencies = true;
563    break;
564  case HWPressureEvent::MEMORY_DEPS:
565    PressureIncreasedBecauseOfMemoryDependencies = true;
566    break;
567  }
568}
569
570void BottleneckAnalysis::onCycleEnd() {
571  ++TotalCycles;
572
573  bool PressureIncreasedBecauseOfDataDependencies =
574      PressureIncreasedBecauseOfRegisterDependencies ||
575      PressureIncreasedBecauseOfMemoryDependencies;
576  if (!PressureIncreasedBecauseOfResources &&
577      !PressureIncreasedBecauseOfDataDependencies)
578    return;
579
580  ++BPI.PressureIncreaseCycles;
581  if (PressureIncreasedBecauseOfRegisterDependencies)
582    ++BPI.RegisterDependencyCycles;
583  if (PressureIncreasedBecauseOfMemoryDependencies)
584    ++BPI.MemoryDependencyCycles;
585  if (PressureIncreasedBecauseOfDataDependencies)
586    ++BPI.DataDependencyCycles;
587  if (PressureIncreasedBecauseOfResources)
588    ++BPI.ResourcePressureCycles;
589  PressureIncreasedBecauseOfResources = false;
590  PressureIncreasedBecauseOfRegisterDependencies = false;
591  PressureIncreasedBecauseOfMemoryDependencies = false;
592}
593
594void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
595  if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
596    OS << "\n\nNo resource or data dependency bottlenecks discovered.\n";
597    return;
598  }
599
600  double PressurePerCycle =
601      (double)BPI.PressureIncreaseCycles * 100 / TotalCycles;
602  double ResourcePressurePerCycle =
603      (double)BPI.ResourcePressureCycles * 100 / TotalCycles;
604  double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles;
605  double RegDepPressurePerCycle =
606      (double)BPI.RegisterDependencyCycles * 100 / TotalCycles;
607  double MemDepPressurePerCycle =
608      (double)BPI.MemoryDependencyCycles * 100 / TotalCycles;
609
610  OS << "\n\nCycles with backend pressure increase [ "
611     << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]";
612
613  OS << "\nThroughput Bottlenecks: "
614     << "\n  Resource Pressure       [ "
615     << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100)
616     << "% ]";
617
618  if (BPI.PressureIncreaseCycles) {
619    ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
620    const MCSchedModel &SM = STI.getSchedModel();
621    for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
622      unsigned ResourceCycles = Distribution[I];
623      if (ResourceCycles) {
624        double Frequency = (double)ResourceCycles * 100 / TotalCycles;
625        const MCProcResourceDesc &PRDesc = *SM.getProcResource(I);
626        OS << "\n  - " << PRDesc.Name << "  [ "
627           << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]";
628      }
629    }
630  }
631
632  OS << "\n  Data Dependencies:      [ "
633     << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]";
634  OS << "\n  - Register Dependencies [ "
635     << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100)
636     << "% ]";
637  OS << "\n  - Memory Dependencies   [ "
638     << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100)
639     << "% ]\n";
640}
641
642void BottleneckAnalysis::printView(raw_ostream &OS) const {
643  std::string Buffer;
644  raw_string_ostream TempStream(Buffer);
645  printBottleneckHints(TempStream);
646  TempStream.flush();
647  OS << Buffer;
648  printCriticalSequence(OS);
649}
650
651} // namespace mca.
652} // namespace llvm
653