HexagonStoreWidening.cpp revision 360784
1//===- HexagonStoreWidening.cpp -------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
// Replace sequences of "narrow" stores to adjacent memory locations with
// fewer "wide" stores that have the same effect.
10// For example, replace:
11//   S4_storeirb_io  %100, 0, 0   ; store-immediate-byte
12//   S4_storeirb_io  %100, 1, 0   ; store-immediate-byte
13// with
14//   S4_storeirh_io  %100, 0, 0   ; store-immediate-halfword
15// The above is the general idea.  The actual cases handled by the code
16// may be a bit more complex.
17// The purpose of this pass is to reduce the number of outstanding stores,
18// or as one could say, "reduce store queue pressure".  Also, wide stores
19// mean fewer stores, and since there are only two memory instructions allowed
20// per packet, it also means fewer packets, and ultimately fewer cycles.
21//===---------------------------------------------------------------------===//
22
23#include "HexagonInstrInfo.h"
24#include "HexagonRegisterInfo.h"
25#include "HexagonSubtarget.h"
26#include "llvm/ADT/SmallPtrSet.h"
27#include "llvm/Analysis/AliasAnalysis.h"
28#include "llvm/Analysis/MemoryLocation.h"
29#include "llvm/CodeGen/MachineBasicBlock.h"
30#include "llvm/CodeGen/MachineFunction.h"
31#include "llvm/CodeGen/MachineFunctionPass.h"
32#include "llvm/CodeGen/MachineInstr.h"
33#include "llvm/CodeGen/MachineInstrBuilder.h"
34#include "llvm/CodeGen/MachineMemOperand.h"
35#include "llvm/CodeGen/MachineOperand.h"
36#include "llvm/CodeGen/MachineRegisterInfo.h"
37#include "llvm/IR/DebugLoc.h"
38#include "llvm/InitializePasses.h"
39#include "llvm/MC/MCInstrDesc.h"
40#include "llvm/Pass.h"
41#include "llvm/Support/Debug.h"
42#include "llvm/Support/ErrorHandling.h"
43#include "llvm/Support/MathExtras.h"
44#include "llvm/Support/raw_ostream.h"
45#include <algorithm>
46#include <cassert>
47#include <cstdint>
48#include <iterator>
49#include <vector>
50
51#define DEBUG_TYPE "hexagon-widen-stores"
52
53using namespace llvm;
54
55namespace llvm {
56
57FunctionPass *createHexagonStoreWidening();
58void initializeHexagonStoreWideningPass(PassRegistry&);
59
60} // end namespace llvm
61
62namespace {
63
64  struct HexagonStoreWidening : public MachineFunctionPass {
65    const HexagonInstrInfo      *TII;
66    const HexagonRegisterInfo   *TRI;
67    const MachineRegisterInfo   *MRI;
68    AliasAnalysis               *AA;
69    MachineFunction             *MF;
70
71  public:
72    static char ID;
73
74    HexagonStoreWidening() : MachineFunctionPass(ID) {
75      initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry());
76    }
77
78    bool runOnMachineFunction(MachineFunction &MF) override;
79
80    StringRef getPassName() const override { return "Hexagon Store Widening"; }
81
82    void getAnalysisUsage(AnalysisUsage &AU) const override {
83      AU.addRequired<AAResultsWrapperPass>();
84      AU.addPreserved<AAResultsWrapperPass>();
85      MachineFunctionPass::getAnalysisUsage(AU);
86    }
87
88    static bool handledStoreType(const MachineInstr *MI);
89
90  private:
91    static const int MaxWideSize = 4;
92
93    using InstrGroup = std::vector<MachineInstr *>;
94    using InstrGroupList = std::vector<InstrGroup>;
95
96    bool instrAliased(InstrGroup &Stores, const MachineMemOperand &MMO);
97    bool instrAliased(InstrGroup &Stores, const MachineInstr *MI);
98    void createStoreGroup(MachineInstr *BaseStore, InstrGroup::iterator Begin,
99        InstrGroup::iterator End, InstrGroup &Group);
100    void createStoreGroups(MachineBasicBlock &MBB,
101        InstrGroupList &StoreGroups);
102    bool processBasicBlock(MachineBasicBlock &MBB);
103    bool processStoreGroup(InstrGroup &Group);
104    bool selectStores(InstrGroup::iterator Begin, InstrGroup::iterator End,
105        InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize);
106    bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
107    bool replaceStores(InstrGroup &OG, InstrGroup &NG);
108    bool storesAreAdjacent(const MachineInstr *S1, const MachineInstr *S2);
109  };
110
111} // end anonymous namespace
112
113char HexagonStoreWidening::ID = 0;
114
115INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
116                "Hexason Store Widening", false, false)
117INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
118INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
119                "Hexagon Store Widening", false, false)
120
121// Some local helper functions...
122static unsigned getBaseAddressRegister(const MachineInstr *MI) {
123  const MachineOperand &MO = MI->getOperand(0);
124  assert(MO.isReg() && "Expecting register operand");
125  return MO.getReg();
126}
127
128static int64_t getStoreOffset(const MachineInstr *MI) {
129  unsigned OpC = MI->getOpcode();
130  assert(HexagonStoreWidening::handledStoreType(MI) && "Unhandled opcode");
131
132  switch (OpC) {
133    case Hexagon::S4_storeirb_io:
134    case Hexagon::S4_storeirh_io:
135    case Hexagon::S4_storeiri_io: {
136      const MachineOperand &MO = MI->getOperand(1);
137      assert(MO.isImm() && "Expecting immediate offset");
138      return MO.getImm();
139    }
140  }
141  dbgs() << *MI;
142  llvm_unreachable("Store offset calculation missing for a handled opcode");
143  return 0;
144}
145
146static const MachineMemOperand &getStoreTarget(const MachineInstr *MI) {
147  assert(!MI->memoperands_empty() && "Expecting memory operands");
148  return **MI->memoperands_begin();
149}
150
151// Filtering function: any stores whose opcodes are not "approved" of by
152// this function will not be subjected to widening.
153inline bool HexagonStoreWidening::handledStoreType(const MachineInstr *MI) {
154  // For now, only handle stores of immediate values.
155  // Also, reject stores to stack slots.
156  unsigned Opc = MI->getOpcode();
157  switch (Opc) {
158    case Hexagon::S4_storeirb_io:
159    case Hexagon::S4_storeirh_io:
160    case Hexagon::S4_storeiri_io:
161      // Base address must be a register. (Implement FI later.)
162      return MI->getOperand(0).isReg();
163    default:
164      return false;
165  }
166}
167
168// Check if the machine memory operand MMO is aliased with any of the
169// stores in the store group Stores.
170bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
171      const MachineMemOperand &MMO) {
172  if (!MMO.getValue())
173    return true;
174
175  MemoryLocation L(MMO.getValue(), MMO.getSize(), MMO.getAAInfo());
176
177  for (auto SI : Stores) {
178    const MachineMemOperand &SMO = getStoreTarget(SI);
179    if (!SMO.getValue())
180      return true;
181
182    MemoryLocation SL(SMO.getValue(), SMO.getSize(), SMO.getAAInfo());
183    if (AA->alias(L, SL))
184      return true;
185  }
186
187  return false;
188}
189
190// Check if the machine instruction MI accesses any storage aliased with
191// any store in the group Stores.
192bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
193      const MachineInstr *MI) {
194  for (auto &I : MI->memoperands())
195    if (instrAliased(Stores, *I))
196      return true;
197  return false;
198}
199
200// Inspect a machine basic block, and generate store groups out of stores
201// encountered in the block.
202//
203// A store group is a group of stores that use the same base register,
204// and which can be reordered within that group without altering the
205// semantics of the program.  A single store group could be widened as
206// a whole, if there existed a single store instruction with the same
207// semantics as the entire group.  In many cases, a single store group
208// may need more than one wide store.
209void HexagonStoreWidening::createStoreGroups(MachineBasicBlock &MBB,
210      InstrGroupList &StoreGroups) {
211  InstrGroup AllInsns;
212
213  // Copy all instruction pointers from the basic block to a temporary
214  // list.  This will allow operating on the list, and modifying its
215  // elements without affecting the basic block.
216  for (auto &I : MBB)
217    AllInsns.push_back(&I);
218
219  // Traverse all instructions in the AllInsns list, and if we encounter
220  // a store, then try to create a store group starting at that instruction
221  // i.e. a sequence of independent stores that can be widened.
222  for (auto I = AllInsns.begin(), E = AllInsns.end(); I != E; ++I) {
223    MachineInstr *MI = *I;
224    // Skip null pointers (processed instructions).
225    if (!MI || !handledStoreType(MI))
226      continue;
227
228    // Found a store.  Try to create a store group.
229    InstrGroup G;
230    createStoreGroup(MI, I+1, E, G);
231    if (G.size() > 1)
232      StoreGroups.push_back(G);
233  }
234}
235
236// Create a single store group.  The stores need to be independent between
237// themselves, and also there cannot be other instructions between them
238// that could read or modify storage being stored into.
239void HexagonStoreWidening::createStoreGroup(MachineInstr *BaseStore,
240      InstrGroup::iterator Begin, InstrGroup::iterator End, InstrGroup &Group) {
241  assert(handledStoreType(BaseStore) && "Unexpected instruction");
242  unsigned BaseReg = getBaseAddressRegister(BaseStore);
243  InstrGroup Other;
244
245  Group.push_back(BaseStore);
246
247  for (auto I = Begin; I != End; ++I) {
248    MachineInstr *MI = *I;
249    if (!MI)
250      continue;
251
252    if (handledStoreType(MI)) {
253      // If this store instruction is aliased with anything already in the
254      // group, terminate the group now.
255      if (instrAliased(Group, getStoreTarget(MI)))
256        return;
257      // If this store is aliased to any of the memory instructions we have
258      // seen so far (that are not a part of this group), terminate the group.
259      if (instrAliased(Other, getStoreTarget(MI)))
260        return;
261
262      unsigned BR = getBaseAddressRegister(MI);
263      if (BR == BaseReg) {
264        Group.push_back(MI);
265        *I = nullptr;
266        continue;
267      }
268    }
269
270    // Assume calls are aliased to everything.
271    if (MI->isCall() || MI->hasUnmodeledSideEffects())
272      return;
273
274    if (MI->mayLoadOrStore()) {
275      if (MI->hasOrderedMemoryRef() || instrAliased(Group, MI))
276        return;
277      Other.push_back(MI);
278    }
279  } // for
280}
281
282// Check if store instructions S1 and S2 are adjacent.  More precisely,
283// S2 has to access memory immediately following that accessed by S1.
284bool HexagonStoreWidening::storesAreAdjacent(const MachineInstr *S1,
285      const MachineInstr *S2) {
286  if (!handledStoreType(S1) || !handledStoreType(S2))
287    return false;
288
289  const MachineMemOperand &S1MO = getStoreTarget(S1);
290
291  // Currently only handling immediate stores.
292  int Off1 = S1->getOperand(1).getImm();
293  int Off2 = S2->getOperand(1).getImm();
294
295  return (Off1 >= 0) ? Off1+S1MO.getSize() == unsigned(Off2)
296                     : int(Off1+S1MO.getSize()) == Off2;
297}
298
299/// Given a sequence of adjacent stores, and a maximum size of a single wide
300/// store, pick a group of stores that  can be replaced by a single store
301/// of size not exceeding MaxSize.  The selected sequence will be recorded
302/// in OG ("old group" of instructions).
303/// OG should be empty on entry, and should be left empty if the function
304/// fails.
305bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin,
306      InstrGroup::iterator End, InstrGroup &OG, unsigned &TotalSize,
307      unsigned MaxSize) {
308  assert(Begin != End && "No instructions to analyze");
309  assert(OG.empty() && "Old group not empty on entry");
310
311  if (std::distance(Begin, End) <= 1)
312    return false;
313
314  MachineInstr *FirstMI = *Begin;
315  assert(!FirstMI->memoperands_empty() && "Expecting some memory operands");
316  const MachineMemOperand &FirstMMO = getStoreTarget(FirstMI);
317  unsigned Alignment = FirstMMO.getAlignment();
318  unsigned SizeAccum = FirstMMO.getSize();
319  unsigned FirstOffset = getStoreOffset(FirstMI);
320
321  // The initial value of SizeAccum should always be a power of 2.
322  assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2");
323
324  // If the size of the first store equals to or exceeds the limit, do nothing.
325  if (SizeAccum >= MaxSize)
326    return false;
327
328  // If the size of the first store is greater than or equal to the address
329  // stored to, then the store cannot be made any wider.
330  if (SizeAccum >= Alignment)
331    return false;
332
333  // The offset of a store will put restrictions on how wide the store can be.
334  // Offsets in stores of size 2^n bytes need to have the n lowest bits be 0.
335  // If the first store already exhausts the offset limits, quit.  Test this
336  // by checking if the next wider size would exceed the limit.
337  if ((2*SizeAccum-1) & FirstOffset)
338    return false;
339
340  OG.push_back(FirstMI);
341  MachineInstr *S1 = FirstMI;
342
343  // Pow2Num will be the largest number of elements in OG such that the sum
344  // of sizes of stores 0...Pow2Num-1 will be a power of 2.
345  unsigned Pow2Num = 1;
346  unsigned Pow2Size = SizeAccum;
347
348  // Be greedy: keep accumulating stores as long as they are to adjacent
349  // memory locations, and as long as the total number of bytes stored
350  // does not exceed the limit (MaxSize).
351  // Keep track of when the total size covered is a power of 2, since
352  // this is a size a single store can cover.
353  for (InstrGroup::iterator I = Begin + 1; I != End; ++I) {
354    MachineInstr *S2 = *I;
355    // Stores are sorted, so if S1 and S2 are not adjacent, there won't be
356    // any other store to fill the "hole".
357    if (!storesAreAdjacent(S1, S2))
358      break;
359
360    unsigned S2Size = getStoreTarget(S2).getSize();
361    if (SizeAccum + S2Size > std::min(MaxSize, Alignment))
362      break;
363
364    OG.push_back(S2);
365    SizeAccum += S2Size;
366    if (isPowerOf2_32(SizeAccum)) {
367      Pow2Num = OG.size();
368      Pow2Size = SizeAccum;
369    }
370    if ((2*Pow2Size-1) & FirstOffset)
371      break;
372
373    S1 = S2;
374  }
375
376  // The stores don't add up to anything that can be widened.  Clean up.
377  if (Pow2Num <= 1) {
378    OG.clear();
379    return false;
380  }
381
382  // Only leave the stored being widened.
383  OG.resize(Pow2Num);
384  TotalSize = Pow2Size;
385  return true;
386}
387
388/// Given an "old group" OG of stores, create a "new group" NG of instructions
389/// to replace them.  Ideally, NG would only have a single instruction in it,
390/// but that may only be possible for store-immediate.
391bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
392      unsigned TotalSize) {
393  // XXX Current limitations:
394  // - only expect stores of immediate values in OG,
395  // - only handle a TotalSize of up to 4.
396
397  if (TotalSize > 4)
398    return false;
399
400  unsigned Acc = 0;  // Value accumulator.
401  unsigned Shift = 0;
402
403  for (InstrGroup::iterator I = OG.begin(), E = OG.end(); I != E; ++I) {
404    MachineInstr *MI = *I;
405    const MachineMemOperand &MMO = getStoreTarget(MI);
406    MachineOperand &SO = MI->getOperand(2);  // Source.
407    assert(SO.isImm() && "Expecting an immediate operand");
408
409    unsigned NBits = MMO.getSize()*8;
410    unsigned Mask = (0xFFFFFFFFU >> (32-NBits));
411    unsigned Val = (SO.getImm() & Mask) << Shift;
412    Acc |= Val;
413    Shift += NBits;
414  }
415
416  MachineInstr *FirstSt = OG.front();
417  DebugLoc DL = OG.back()->getDebugLoc();
418  const MachineMemOperand &OldM = getStoreTarget(FirstSt);
419  MachineMemOperand *NewM =
420    MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(),
421                             TotalSize, OldM.getAlignment(),
422                             OldM.getAAInfo());
423
424  if (Acc < 0x10000) {
425    // Create mem[hw] = #Acc
426    unsigned WOpc = (TotalSize == 2) ? Hexagon::S4_storeirh_io :
427                    (TotalSize == 4) ? Hexagon::S4_storeiri_io : 0;
428    assert(WOpc && "Unexpected size");
429
430    int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc);
431    const MCInstrDesc &StD = TII->get(WOpc);
432    MachineOperand &MR = FirstSt->getOperand(0);
433    int64_t Off = FirstSt->getOperand(1).getImm();
434    MachineInstr *StI =
435        BuildMI(*MF, DL, StD)
436            .addReg(MR.getReg(), getKillRegState(MR.isKill()), MR.getSubReg())
437            .addImm(Off)
438            .addImm(Val);
439    StI->addMemOperand(*MF, NewM);
440    NG.push_back(StI);
441  } else {
442    // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg
443    const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
444    const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF);
445    Register VReg = MF->getRegInfo().createVirtualRegister(RC);
446    MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg)
447                           .addImm(int(Acc));
448    NG.push_back(TfrI);
449
450    unsigned WOpc = (TotalSize == 2) ? Hexagon::S2_storerh_io :
451                    (TotalSize == 4) ? Hexagon::S2_storeri_io : 0;
452    assert(WOpc && "Unexpected size");
453
454    const MCInstrDesc &StD = TII->get(WOpc);
455    MachineOperand &MR = FirstSt->getOperand(0);
456    int64_t Off = FirstSt->getOperand(1).getImm();
457    MachineInstr *StI =
458        BuildMI(*MF, DL, StD)
459            .addReg(MR.getReg(), getKillRegState(MR.isKill()), MR.getSubReg())
460            .addImm(Off)
461            .addReg(VReg, RegState::Kill);
462    StI->addMemOperand(*MF, NewM);
463    NG.push_back(StI);
464  }
465
466  return true;
467}
468
469// Replace instructions from the old group OG with instructions from the
470// new group NG.  Conceptually, remove all instructions in OG, and then
471// insert all instructions in NG, starting at where the first instruction
472// from OG was (in the order in which they appeared in the basic block).
473// (The ordering in OG does not have to match the order in the basic block.)
474bool HexagonStoreWidening::replaceStores(InstrGroup &OG, InstrGroup &NG) {
475  LLVM_DEBUG({
476    dbgs() << "Replacing:\n";
477    for (auto I : OG)
478      dbgs() << "  " << *I;
479    dbgs() << "with\n";
480    for (auto I : NG)
481      dbgs() << "  " << *I;
482  });
483
484  MachineBasicBlock *MBB = OG.back()->getParent();
485  MachineBasicBlock::iterator InsertAt = MBB->end();
486
487  // Need to establish the insertion point.  The best one is right before
488  // the first store in the OG, but in the order in which the stores occur
489  // in the program list.  Since the ordering in OG does not correspond
490  // to the order in the program list, we need to do some work to find
491  // the insertion point.
492
493  // Create a set of all instructions in OG (for quick lookup).
494  SmallPtrSet<MachineInstr*, 4> InstrSet;
495  for (auto I : OG)
496    InstrSet.insert(I);
497
498  // Traverse the block, until we hit an instruction from OG.
499  for (auto &I : *MBB) {
500    if (InstrSet.count(&I)) {
501      InsertAt = I;
502      break;
503    }
504  }
505
506  assert((InsertAt != MBB->end()) && "Cannot locate any store from the group");
507
508  bool AtBBStart = false;
509
510  // InsertAt points at the first instruction that will be removed.  We need
511  // to move it out of the way, so it remains valid after removing all the
512  // old stores, and so we are able to recover it back to the proper insertion
513  // position.
514  if (InsertAt != MBB->begin())
515    --InsertAt;
516  else
517    AtBBStart = true;
518
519  for (auto I : OG)
520    I->eraseFromParent();
521
522  if (!AtBBStart)
523    ++InsertAt;
524  else
525    InsertAt = MBB->begin();
526
527  for (auto I : NG)
528    MBB->insert(InsertAt, I);
529
530  return true;
531}
532
533// Break up the group into smaller groups, each of which can be replaced by
534// a single wide store.  Widen each such smaller group and replace the old
535// instructions with the widened ones.
536bool HexagonStoreWidening::processStoreGroup(InstrGroup &Group) {
537  bool Changed = false;
538  InstrGroup::iterator I = Group.begin(), E = Group.end();
539  InstrGroup OG, NG;   // Old and new groups.
540  unsigned CollectedSize;
541
542  while (I != E) {
543    OG.clear();
544    NG.clear();
545
546    bool Succ = selectStores(I++, E, OG, CollectedSize, MaxWideSize) &&
547                createWideStores(OG, NG, CollectedSize)              &&
548                replaceStores(OG, NG);
549    if (!Succ)
550      continue;
551
552    assert(OG.size() > 1 && "Created invalid group");
553    assert(distance(I, E)+1 >= int(OG.size()) && "Too many elements");
554    I += OG.size()-1;
555
556    Changed = true;
557  }
558
559  return Changed;
560}
561
562// Process a single basic block: create the store groups, and replace them
563// with the widened stores, if possible.  Processing of each basic block
564// is independent from processing of any other basic block.  This transfor-
565// mation could be stopped after having processed any basic block without
566// any ill effects (other than not having performed widening in the unpro-
567// cessed blocks).  Also, the basic blocks can be processed in any order.
568bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) {
569  InstrGroupList SGs;
570  bool Changed = false;
571
572  createStoreGroups(MBB, SGs);
573
574  auto Less = [] (const MachineInstr *A, const MachineInstr *B) -> bool {
575    return getStoreOffset(A) < getStoreOffset(B);
576  };
577  for (auto &G : SGs) {
578    assert(G.size() > 1 && "Store group with fewer than 2 elements");
579    llvm::sort(G, Less);
580
581    Changed |= processStoreGroup(G);
582  }
583
584  return Changed;
585}
586
587bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) {
588  if (skipFunction(MFn.getFunction()))
589    return false;
590
591  MF = &MFn;
592  auto &ST = MFn.getSubtarget<HexagonSubtarget>();
593  TII = ST.getInstrInfo();
594  TRI = ST.getRegisterInfo();
595  MRI = &MFn.getRegInfo();
596  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
597
598  bool Changed = false;
599
600  for (auto &B : MFn)
601    Changed |= processBasicBlock(B);
602
603  return Changed;
604}
605
606FunctionPass *llvm::createHexagonStoreWidening() {
607  return new HexagonStoreWidening();
608}
609