HexagonStoreWidening.cpp revision 360784
//===- HexagonStoreWidening.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Replace sequences of "narrow" stores to adjacent memory locations with
// fewer "wide" stores that have the same effect.
// For example, replace:
//   S4_storeirb_io  %100, 0, 0   ; store-immediate-byte
//   S4_storeirb_io  %100, 1, 0   ; store-immediate-byte
// with
//   S4_storeirh_io  %100, 0, 0   ; store-immediate-halfword
// The above is the general idea.  The actual cases handled by the code
// may be a bit more complex.
// The purpose of this pass is to reduce the number of outstanding stores,
// or as one could say, "reduce store queue pressure".  Also, wide stores
// mean fewer stores, and since there are only two memory instructions allowed
// per packet, it also means fewer packets, and ultimately fewer cycles.
21//===---------------------------------------------------------------------===// 22 23#include "HexagonInstrInfo.h" 24#include "HexagonRegisterInfo.h" 25#include "HexagonSubtarget.h" 26#include "llvm/ADT/SmallPtrSet.h" 27#include "llvm/Analysis/AliasAnalysis.h" 28#include "llvm/Analysis/MemoryLocation.h" 29#include "llvm/CodeGen/MachineBasicBlock.h" 30#include "llvm/CodeGen/MachineFunction.h" 31#include "llvm/CodeGen/MachineFunctionPass.h" 32#include "llvm/CodeGen/MachineInstr.h" 33#include "llvm/CodeGen/MachineInstrBuilder.h" 34#include "llvm/CodeGen/MachineMemOperand.h" 35#include "llvm/CodeGen/MachineOperand.h" 36#include "llvm/CodeGen/MachineRegisterInfo.h" 37#include "llvm/IR/DebugLoc.h" 38#include "llvm/InitializePasses.h" 39#include "llvm/MC/MCInstrDesc.h" 40#include "llvm/Pass.h" 41#include "llvm/Support/Debug.h" 42#include "llvm/Support/ErrorHandling.h" 43#include "llvm/Support/MathExtras.h" 44#include "llvm/Support/raw_ostream.h" 45#include <algorithm> 46#include <cassert> 47#include <cstdint> 48#include <iterator> 49#include <vector> 50 51#define DEBUG_TYPE "hexagon-widen-stores" 52 53using namespace llvm; 54 55namespace llvm { 56 57FunctionPass *createHexagonStoreWidening(); 58void initializeHexagonStoreWideningPass(PassRegistry&); 59 60} // end namespace llvm 61 62namespace { 63 64 struct HexagonStoreWidening : public MachineFunctionPass { 65 const HexagonInstrInfo *TII; 66 const HexagonRegisterInfo *TRI; 67 const MachineRegisterInfo *MRI; 68 AliasAnalysis *AA; 69 MachineFunction *MF; 70 71 public: 72 static char ID; 73 74 HexagonStoreWidening() : MachineFunctionPass(ID) { 75 initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry()); 76 } 77 78 bool runOnMachineFunction(MachineFunction &MF) override; 79 80 StringRef getPassName() const override { return "Hexagon Store Widening"; } 81 82 void getAnalysisUsage(AnalysisUsage &AU) const override { 83 AU.addRequired<AAResultsWrapperPass>(); 84 AU.addPreserved<AAResultsWrapperPass>(); 85 
MachineFunctionPass::getAnalysisUsage(AU); 86 } 87 88 static bool handledStoreType(const MachineInstr *MI); 89 90 private: 91 static const int MaxWideSize = 4; 92 93 using InstrGroup = std::vector<MachineInstr *>; 94 using InstrGroupList = std::vector<InstrGroup>; 95 96 bool instrAliased(InstrGroup &Stores, const MachineMemOperand &MMO); 97 bool instrAliased(InstrGroup &Stores, const MachineInstr *MI); 98 void createStoreGroup(MachineInstr *BaseStore, InstrGroup::iterator Begin, 99 InstrGroup::iterator End, InstrGroup &Group); 100 void createStoreGroups(MachineBasicBlock &MBB, 101 InstrGroupList &StoreGroups); 102 bool processBasicBlock(MachineBasicBlock &MBB); 103 bool processStoreGroup(InstrGroup &Group); 104 bool selectStores(InstrGroup::iterator Begin, InstrGroup::iterator End, 105 InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize); 106 bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize); 107 bool replaceStores(InstrGroup &OG, InstrGroup &NG); 108 bool storesAreAdjacent(const MachineInstr *S1, const MachineInstr *S2); 109 }; 110 111} // end anonymous namespace 112 113char HexagonStoreWidening::ID = 0; 114 115INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores", 116 "Hexason Store Widening", false, false) 117INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 118INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores", 119 "Hexagon Store Widening", false, false) 120 121// Some local helper functions... 
122static unsigned getBaseAddressRegister(const MachineInstr *MI) { 123 const MachineOperand &MO = MI->getOperand(0); 124 assert(MO.isReg() && "Expecting register operand"); 125 return MO.getReg(); 126} 127 128static int64_t getStoreOffset(const MachineInstr *MI) { 129 unsigned OpC = MI->getOpcode(); 130 assert(HexagonStoreWidening::handledStoreType(MI) && "Unhandled opcode"); 131 132 switch (OpC) { 133 case Hexagon::S4_storeirb_io: 134 case Hexagon::S4_storeirh_io: 135 case Hexagon::S4_storeiri_io: { 136 const MachineOperand &MO = MI->getOperand(1); 137 assert(MO.isImm() && "Expecting immediate offset"); 138 return MO.getImm(); 139 } 140 } 141 dbgs() << *MI; 142 llvm_unreachable("Store offset calculation missing for a handled opcode"); 143 return 0; 144} 145 146static const MachineMemOperand &getStoreTarget(const MachineInstr *MI) { 147 assert(!MI->memoperands_empty() && "Expecting memory operands"); 148 return **MI->memoperands_begin(); 149} 150 151// Filtering function: any stores whose opcodes are not "approved" of by 152// this function will not be subjected to widening. 153inline bool HexagonStoreWidening::handledStoreType(const MachineInstr *MI) { 154 // For now, only handle stores of immediate values. 155 // Also, reject stores to stack slots. 156 unsigned Opc = MI->getOpcode(); 157 switch (Opc) { 158 case Hexagon::S4_storeirb_io: 159 case Hexagon::S4_storeirh_io: 160 case Hexagon::S4_storeiri_io: 161 // Base address must be a register. (Implement FI later.) 162 return MI->getOperand(0).isReg(); 163 default: 164 return false; 165 } 166} 167 168// Check if the machine memory operand MMO is aliased with any of the 169// stores in the store group Stores. 
170bool HexagonStoreWidening::instrAliased(InstrGroup &Stores, 171 const MachineMemOperand &MMO) { 172 if (!MMO.getValue()) 173 return true; 174 175 MemoryLocation L(MMO.getValue(), MMO.getSize(), MMO.getAAInfo()); 176 177 for (auto SI : Stores) { 178 const MachineMemOperand &SMO = getStoreTarget(SI); 179 if (!SMO.getValue()) 180 return true; 181 182 MemoryLocation SL(SMO.getValue(), SMO.getSize(), SMO.getAAInfo()); 183 if (AA->alias(L, SL)) 184 return true; 185 } 186 187 return false; 188} 189 190// Check if the machine instruction MI accesses any storage aliased with 191// any store in the group Stores. 192bool HexagonStoreWidening::instrAliased(InstrGroup &Stores, 193 const MachineInstr *MI) { 194 for (auto &I : MI->memoperands()) 195 if (instrAliased(Stores, *I)) 196 return true; 197 return false; 198} 199 200// Inspect a machine basic block, and generate store groups out of stores 201// encountered in the block. 202// 203// A store group is a group of stores that use the same base register, 204// and which can be reordered within that group without altering the 205// semantics of the program. A single store group could be widened as 206// a whole, if there existed a single store instruction with the same 207// semantics as the entire group. In many cases, a single store group 208// may need more than one wide store. 209void HexagonStoreWidening::createStoreGroups(MachineBasicBlock &MBB, 210 InstrGroupList &StoreGroups) { 211 InstrGroup AllInsns; 212 213 // Copy all instruction pointers from the basic block to a temporary 214 // list. This will allow operating on the list, and modifying its 215 // elements without affecting the basic block. 216 for (auto &I : MBB) 217 AllInsns.push_back(&I); 218 219 // Traverse all instructions in the AllInsns list, and if we encounter 220 // a store, then try to create a store group starting at that instruction 221 // i.e. a sequence of independent stores that can be widened. 
222 for (auto I = AllInsns.begin(), E = AllInsns.end(); I != E; ++I) { 223 MachineInstr *MI = *I; 224 // Skip null pointers (processed instructions). 225 if (!MI || !handledStoreType(MI)) 226 continue; 227 228 // Found a store. Try to create a store group. 229 InstrGroup G; 230 createStoreGroup(MI, I+1, E, G); 231 if (G.size() > 1) 232 StoreGroups.push_back(G); 233 } 234} 235 236// Create a single store group. The stores need to be independent between 237// themselves, and also there cannot be other instructions between them 238// that could read or modify storage being stored into. 239void HexagonStoreWidening::createStoreGroup(MachineInstr *BaseStore, 240 InstrGroup::iterator Begin, InstrGroup::iterator End, InstrGroup &Group) { 241 assert(handledStoreType(BaseStore) && "Unexpected instruction"); 242 unsigned BaseReg = getBaseAddressRegister(BaseStore); 243 InstrGroup Other; 244 245 Group.push_back(BaseStore); 246 247 for (auto I = Begin; I != End; ++I) { 248 MachineInstr *MI = *I; 249 if (!MI) 250 continue; 251 252 if (handledStoreType(MI)) { 253 // If this store instruction is aliased with anything already in the 254 // group, terminate the group now. 255 if (instrAliased(Group, getStoreTarget(MI))) 256 return; 257 // If this store is aliased to any of the memory instructions we have 258 // seen so far (that are not a part of this group), terminate the group. 259 if (instrAliased(Other, getStoreTarget(MI))) 260 return; 261 262 unsigned BR = getBaseAddressRegister(MI); 263 if (BR == BaseReg) { 264 Group.push_back(MI); 265 *I = nullptr; 266 continue; 267 } 268 } 269 270 // Assume calls are aliased to everything. 271 if (MI->isCall() || MI->hasUnmodeledSideEffects()) 272 return; 273 274 if (MI->mayLoadOrStore()) { 275 if (MI->hasOrderedMemoryRef() || instrAliased(Group, MI)) 276 return; 277 Other.push_back(MI); 278 } 279 } // for 280} 281 282// Check if store instructions S1 and S2 are adjacent. 
More precisely, 283// S2 has to access memory immediately following that accessed by S1. 284bool HexagonStoreWidening::storesAreAdjacent(const MachineInstr *S1, 285 const MachineInstr *S2) { 286 if (!handledStoreType(S1) || !handledStoreType(S2)) 287 return false; 288 289 const MachineMemOperand &S1MO = getStoreTarget(S1); 290 291 // Currently only handling immediate stores. 292 int Off1 = S1->getOperand(1).getImm(); 293 int Off2 = S2->getOperand(1).getImm(); 294 295 return (Off1 >= 0) ? Off1+S1MO.getSize() == unsigned(Off2) 296 : int(Off1+S1MO.getSize()) == Off2; 297} 298 299/// Given a sequence of adjacent stores, and a maximum size of a single wide 300/// store, pick a group of stores that can be replaced by a single store 301/// of size not exceeding MaxSize. The selected sequence will be recorded 302/// in OG ("old group" of instructions). 303/// OG should be empty on entry, and should be left empty if the function 304/// fails. 305bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin, 306 InstrGroup::iterator End, InstrGroup &OG, unsigned &TotalSize, 307 unsigned MaxSize) { 308 assert(Begin != End && "No instructions to analyze"); 309 assert(OG.empty() && "Old group not empty on entry"); 310 311 if (std::distance(Begin, End) <= 1) 312 return false; 313 314 MachineInstr *FirstMI = *Begin; 315 assert(!FirstMI->memoperands_empty() && "Expecting some memory operands"); 316 const MachineMemOperand &FirstMMO = getStoreTarget(FirstMI); 317 unsigned Alignment = FirstMMO.getAlignment(); 318 unsigned SizeAccum = FirstMMO.getSize(); 319 unsigned FirstOffset = getStoreOffset(FirstMI); 320 321 // The initial value of SizeAccum should always be a power of 2. 322 assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2"); 323 324 // If the size of the first store equals to or exceeds the limit, do nothing. 
325 if (SizeAccum >= MaxSize) 326 return false; 327 328 // If the size of the first store is greater than or equal to the address 329 // stored to, then the store cannot be made any wider. 330 if (SizeAccum >= Alignment) 331 return false; 332 333 // The offset of a store will put restrictions on how wide the store can be. 334 // Offsets in stores of size 2^n bytes need to have the n lowest bits be 0. 335 // If the first store already exhausts the offset limits, quit. Test this 336 // by checking if the next wider size would exceed the limit. 337 if ((2*SizeAccum-1) & FirstOffset) 338 return false; 339 340 OG.push_back(FirstMI); 341 MachineInstr *S1 = FirstMI; 342 343 // Pow2Num will be the largest number of elements in OG such that the sum 344 // of sizes of stores 0...Pow2Num-1 will be a power of 2. 345 unsigned Pow2Num = 1; 346 unsigned Pow2Size = SizeAccum; 347 348 // Be greedy: keep accumulating stores as long as they are to adjacent 349 // memory locations, and as long as the total number of bytes stored 350 // does not exceed the limit (MaxSize). 351 // Keep track of when the total size covered is a power of 2, since 352 // this is a size a single store can cover. 353 for (InstrGroup::iterator I = Begin + 1; I != End; ++I) { 354 MachineInstr *S2 = *I; 355 // Stores are sorted, so if S1 and S2 are not adjacent, there won't be 356 // any other store to fill the "hole". 357 if (!storesAreAdjacent(S1, S2)) 358 break; 359 360 unsigned S2Size = getStoreTarget(S2).getSize(); 361 if (SizeAccum + S2Size > std::min(MaxSize, Alignment)) 362 break; 363 364 OG.push_back(S2); 365 SizeAccum += S2Size; 366 if (isPowerOf2_32(SizeAccum)) { 367 Pow2Num = OG.size(); 368 Pow2Size = SizeAccum; 369 } 370 if ((2*Pow2Size-1) & FirstOffset) 371 break; 372 373 S1 = S2; 374 } 375 376 // The stores don't add up to anything that can be widened. Clean up. 377 if (Pow2Num <= 1) { 378 OG.clear(); 379 return false; 380 } 381 382 // Only leave the stored being widened. 
383 OG.resize(Pow2Num); 384 TotalSize = Pow2Size; 385 return true; 386} 387 388/// Given an "old group" OG of stores, create a "new group" NG of instructions 389/// to replace them. Ideally, NG would only have a single instruction in it, 390/// but that may only be possible for store-immediate. 391bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG, 392 unsigned TotalSize) { 393 // XXX Current limitations: 394 // - only expect stores of immediate values in OG, 395 // - only handle a TotalSize of up to 4. 396 397 if (TotalSize > 4) 398 return false; 399 400 unsigned Acc = 0; // Value accumulator. 401 unsigned Shift = 0; 402 403 for (InstrGroup::iterator I = OG.begin(), E = OG.end(); I != E; ++I) { 404 MachineInstr *MI = *I; 405 const MachineMemOperand &MMO = getStoreTarget(MI); 406 MachineOperand &SO = MI->getOperand(2); // Source. 407 assert(SO.isImm() && "Expecting an immediate operand"); 408 409 unsigned NBits = MMO.getSize()*8; 410 unsigned Mask = (0xFFFFFFFFU >> (32-NBits)); 411 unsigned Val = (SO.getImm() & Mask) << Shift; 412 Acc |= Val; 413 Shift += NBits; 414 } 415 416 MachineInstr *FirstSt = OG.front(); 417 DebugLoc DL = OG.back()->getDebugLoc(); 418 const MachineMemOperand &OldM = getStoreTarget(FirstSt); 419 MachineMemOperand *NewM = 420 MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(), 421 TotalSize, OldM.getAlignment(), 422 OldM.getAAInfo()); 423 424 if (Acc < 0x10000) { 425 // Create mem[hw] = #Acc 426 unsigned WOpc = (TotalSize == 2) ? Hexagon::S4_storeirh_io : 427 (TotalSize == 4) ? Hexagon::S4_storeiri_io : 0; 428 assert(WOpc && "Unexpected size"); 429 430 int Val = (TotalSize == 2) ? 
int16_t(Acc) : int(Acc); 431 const MCInstrDesc &StD = TII->get(WOpc); 432 MachineOperand &MR = FirstSt->getOperand(0); 433 int64_t Off = FirstSt->getOperand(1).getImm(); 434 MachineInstr *StI = 435 BuildMI(*MF, DL, StD) 436 .addReg(MR.getReg(), getKillRegState(MR.isKill()), MR.getSubReg()) 437 .addImm(Off) 438 .addImm(Val); 439 StI->addMemOperand(*MF, NewM); 440 NG.push_back(StI); 441 } else { 442 // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg 443 const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi); 444 const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF); 445 Register VReg = MF->getRegInfo().createVirtualRegister(RC); 446 MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg) 447 .addImm(int(Acc)); 448 NG.push_back(TfrI); 449 450 unsigned WOpc = (TotalSize == 2) ? Hexagon::S2_storerh_io : 451 (TotalSize == 4) ? Hexagon::S2_storeri_io : 0; 452 assert(WOpc && "Unexpected size"); 453 454 const MCInstrDesc &StD = TII->get(WOpc); 455 MachineOperand &MR = FirstSt->getOperand(0); 456 int64_t Off = FirstSt->getOperand(1).getImm(); 457 MachineInstr *StI = 458 BuildMI(*MF, DL, StD) 459 .addReg(MR.getReg(), getKillRegState(MR.isKill()), MR.getSubReg()) 460 .addImm(Off) 461 .addReg(VReg, RegState::Kill); 462 StI->addMemOperand(*MF, NewM); 463 NG.push_back(StI); 464 } 465 466 return true; 467} 468 469// Replace instructions from the old group OG with instructions from the 470// new group NG. Conceptually, remove all instructions in OG, and then 471// insert all instructions in NG, starting at where the first instruction 472// from OG was (in the order in which they appeared in the basic block). 473// (The ordering in OG does not have to match the order in the basic block.) 
474bool HexagonStoreWidening::replaceStores(InstrGroup &OG, InstrGroup &NG) { 475 LLVM_DEBUG({ 476 dbgs() << "Replacing:\n"; 477 for (auto I : OG) 478 dbgs() << " " << *I; 479 dbgs() << "with\n"; 480 for (auto I : NG) 481 dbgs() << " " << *I; 482 }); 483 484 MachineBasicBlock *MBB = OG.back()->getParent(); 485 MachineBasicBlock::iterator InsertAt = MBB->end(); 486 487 // Need to establish the insertion point. The best one is right before 488 // the first store in the OG, but in the order in which the stores occur 489 // in the program list. Since the ordering in OG does not correspond 490 // to the order in the program list, we need to do some work to find 491 // the insertion point. 492 493 // Create a set of all instructions in OG (for quick lookup). 494 SmallPtrSet<MachineInstr*, 4> InstrSet; 495 for (auto I : OG) 496 InstrSet.insert(I); 497 498 // Traverse the block, until we hit an instruction from OG. 499 for (auto &I : *MBB) { 500 if (InstrSet.count(&I)) { 501 InsertAt = I; 502 break; 503 } 504 } 505 506 assert((InsertAt != MBB->end()) && "Cannot locate any store from the group"); 507 508 bool AtBBStart = false; 509 510 // InsertAt points at the first instruction that will be removed. We need 511 // to move it out of the way, so it remains valid after removing all the 512 // old stores, and so we are able to recover it back to the proper insertion 513 // position. 514 if (InsertAt != MBB->begin()) 515 --InsertAt; 516 else 517 AtBBStart = true; 518 519 for (auto I : OG) 520 I->eraseFromParent(); 521 522 if (!AtBBStart) 523 ++InsertAt; 524 else 525 InsertAt = MBB->begin(); 526 527 for (auto I : NG) 528 MBB->insert(InsertAt, I); 529 530 return true; 531} 532 533// Break up the group into smaller groups, each of which can be replaced by 534// a single wide store. Widen each such smaller group and replace the old 535// instructions with the widened ones. 
536bool HexagonStoreWidening::processStoreGroup(InstrGroup &Group) { 537 bool Changed = false; 538 InstrGroup::iterator I = Group.begin(), E = Group.end(); 539 InstrGroup OG, NG; // Old and new groups. 540 unsigned CollectedSize; 541 542 while (I != E) { 543 OG.clear(); 544 NG.clear(); 545 546 bool Succ = selectStores(I++, E, OG, CollectedSize, MaxWideSize) && 547 createWideStores(OG, NG, CollectedSize) && 548 replaceStores(OG, NG); 549 if (!Succ) 550 continue; 551 552 assert(OG.size() > 1 && "Created invalid group"); 553 assert(distance(I, E)+1 >= int(OG.size()) && "Too many elements"); 554 I += OG.size()-1; 555 556 Changed = true; 557 } 558 559 return Changed; 560} 561 562// Process a single basic block: create the store groups, and replace them 563// with the widened stores, if possible. Processing of each basic block 564// is independent from processing of any other basic block. This transfor- 565// mation could be stopped after having processed any basic block without 566// any ill effects (other than not having performed widening in the unpro- 567// cessed blocks). Also, the basic blocks can be processed in any order. 
568bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) { 569 InstrGroupList SGs; 570 bool Changed = false; 571 572 createStoreGroups(MBB, SGs); 573 574 auto Less = [] (const MachineInstr *A, const MachineInstr *B) -> bool { 575 return getStoreOffset(A) < getStoreOffset(B); 576 }; 577 for (auto &G : SGs) { 578 assert(G.size() > 1 && "Store group with fewer than 2 elements"); 579 llvm::sort(G, Less); 580 581 Changed |= processStoreGroup(G); 582 } 583 584 return Changed; 585} 586 587bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) { 588 if (skipFunction(MFn.getFunction())) 589 return false; 590 591 MF = &MFn; 592 auto &ST = MFn.getSubtarget<HexagonSubtarget>(); 593 TII = ST.getInstrInfo(); 594 TRI = ST.getRegisterInfo(); 595 MRI = &MFn.getRegInfo(); 596 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 597 598 bool Changed = false; 599 600 for (auto &B : MFn) 601 Changed |= processBasicBlock(B); 602 603 return Changed; 604} 605 606FunctionPass *llvm::createHexagonStoreWidening() { 607 return new HexagonStoreWidening(); 608} 609