// AArch64TargetMachine.cpp — LLVM source, revision 360784
133965Sjdp//===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===// 289857Sobrien// 3130561Sobrien// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 460484Sobrien// See https://llvm.org/LICENSE.txt for license information. 533965Sjdp// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6104834Sobrien// 733965Sjdp//===----------------------------------------------------------------------===// 8104834Sobrien// 9104834Sobrien// 10104834Sobrien//===----------------------------------------------------------------------===// 11104834Sobrien 1233965Sjdp#include "AArch64TargetMachine.h" 13104834Sobrien#include "AArch64.h" 14104834Sobrien#include "AArch64MacroFusion.h" 15104834Sobrien#include "AArch64Subtarget.h" 16104834Sobrien#include "AArch64TargetObjectFile.h" 1733965Sjdp#include "AArch64TargetTransformInfo.h" 18104834Sobrien#include "MCTargetDesc/AArch64MCTargetDesc.h" 19104834Sobrien#include "TargetInfo/AArch64TargetInfo.h" 20104834Sobrien#include "llvm/ADT/STLExtras.h" 2133965Sjdp#include "llvm/ADT/Triple.h" 2233965Sjdp#include "llvm/Analysis/TargetTransformInfo.h" 2333965Sjdp#include "llvm/CodeGen/CSEConfigBase.h" 2433965Sjdp#include "llvm/CodeGen/GlobalISel/IRTranslator.h" 2533965Sjdp#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" 2633965Sjdp#include "llvm/CodeGen/GlobalISel/Legalizer.h" 27130561Sobrien#include "llvm/CodeGen/GlobalISel/Localizer.h" 28104834Sobrien#include "llvm/CodeGen/GlobalISel/RegBankSelect.h" 2933965Sjdp#include "llvm/CodeGen/MachineScheduler.h" 3033965Sjdp#include "llvm/CodeGen/Passes.h" 3133965Sjdp#include "llvm/CodeGen/TargetPassConfig.h" 3233965Sjdp#include "llvm/IR/Attributes.h" 3333965Sjdp#include "llvm/IR/Function.h" 3433965Sjdp#include "llvm/InitializePasses.h" 3533965Sjdp#include "llvm/MC/MCAsmInfo.h" 3661843Sobrien#include "llvm/MC/MCTargetOptions.h" 37104834Sobrien#include "llvm/Pass.h" 3833965Sjdp#include "llvm/Support/CodeGen.h" 3933965Sjdp#include 
"llvm/Support/CommandLine.h" 4033965Sjdp#include "llvm/Support/TargetRegistry.h" 4133965Sjdp#include "llvm/Target/TargetLoweringObjectFile.h" 4233965Sjdp#include "llvm/Target/TargetOptions.h" 4333965Sjdp#include "llvm/Transforms/CFGuard.h" 4433965Sjdp#include "llvm/Transforms/Scalar.h" 4533965Sjdp#include <memory> 4660484Sobrien#include <string> 4760484Sobrien 4860484Sobrienusing namespace llvm; 4960484Sobrien 5060484Sobrienstatic cl::opt<bool> EnableCCMP("aarch64-enable-ccmp", 5160484Sobrien cl::desc("Enable the CCMP formation pass"), 5233965Sjdp cl::init(true), cl::Hidden); 5333965Sjdp 5433965Sjdpstatic cl::opt<bool> 5533965Sjdp EnableCondBrTuning("aarch64-enable-cond-br-tune", 5633965Sjdp cl::desc("Enable the conditional branch tuning pass"), 57130561Sobrien cl::init(true), cl::Hidden); 5833965Sjdp 5933965Sjdpstatic cl::opt<bool> EnableMCR("aarch64-enable-mcr", 6033965Sjdp cl::desc("Enable the machine combiner pass"), 61130561Sobrien cl::init(true), cl::Hidden); 62130561Sobrien 63130561Sobrienstatic cl::opt<bool> EnableStPairSuppress("aarch64-enable-stp-suppress", 64130561Sobrien cl::desc("Suppress STP for AArch64"), 65130561Sobrien cl::init(true), cl::Hidden); 66130561Sobrien 6733965Sjdpstatic cl::opt<bool> EnableAdvSIMDScalar( 6833965Sjdp "aarch64-enable-simd-scalar", 69130561Sobrien cl::desc("Enable use of AdvSIMD scalar integer instructions"), 70130561Sobrien cl::init(false), cl::Hidden); 7133965Sjdp 7233965Sjdpstatic cl::opt<bool> 73130561Sobrien EnablePromoteConstant("aarch64-enable-promote-const", 74130561Sobrien cl::desc("Enable the promote constant pass"), 75130561Sobrien cl::init(true), cl::Hidden); 76130561Sobrien 77130561Sobrienstatic cl::opt<bool> EnableCollectLOH( 78130561Sobrien "aarch64-enable-collect-loh", 79130561Sobrien cl::desc("Enable the pass that emits the linker optimization hints (LOH)"), 80130561Sobrien cl::init(true), cl::Hidden); 8133965Sjdp 8233965Sjdpstatic cl::opt<bool> 8333965Sjdp 
EnableDeadRegisterElimination("aarch64-enable-dead-defs", cl::Hidden, 8433965Sjdp cl::desc("Enable the pass that removes dead" 8533965Sjdp " definitons and replaces stores to" 8633965Sjdp " them with stores to the zero" 8733965Sjdp " register"), 8833965Sjdp cl::init(true)); 8933965Sjdp 9033965Sjdpstatic cl::opt<bool> EnableRedundantCopyElimination( 9133965Sjdp "aarch64-enable-copyelim", 9233965Sjdp cl::desc("Enable the redundant copy elimination pass"), cl::init(true), 9333965Sjdp cl::Hidden); 9433965Sjdp 9533965Sjdpstatic cl::opt<bool> EnableLoadStoreOpt("aarch64-enable-ldst-opt", 9633965Sjdp cl::desc("Enable the load/store pair" 9733965Sjdp " optimization pass"), 9833965Sjdp cl::init(true), cl::Hidden); 9933965Sjdp 10033965Sjdpstatic cl::opt<bool> EnableAtomicTidy( 10133965Sjdp "aarch64-enable-atomic-cfg-tidy", cl::Hidden, 10233965Sjdp cl::desc("Run SimplifyCFG after expanding atomic operations" 10333965Sjdp " to make use of cmpxchg flow-based information"), 10433965Sjdp cl::init(true)); 10533965Sjdp 106130561Sobrienstatic cl::opt<bool> 10733965SjdpEnableEarlyIfConversion("aarch64-enable-early-ifcvt", cl::Hidden, 10833965Sjdp cl::desc("Run early if-conversion"), 10933965Sjdp cl::init(true)); 11033965Sjdp 11133965Sjdpstatic cl::opt<bool> 11233965Sjdp EnableCondOpt("aarch64-enable-condopt", 11333965Sjdp cl::desc("Enable the condition optimizer pass"), 11433965Sjdp cl::init(true), cl::Hidden); 11533965Sjdp 11633965Sjdpstatic cl::opt<bool> 11733965SjdpEnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden, 11833965Sjdp cl::desc("Work around Cortex-A53 erratum 835769"), 11933965Sjdp cl::init(false)); 12033965Sjdp 12133965Sjdpstatic cl::opt<bool> 12233965Sjdp EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden, 12333965Sjdp cl::desc("Enable optimizations on complex GEPs"), 12433965Sjdp cl::init(false)); 12533965Sjdp 12633965Sjdpstatic cl::opt<bool> 127130561Sobrien BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true), 12833965Sjdp 
cl::desc("Relax out of range conditional branches")); 129130561Sobrien 13060484Sobrienstatic cl::opt<bool> EnableCompressJumpTables( 131130561Sobrien "aarch64-enable-compress-jump-tables", cl::Hidden, cl::init(true), 13260484Sobrien cl::desc("Use smallest entry possible for jump tables")); 13360484Sobrien 13433965Sjdp// FIXME: Unify control over GlobalMerge. 135130561Sobrienstatic cl::opt<cl::boolOrDefault> 13633965Sjdp EnableGlobalMerge("aarch64-enable-global-merge", cl::Hidden, 13760484Sobrien cl::desc("Enable the global merge pass")); 13860484Sobrien 13960484Sobrienstatic cl::opt<bool> 140130561Sobrien EnableLoopDataPrefetch("aarch64-enable-loop-data-prefetch", cl::Hidden, 14160484Sobrien cl::desc("Enable the loop data prefetch pass"), 14233965Sjdp cl::init(true)); 14333965Sjdp 14433965Sjdpstatic cl::opt<int> EnableGlobalISelAtO( 145130561Sobrien "aarch64-enable-global-isel-at-O", cl::Hidden, 14633965Sjdp cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"), 14733965Sjdp cl::init(0)); 14833965Sjdp 14933965Sjdpstatic cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix", 15033965Sjdp cl::init(true), cl::Hidden); 15133965Sjdp 15233965Sjdpstatic cl::opt<bool> 15333965Sjdp EnableBranchTargets("aarch64-enable-branch-targets", cl::Hidden, 15433965Sjdp cl::desc("Enable the AAcrh64 branch target pass"), 15533965Sjdp cl::init(true)); 156130561Sobrien 15733965Sjdpextern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() { 15833965Sjdp // Register the target. 
15960484Sobrien RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget()); 16033965Sjdp RegisterTargetMachine<AArch64beTargetMachine> Y(getTheAArch64beTarget()); 16133965Sjdp RegisterTargetMachine<AArch64leTargetMachine> Z(getTheARM64Target()); 16233965Sjdp RegisterTargetMachine<AArch64leTargetMachine> W(getTheARM64_32Target()); 16333965Sjdp RegisterTargetMachine<AArch64leTargetMachine> V(getTheAArch64_32Target()); 16433965Sjdp auto PR = PassRegistry::getPassRegistry(); 16533965Sjdp initializeGlobalISel(*PR); 16633965Sjdp initializeAArch64A53Fix835769Pass(*PR); 16733965Sjdp initializeAArch64A57FPLoadBalancingPass(*PR); 16833965Sjdp initializeAArch64AdvSIMDScalarPass(*PR); 16933965Sjdp initializeAArch64BranchTargetsPass(*PR); 17060484Sobrien initializeAArch64CollectLOHPass(*PR); 17133965Sjdp initializeAArch64CompressJumpTablesPass(*PR); 17233965Sjdp initializeAArch64ConditionalComparesPass(*PR); 17333965Sjdp initializeAArch64ConditionOptimizerPass(*PR); 17433965Sjdp initializeAArch64DeadRegisterDefinitionsPass(*PR); 17533965Sjdp initializeAArch64ExpandPseudoPass(*PR); 17633965Sjdp initializeAArch64LoadStoreOptPass(*PR); 17733965Sjdp initializeAArch64SIMDInstrOptPass(*PR); 17833965Sjdp initializeAArch64PreLegalizerCombinerPass(*PR); 179130561Sobrien initializeAArch64PromoteConstantPass(*PR); 18033965Sjdp initializeAArch64RedundantCopyEliminationPass(*PR); 18160484Sobrien initializeAArch64StorePairSuppressPass(*PR); 18233965Sjdp initializeFalkorHWPFFixPass(*PR); 18333965Sjdp initializeFalkorMarkStridedAccessesLegacyPass(*PR); 18433965Sjdp initializeLDTLSCleanupPass(*PR); 18533965Sjdp initializeAArch64SpeculationHardeningPass(*PR); 18633965Sjdp initializeAArch64StackTaggingPass(*PR); 18733965Sjdp initializeAArch64StackTaggingPreRAPass(*PR); 18833965Sjdp} 18933965Sjdp 19033965Sjdp//===----------------------------------------------------------------------===// 19133965Sjdp// AArch64 Lowering public interface. 
19233965Sjdp//===----------------------------------------------------------------------===// 19361843Sobrienstatic std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { 19433965Sjdp if (TT.isOSBinFormatMachO()) 19560484Sobrien return std::make_unique<AArch64_MachoTargetObjectFile>(); 19660484Sobrien if (TT.isOSBinFormatCOFF()) 197104834Sobrien return std::make_unique<AArch64_COFFTargetObjectFile>(); 19860484Sobrien 19960484Sobrien return std::make_unique<AArch64_ELFTargetObjectFile>(); 20060484Sobrien} 20160484Sobrien 20260484Sobrien// Helper function to build a DataLayout string 20360484Sobrienstatic std::string computeDataLayout(const Triple &TT, 204130561Sobrien const MCTargetOptions &Options, 20533965Sjdp bool LittleEndian) { 20633965Sjdp if (Options.getABIName() == "ilp32") 20733965Sjdp return "e-m:e-p:32:32-i8:8-i16:16-i64:64-S128"; 20833965Sjdp if (TT.isOSBinFormatMachO()) { 20960484Sobrien if (TT.getArch() == Triple::aarch64_32) 21060484Sobrien return "e-m:o-p:32:32-i64:64-i128:128-n32:64-S128"; 21133965Sjdp return "e-m:o-i64:64-i128:128-n32:64-S128"; 21233965Sjdp } 21333965Sjdp if (TT.isOSBinFormatCOFF()) 214130561Sobrien return "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128"; 21533965Sjdp if (LittleEndian) 21633965Sjdp return "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"; 217130561Sobrien return "E-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"; 21833965Sjdp} 21933965Sjdp 22033965Sjdpstatic Reloc::Model getEffectiveRelocModel(const Triple &TT, 22133965Sjdp Optional<Reloc::Model> RM) { 222104834Sobrien // AArch64 Darwin and Windows are always PIC. 22333965Sjdp if (TT.isOSDarwin() || TT.isOSWindows()) 22460484Sobrien return Reloc::PIC_; 22560484Sobrien // On ELF platforms the default static relocation model has a smart enough 226104834Sobrien // linker to cope with referencing external symbols defined in a shared 22760484Sobrien // library. Hence DynamicNoPIC doesn't need to be promoted to PIC. 
22860484Sobrien if (!RM.hasValue() || *RM == Reloc::DynamicNoPIC) 22960484Sobrien return Reloc::Static; 23060484Sobrien return *RM; 23160484Sobrien} 23260484Sobrien 23360484Sobrienstatic CodeModel::Model 23460484SobriengetEffectiveAArch64CodeModel(const Triple &TT, Optional<CodeModel::Model> CM, 23560484Sobrien bool JIT) { 23660484Sobrien if (CM) { 23760484Sobrien if (*CM != CodeModel::Small && *CM != CodeModel::Tiny && 23860484Sobrien *CM != CodeModel::Large) { 23960484Sobrien if (!TT.isOSFuchsia()) 24060484Sobrien report_fatal_error( 24160484Sobrien "Only small, tiny and large code models are allowed on AArch64"); 24260484Sobrien else if (*CM != CodeModel::Kernel) 24360484Sobrien report_fatal_error("Only small, tiny, kernel, and large code models " 24460484Sobrien "are allowed on AArch64"); 24560484Sobrien } else if (*CM == CodeModel::Tiny && !TT.isOSBinFormatELF()) 24660484Sobrien report_fatal_error("tiny code model is only supported on ELF"); 24760484Sobrien return *CM; 24860484Sobrien } 24960484Sobrien // The default MCJIT memory managers make no guarantees about where they can 25060484Sobrien // find an executable page; JITed code needs to be able to refer to globals 25160484Sobrien // no matter how far away they are. 252104834Sobrien // We should set the CodeModel::Small for Windows ARM64 in JIT mode, 253104834Sobrien // since with large code model LLVM generating 4 MOV instructions, and 25460484Sobrien // Windows doesn't support relocating these long branch (4 MOVs). 25533965Sjdp if (JIT && !TT.isOSWindows()) 25689857Sobrien return CodeModel::Large; 257104834Sobrien return CodeModel::Small; 25889857Sobrien} 25989857Sobrien 26089857Sobrien/// Create an AArch64 architecture model. 
26189857Sobrien/// 26289857SobrienAArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT, 26389857Sobrien StringRef CPU, StringRef FS, 26433965Sjdp const TargetOptions &Options, 26533965Sjdp Optional<Reloc::Model> RM, 26633965Sjdp Optional<CodeModel::Model> CM, 26733965Sjdp CodeGenOpt::Level OL, bool JIT, 26860484Sobrien bool LittleEndian) 26933965Sjdp : LLVMTargetMachine(T, 27033965Sjdp computeDataLayout(TT, Options.MCOptions, LittleEndian), 27133965Sjdp TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM), 27233965Sjdp getEffectiveAArch64CodeModel(TT, CM, JIT), OL), 27333965Sjdp TLOF(createTLOF(getTargetTriple())), isLittle(LittleEndian) { 27433965Sjdp initAsmInfo(); 27533965Sjdp 27633965Sjdp if (TT.isOSBinFormatMachO()) { 277130561Sobrien this->Options.TrapUnreachable = true; 27833965Sjdp this->Options.NoTrapAfterNoreturn = true; 27933965Sjdp } 28033965Sjdp 28160484Sobrien if (getMCAsmInfo()->usesWindowsCFI()) { 28260484Sobrien // Unwinding can get confused if the last instruction in an 28360484Sobrien // exception-handling region (function, funclet, try block, etc.) 28433965Sjdp // is a call. 28561843Sobrien // 28661843Sobrien // FIXME: We could elide the trap if the next instruction would be in 28761843Sobrien // the same region anyway. 
28861843Sobrien this->Options.TrapUnreachable = true; 28977298Sobrien } 29061843Sobrien 29161843Sobrien if (this->Options.TLSSize == 0) // default 29277298Sobrien this->Options.TLSSize = 24; 29361843Sobrien if ((getCodeModel() == CodeModel::Small || 29461843Sobrien getCodeModel() == CodeModel::Kernel) && 29533965Sjdp this->Options.TLSSize > 32) 29633965Sjdp // for the small (and kernel) code model, the maximum TLS size is 4GiB 29733965Sjdp this->Options.TLSSize = 32; 29833965Sjdp else if (getCodeModel() == CodeModel::Tiny && this->Options.TLSSize > 24) 29933965Sjdp // for the tiny code model, the maximum TLS size is 1MiB (< 16MiB) 30033965Sjdp this->Options.TLSSize = 24; 30133965Sjdp 30233965Sjdp // Enable GlobalISel at or below EnableGlobalISelAt0, unless this is 30333965Sjdp // MachO/CodeModel::Large, which GlobalISel does not support. 30433965Sjdp if (getOptLevel() <= EnableGlobalISelAtO && 30533965Sjdp TT.getArch() != Triple::aarch64_32 && 30633965Sjdp !(getCodeModel() == CodeModel::Large && TT.isOSBinFormatMachO())) { 30733965Sjdp setGlobalISel(true); 30833965Sjdp setGlobalISelAbort(GlobalISelAbortMode::Disable); 30933965Sjdp } 31033965Sjdp 31133965Sjdp // AArch64 supports the MachineOutliner. 31233965Sjdp setMachineOutliner(true); 31333965Sjdp 31433965Sjdp // AArch64 supports default outlining behaviour. 31533965Sjdp setSupportsDefaultOutlining(true); 31633965Sjdp} 31733965Sjdp 31860484SobrienAArch64TargetMachine::~AArch64TargetMachine() = default; 31933965Sjdp 32033965Sjdpconst AArch64Subtarget * 32133965SjdpAArch64TargetMachine::getSubtargetImpl(const Function &F) const { 32233965Sjdp Attribute CPUAttr = F.getFnAttribute("target-cpu"); 323130561Sobrien Attribute FSAttr = F.getFnAttribute("target-features"); 32433965Sjdp 32533965Sjdp std::string CPU = !CPUAttr.hasAttribute(Attribute::None) 32633965Sjdp ? CPUAttr.getValueAsString().str() 327130561Sobrien : TargetCPU; 328130561Sobrien std::string FS = !FSAttr.hasAttribute(Attribute::None) 32933965Sjdp ? 
FSAttr.getValueAsString().str() 33033965Sjdp : TargetFS; 33133965Sjdp 33233965Sjdp auto &I = SubtargetMap[CPU + FS]; 33333965Sjdp if (!I) { 33433965Sjdp // This needs to be done before we create a new subtarget since any 33533965Sjdp // creation will depend on the TM and the code generation flags on the 33633965Sjdp // function that reside in TargetOptions. 33733965Sjdp resetTargetOptions(F); 338130561Sobrien I = std::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this, 33989857Sobrien isLittle); 34033965Sjdp } 341130561Sobrien return I.get(); 34233965Sjdp} 34333965Sjdp 34433965Sjdpvoid AArch64leTargetMachine::anchor() { } 34533965Sjdp 34633965SjdpAArch64leTargetMachine::AArch64leTargetMachine( 34733965Sjdp const Target &T, const Triple &TT, StringRef CPU, StringRef FS, 34833965Sjdp const TargetOptions &Options, Optional<Reloc::Model> RM, 34933965Sjdp Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT) 35033965Sjdp : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, true) {} 35133965Sjdp 35260484Sobrienvoid AArch64beTargetMachine::anchor() { } 35333965Sjdp 35433965SjdpAArch64beTargetMachine::AArch64beTargetMachine( 355104834Sobrien const Target &T, const Triple &TT, StringRef CPU, StringRef FS, 356130561Sobrien const TargetOptions &Options, Optional<Reloc::Model> RM, 35733965Sjdp Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT) 35860484Sobrien : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, false) {} 35960484Sobrien 36060484Sobriennamespace { 36189857Sobrien 36289857Sobrien/// AArch64 Code Generator Pass Configuration Options. 
36389857Sobrienclass AArch64PassConfig : public TargetPassConfig { 36460484Sobrienpublic: 36560484Sobrien AArch64PassConfig(AArch64TargetMachine &TM, PassManagerBase &PM) 36660484Sobrien : TargetPassConfig(TM, PM) { 36733965Sjdp if (TM.getOptLevel() != CodeGenOpt::None) 36833965Sjdp substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); 36933965Sjdp } 37033965Sjdp 37133965Sjdp AArch64TargetMachine &getAArch64TargetMachine() const { 37233965Sjdp return getTM<AArch64TargetMachine>(); 37333965Sjdp } 37433965Sjdp 37561843Sobrien ScheduleDAGInstrs * 376104834Sobrien createMachineScheduler(MachineSchedContext *C) const override { 377104834Sobrien const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>(); 378104834Sobrien ScheduleDAGMILive *DAG = createGenericSchedLive(C); 379104834Sobrien DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); 380104834Sobrien DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); 381104834Sobrien if (ST.hasFusion()) 382104834Sobrien DAG->addMutation(createAArch64MacroFusionDAGMutation()); 383104834Sobrien return DAG; 38461843Sobrien } 38533965Sjdp 38633965Sjdp ScheduleDAGInstrs * 38733965Sjdp createPostMachineScheduler(MachineSchedContext *C) const override { 38833965Sjdp const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>(); 38933965Sjdp if (ST.hasFusion()) { 39061843Sobrien // Run the Macro Fusion after RA again since literals are expanded from 39133965Sjdp // pseudos then (v. addPreSched2()). 
39233965Sjdp ScheduleDAGMI *DAG = createGenericSchedPostRA(C); 39333965Sjdp DAG->addMutation(createAArch64MacroFusionDAGMutation()); 39433965Sjdp return DAG; 39533965Sjdp } 39633965Sjdp 39733965Sjdp return nullptr; 39833965Sjdp } 39933965Sjdp 40033965Sjdp void addIRPasses() override; 40133965Sjdp bool addPreISel() override; 40233965Sjdp bool addInstSelector() override; 40333965Sjdp bool addIRTranslator() override; 40433965Sjdp void addPreLegalizeMachineIR() override; 40533965Sjdp bool addLegalizeMachineIR() override; 40633965Sjdp bool addRegBankSelect() override; 40733965Sjdp void addPreGlobalInstructionSelect() override; 40833965Sjdp bool addGlobalInstructionSelect() override; 40933965Sjdp bool addILPOpts() override; 41033965Sjdp void addPreRegAlloc() override; 41133965Sjdp void addPostRegAlloc() override; 41233965Sjdp void addPreSched2() override; 41333965Sjdp void addPreEmitPass() override; 41433965Sjdp 41533965Sjdp std::unique_ptr<CSEConfigBase> getCSEConfig() const override; 41633965Sjdp}; 41733965Sjdp 418104834Sobrien} // end anonymous namespace 419104834Sobrien 420104834SobrienTargetTransformInfo 421104834SobrienAArch64TargetMachine::getTargetTransformInfo(const Function &F) { 422104834Sobrien return TargetTransformInfo(AArch64TTIImpl(this, F)); 42377298Sobrien} 42433965Sjdp 42533965SjdpTargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) { 426130561Sobrien return new AArch64PassConfig(*this, PM); 42733965Sjdp} 42889857Sobrien 42989857Sobrienstd::unique_ptr<CSEConfigBase> AArch64PassConfig::getCSEConfig() const { 43089857Sobrien return getStandardCSEConfigForOpt(TM->getOptLevel()); 43189857Sobrien} 43233965Sjdp 43333965Sjdpvoid AArch64PassConfig::addIRPasses() { 43433965Sjdp // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg 43533965Sjdp // ourselves. 
43633965Sjdp addPass(createAtomicExpandPass()); 43733965Sjdp 43833965Sjdp // Cmpxchg instructions are often used with a subsequent comparison to 43933965Sjdp // determine whether it succeeded. We can exploit existing control-flow in 44033965Sjdp // ldrex/strex loops to simplify this, but it needs tidying up. 441130561Sobrien if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) 44233965Sjdp addPass(createCFGSimplificationPass(1, true, true, false, true)); 44333965Sjdp 44433965Sjdp // Run LoopDataPrefetch 44533965Sjdp // 44633965Sjdp // Run this before LSR to remove the multiplies involved in computing the 44733965Sjdp // pointer values N iterations ahead. 44833965Sjdp if (TM->getOptLevel() != CodeGenOpt::None) { 44933965Sjdp if (EnableLoopDataPrefetch) 45033965Sjdp addPass(createLoopDataPrefetchPass()); 45133965Sjdp if (EnableFalkorHWPFFix) 45233965Sjdp addPass(createFalkorMarkStridedAccessesPass()); 45333965Sjdp } 45433965Sjdp 45533965Sjdp TargetPassConfig::addIRPasses(); 45633965Sjdp 45733965Sjdp // Match interleaved memory accesses to ldN/stN intrinsics. 45833965Sjdp if (TM->getOptLevel() != CodeGenOpt::None) { 45933965Sjdp addPass(createInterleavedLoadCombinePass()); 46033965Sjdp addPass(createInterleavedAccessPass()); 46133965Sjdp } 46233965Sjdp 463130561Sobrien if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) { 464130561Sobrien // Call SeparateConstOffsetFromGEP pass to extract constants within indices 46533965Sjdp // and lower a GEP with multiple indices to either arithmetic operations or 46633965Sjdp // multiple GEPs with single index. 467130561Sobrien addPass(createSeparateConstOffsetFromGEPPass(true)); 468130561Sobrien // Call EarlyCSE pass to find and remove subexpressions in the lowered 469130561Sobrien // result. 470130561Sobrien addPass(createEarlyCSEPass()); 471130561Sobrien // Do loop invariant code motion in case part of the lowered result is 472130561Sobrien // invariant. 
47333965Sjdp addPass(createLICMPass()); 474130561Sobrien } 47533965Sjdp 476130561Sobrien addPass(createAArch64StackTaggingPass(/* MergeInit = */ TM->getOptLevel() != 47733965Sjdp CodeGenOpt::None)); 47833965Sjdp 47933965Sjdp // Add Control Flow Guard checks. 48033965Sjdp if (TM->getTargetTriple().isOSWindows()) 48133965Sjdp addPass(createCFGuardCheckPass()); 48233965Sjdp} 48333965Sjdp 48433965Sjdp// Pass Pipeline Configuration 48533965Sjdpbool AArch64PassConfig::addPreISel() { 48633965Sjdp // Run promote constant before global merge, so that the promoted constants 487130561Sobrien // get a chance to be merged 488130561Sobrien if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant) 489130561Sobrien addPass(createAArch64PromoteConstantPass()); 490130561Sobrien // FIXME: On AArch64, this depends on the type. 491130561Sobrien // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(). 492130561Sobrien // and the offset has to be a multiple of the related size in bytes. 493130561Sobrien if ((TM->getOptLevel() != CodeGenOpt::None && 494130561Sobrien EnableGlobalMerge == cl::BOU_UNSET) || 495130561Sobrien EnableGlobalMerge == cl::BOU_TRUE) { 496130561Sobrien bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) && 497130561Sobrien (EnableGlobalMerge == cl::BOU_UNSET); 498130561Sobrien 499130561Sobrien // Merging of extern globals is enabled by default on non-Mach-O as we 500130561Sobrien // expect it to be generally either beneficial or harmless. On Mach-O it 501130561Sobrien // is disabled as we emit the .subsections_via_symbols directive which 502130561Sobrien // means that merging extern globals is not safe. 503130561Sobrien bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO(); 504130561Sobrien 505130561Sobrien // FIXME: extern global merging is only enabled when we optimise for size 506130561Sobrien // because there are some regressions with it also enabled for performance. 
507130561Sobrien if (!OnlyOptimizeForSize) 508130561Sobrien MergeExternalByDefault = false; 509130561Sobrien 510130561Sobrien addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize, 511130561Sobrien MergeExternalByDefault)); 512130561Sobrien } 513130561Sobrien 514130561Sobrien return false; 515130561Sobrien} 516130561Sobrien 517130561Sobrienbool AArch64PassConfig::addInstSelector() { 51833965Sjdp addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel())); 519130561Sobrien 520130561Sobrien // For ELF, cleanup any local-dynamic TLS accesses (i.e. combine as many 521130561Sobrien // references to _TLS_MODULE_BASE_ as possible. 522130561Sobrien if (TM->getTargetTriple().isOSBinFormatELF() && 523130561Sobrien getOptLevel() != CodeGenOpt::None) 524130561Sobrien addPass(createAArch64CleanupLocalDynamicTLSPass()); 525130561Sobrien 526130561Sobrien return false; 527130561Sobrien} 528130561Sobrien 529130561Sobrienbool AArch64PassConfig::addIRTranslator() { 530130561Sobrien addPass(new IRTranslator()); 531130561Sobrien return false; 532130561Sobrien} 533130561Sobrien 534130561Sobrienvoid AArch64PassConfig::addPreLegalizeMachineIR() { 535130561Sobrien bool IsOptNone = getOptLevel() == CodeGenOpt::None; 536130561Sobrien addPass(createAArch64PreLegalizeCombiner(IsOptNone)); 537130561Sobrien} 538130561Sobrien 539130561Sobrienbool AArch64PassConfig::addLegalizeMachineIR() { 540130561Sobrien addPass(new Legalizer()); 541130561Sobrien return false; 542130561Sobrien} 543130561Sobrien 544130561Sobrienbool AArch64PassConfig::addRegBankSelect() { 545130561Sobrien addPass(new RegBankSelect()); 546130561Sobrien return false; 547130561Sobrien} 548130561Sobrien 549130561Sobrienvoid AArch64PassConfig::addPreGlobalInstructionSelect() { 550130561Sobrien addPass(new Localizer()); 551130561Sobrien} 552130561Sobrien 553130561Sobrienbool AArch64PassConfig::addGlobalInstructionSelect() { 554130561Sobrien addPass(new InstructionSelect()); 555130561Sobrien return false; 
556130561Sobrien} 557130561Sobrien 558130561Sobrienbool AArch64PassConfig::addILPOpts() { 559130561Sobrien if (EnableCondOpt) 560130561Sobrien addPass(createAArch64ConditionOptimizerPass()); 561130561Sobrien if (EnableCCMP) 562130561Sobrien addPass(createAArch64ConditionalCompares()); 563130561Sobrien if (EnableMCR) 564130561Sobrien addPass(&MachineCombinerID); 56533965Sjdp if (EnableCondBrTuning) 56633965Sjdp addPass(createAArch64CondBrTuning()); 567130561Sobrien if (EnableEarlyIfConversion) 568130561Sobrien addPass(&EarlyIfConverterID); 569130561Sobrien if (EnableStPairSuppress) 570130561Sobrien addPass(createAArch64StorePairSuppressPass()); 571130561Sobrien addPass(createAArch64SIMDInstrOptPass()); 572130561Sobrien if (TM->getOptLevel() != CodeGenOpt::None) 573130561Sobrien addPass(createAArch64StackTaggingPreRAPass()); 57433965Sjdp return true; 575130561Sobrien} 57633965Sjdp 57733965Sjdpvoid AArch64PassConfig::addPreRegAlloc() { 57833965Sjdp // Change dead register definitions to refer to the zero register. 57933965Sjdp if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination) 580130561Sobrien addPass(createAArch64DeadRegisterDefinitions()); 581130561Sobrien 58233965Sjdp // Use AdvSIMD scalar instructions whenever profitable. 58333965Sjdp if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) { 58433965Sjdp addPass(createAArch64AdvSIMDScalar()); 58533965Sjdp // The AdvSIMD pass may produce copies that can be rewritten to 58633965Sjdp // be register coalescer friendly. 58733965Sjdp addPass(&PeepholeOptimizerID); 58833965Sjdp } 58933965Sjdp} 59033965Sjdp 59133965Sjdpvoid AArch64PassConfig::addPostRegAlloc() { 59233965Sjdp // Remove redundant copy instructions. 
59333965Sjdp if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination) 59433965Sjdp addPass(createAArch64RedundantCopyEliminationPass()); 59533965Sjdp 59633965Sjdp if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc()) 59733965Sjdp // Improve performance for some FP/SIMD code for A57. 59833965Sjdp addPass(createAArch64A57FPLoadBalancing()); 59933965Sjdp} 600130561Sobrien 60133965Sjdpvoid AArch64PassConfig::addPreSched2() { 60233965Sjdp // Expand some pseudo instructions to allow proper scheduling. 60333965Sjdp addPass(createAArch64ExpandPseudoPass()); 60433965Sjdp // Use load/store pair instructions when possible. 60560484Sobrien if (TM->getOptLevel() != CodeGenOpt::None) { 60633965Sjdp if (EnableLoadStoreOpt) 60733965Sjdp addPass(createAArch64LoadStoreOptimizationPass()); 60860484Sobrien } 60933965Sjdp 61033965Sjdp // The AArch64SpeculationHardeningPass destroys dominator tree and natural 61133965Sjdp // loop info, which is needed for the FalkorHWPFFixPass and also later on. 61233965Sjdp // Therefore, run the AArch64SpeculationHardeningPass before the 613104834Sobrien // FalkorHWPFFixPass to avoid recomputing dominator tree and natural loop 61460484Sobrien // info. 615104834Sobrien addPass(createAArch64SpeculationHardeningPass()); 61660484Sobrien 61760484Sobrien if (TM->getOptLevel() != CodeGenOpt::None) { 618104834Sobrien if (EnableFalkorHWPFFix) 61960484Sobrien addPass(createFalkorHWPFFixPass()); 62060484Sobrien } 62160484Sobrien} 62233965Sjdp 62333965Sjdpvoid AArch64PassConfig::addPreEmitPass() { 62433965Sjdp // Machine Block Placement might have created new opportunities when run 62560484Sobrien // at O3, where the Tail Duplication Threshold is set to 4 instructions. 62633965Sjdp // Run the load/store optimizer once more. 
62733965Sjdp if (TM->getOptLevel() >= CodeGenOpt::Aggressive && EnableLoadStoreOpt) 62833965Sjdp addPass(createAArch64LoadStoreOptimizationPass()); 62933965Sjdp 63033965Sjdp if (EnableA53Fix835769) 63133965Sjdp addPass(createAArch64A53Fix835769()); 63233965Sjdp 63333965Sjdp if (EnableBranchTargets) 63433965Sjdp addPass(createAArch64BranchTargetsPass()); 63533965Sjdp 63633965Sjdp // Relax conditional branch instructions if they're otherwise out of 63733965Sjdp // range of their destination. 63833965Sjdp if (BranchRelaxation) 63933965Sjdp addPass(&BranchRelaxationPassID); 64033965Sjdp 64133965Sjdp // Identify valid longjmp targets for Windows Control Flow Guard. 64233965Sjdp if (TM->getTargetTriple().isOSWindows()) 64333965Sjdp addPass(createCFGuardLongjmpPass()); 64433965Sjdp 64533965Sjdp if (TM->getOptLevel() != CodeGenOpt::None && EnableCompressJumpTables) 64633965Sjdp addPass(createAArch64CompressJumpTablesPass()); 64733965Sjdp 64833965Sjdp if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH && 64933965Sjdp TM->getTargetTriple().isOSBinFormatMachO()) 65033965Sjdp addPass(createAArch64CollectLOHPass()); 65133965Sjdp} 65233965Sjdp