From 7fa27ce4a07f19b07799a767fc29416f3b625afb Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Wed, 26 Jul 2023 21:03:47 +0200 Subject: Vendor import of llvm-project main llvmorg-17-init-19304-gd0b54bb50e51, the last commit before the upstream release/17.x branch was created. --- llvm/lib/CodeGen/HardwareLoops.cpp | 171 +++++++++++++++++++++++++------------ 1 file changed, 117 insertions(+), 54 deletions(-) (limited to 'llvm/lib/CodeGen/HardwareLoops.cpp') diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp index 258ad1931b12..e7b14d700a44 100644 --- a/llvm/lib/CodeGen/HardwareLoops.cpp +++ b/llvm/lib/CodeGen/HardwareLoops.cpp @@ -15,8 +15,10 @@ /// //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/HardwareLoops.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -115,12 +117,12 @@ namespace { using TTI = TargetTransformInfo; - class HardwareLoops : public FunctionPass { + class HardwareLoopsLegacy : public FunctionPass { public: static char ID; - HardwareLoops() : FunctionPass(ID) { - initializeHardwareLoopsPass(*PassRegistry::getPassRegistry()); + HardwareLoopsLegacy() : FunctionPass(ID) { + initializeHardwareLoopsLegacyPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; @@ -131,29 +133,44 @@ namespace { AU.addRequired(); AU.addPreserved(); AU.addRequired(); + AU.addPreserved(); AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addPreserved(); } + }; + + class HardwareLoopsImpl { + public: + HardwareLoopsImpl(ScalarEvolution &SE, LoopInfo &LI, bool PreserveLCSSA, + DominatorTree &DT, const DataLayout &DL, + const TargetTransformInfo &TTI, TargetLibraryInfo *TLI, + AssumptionCache &AC, OptimizationRemarkEmitter *ORE, + HardwareLoopOptions &Opts) + : SE(SE), LI(LI), PreserveLCSSA(PreserveLCSSA), DT(DT), DL(DL), TTI(TTI), + TLI(TLI), AC(AC), ORE(ORE), Opts(Opts) { } + bool run(Function &F); + + private: // Try to convert the given Loop into a hardware loop. - bool TryConvertLoop(Loop *L); + bool TryConvertLoop(Loop *L, LLVMContext &Ctx); // Given that the target believes the loop to be profitable, try to // convert it. bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo); - private: - ScalarEvolution *SE = nullptr; - LoopInfo *LI = nullptr; - const DataLayout *DL = nullptr; - OptimizationRemarkEmitter *ORE = nullptr; - const TargetTransformInfo *TTI = nullptr; - DominatorTree *DT = nullptr; - bool PreserveLCSSA = false; - AssumptionCache *AC = nullptr; - TargetLibraryInfo *LibInfo = nullptr; - Module *M = nullptr; + ScalarEvolution &SE; + LoopInfo &LI; + bool PreserveLCSSA; + DominatorTree &DT; + const DataLayout &DL; + const TargetTransformInfo &TTI; + TargetLibraryInfo *TLI = nullptr; + AssumptionCache &AC; + OptimizationRemarkEmitter *ORE; + HardwareLoopOptions &Opts; bool MadeChange = false; }; @@ -182,8 +199,9 @@ namespace { public: HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE, const DataLayout &DL, - OptimizationRemarkEmitter *ORE) : - SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()), + OptimizationRemarkEmitter *ORE, + HardwareLoopOptions &Opts) : + SE(SE), DL(DL), ORE(ORE), Opts(Opts), L(Info.L), M(L->getHeader()->getModule()), ExitCount(Info.ExitCount), CountType(Info.CountType), ExitBranch(Info.ExitBranch), @@ -197,6 +215,7 @@ namespace { ScalarEvolution &SE; const DataLayout &DL; OptimizationRemarkEmitter *ORE = nullptr; + HardwareLoopOptions &Opts; Loop *L = nullptr; Module *M = nullptr; const SCEV *ExitCount = nullptr; @@ -209,40 +228,83 @@ namespace { }; } -char HardwareLoops::ID = 0; +char HardwareLoopsLegacy::ID = 0; -bool HardwareLoops::runOnFunction(Function &F) { +bool HardwareLoopsLegacy::runOnFunction(Function &F) { if (skipFunction(F)) return false; LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n"); - LI = &getAnalysis().getLoopInfo(); - SE = &getAnalysis().getSE(); - DT = &getAnalysis().getDomTree(); - TTI = &getAnalysis().getTTI(F); - DL = &F.getParent()->getDataLayout(); - ORE = &getAnalysis().getORE(); + auto &LI = getAnalysis().getLoopInfo(); + auto &SE = getAnalysis().getSE(); + auto &DT = getAnalysis().getDomTree(); + auto &TTI = getAnalysis().getTTI(F); + auto &DL = F.getParent()->getDataLayout(); + auto *ORE = &getAnalysis().getORE(); auto *TLIP = getAnalysisIfAvailable(); - LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr; - PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); - AC = &getAnalysis().getAssumptionCache(F); - M = F.getParent(); + auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr; + auto &AC = getAnalysis().getAssumptionCache(F); + bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); + + HardwareLoopOptions Opts; + if (ForceHardwareLoops.getNumOccurrences()) + Opts.setForce(ForceHardwareLoops); + if (ForceHardwareLoopPHI.getNumOccurrences()) + Opts.setForcePhi(ForceHardwareLoopPHI); + if (ForceNestedLoop.getNumOccurrences()) + Opts.setForceNested(ForceNestedLoop); + if (ForceGuardLoopEntry.getNumOccurrences()) + Opts.setForceGuard(ForceGuardLoopEntry); + if (LoopDecrement.getNumOccurrences()) + Opts.setDecrement(LoopDecrement); + if (CounterBitWidth.getNumOccurrences()) + Opts.setCounterBitwidth(CounterBitWidth); - for (Loop *L : *LI) - if (L->isOutermost()) - TryConvertLoop(L); + HardwareLoopsImpl Impl(SE, LI, PreserveLCSSA, DT, DL, TTI, TLI, AC, ORE, + Opts); + return Impl.run(F); +} + +PreservedAnalyses HardwareLoopsPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &LI = AM.getResult(F); + auto &SE = AM.getResult(F); + auto &DT = AM.getResult(F); + auto &TTI = AM.getResult(F); + auto *TLI = &AM.getResult(F); + auto &AC = AM.getResult(F); + auto *ORE = &AM.getResult(F); + auto &DL = F.getParent()->getDataLayout(); + + HardwareLoopsImpl Impl(SE, LI, true, DT, DL, TTI, TLI, AC, ORE, Opts); + bool Changed = Impl.run(F); + if (!Changed) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserve(); + PA.preserve(); + PA.preserve(); + PA.preserve(); + return PA; +} +bool HardwareLoopsImpl::run(Function &F) { + LLVMContext &Ctx = F.getParent()->getContext(); + for (Loop *L : LI) + if (L->isOutermost()) + TryConvertLoop(L, Ctx); return MadeChange; } // Return true if the search should stop, which will be when an inner loop is // converted and the parent loop doesn't support containing a hardware loop. -bool HardwareLoops::TryConvertLoop(Loop *L) { +bool HardwareLoopsImpl::TryConvertLoop(Loop *L, LLVMContext &Ctx) { // Process nested loops first. bool AnyChanged = false; for (Loop *SL : *L) - AnyChanged |= TryConvertLoop(SL); + AnyChanged |= TryConvertLoop(SL, Ctx); if (AnyChanged) { reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested", ORE, L); @@ -252,39 +314,39 @@ bool HardwareLoops::TryConvertLoop(Loop *L) { LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n"); HardwareLoopInfo HWLoopInfo(L); - if (!HWLoopInfo.canAnalyze(*LI)) { + if (!HWLoopInfo.canAnalyze(LI)) { reportHWLoopFailure("cannot analyze loop, irreducible control flow", "HWLoopCannotAnalyze", ORE, L); return false; } - if (!ForceHardwareLoops && - !TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) { + if (!Opts.Force && + !TTI.isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) { reportHWLoopFailure("it's not profitable to create a hardware-loop", "HWLoopNotProfitable", ORE, L); return false; } // Allow overriding of the counter width and loop decrement value. - if (CounterBitWidth.getNumOccurrences()) - HWLoopInfo.CountType = - IntegerType::get(M->getContext(), CounterBitWidth); + if (Opts.Bitwidth.has_value()) { + HWLoopInfo.CountType = IntegerType::get(Ctx, Opts.Bitwidth.value()); + } - if (LoopDecrement.getNumOccurrences()) + if (Opts.Decrement.has_value()) HWLoopInfo.LoopDecrement = - ConstantInt::get(HWLoopInfo.CountType, LoopDecrement); + ConstantInt::get(HWLoopInfo.CountType, Opts.Decrement.value()); MadeChange |= TryConvertLoop(HWLoopInfo); - return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop); + return MadeChange && (!HWLoopInfo.IsNestingLegal && !Opts.ForceNested); } -bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { +bool HardwareLoopsImpl::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { Loop *L = HWLoopInfo.L; LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L); - if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop, - ForceHardwareLoopPHI)) { + if (!HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT, Opts.getForceNested(), + Opts.getForcePhi())) { // TODO: there can be many reasons a loop is not considered a // candidate, so we should let isHardwareLoopCandidate fill in the // reason and then report a better message here. @@ -300,11 +362,11 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { // If we don't have a preheader, then insert one. if (!Preheader) - Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA); + Preheader = InsertPreheaderForLoop(L, &DT, &LI, nullptr, PreserveLCSSA); if (!Preheader) return false; - HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE); + HardwareLoop HWLoop(HWLoopInfo, SE, DL, ORE, Opts); HWLoop.Create(); ++NumHWLoops; return true; @@ -322,7 +384,7 @@ void HardwareLoop::Create() { Value *Setup = InsertIterationSetup(LoopCountInit); - if (UsePHICounter || ForceHardwareLoopPHI) { + if (UsePHICounter || Opts.ForcePhi) { Instruction *LoopDec = InsertLoopRegDec(LoopCountInit); Value *EltsRem = InsertPHICounter(Setup, LoopDec); LoopDec->setOperand(0, EltsRem); @@ -397,7 +459,8 @@ Value *HardwareLoop::InitLoopCount() { if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount, SE.getZero(ExitCount->getType()))) { LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n"); - UseLoopGuard |= ForceGuardLoopEntry; + if (Opts.ForceGuard) + UseLoopGuard = true; } else UseLoopGuard = false; @@ -441,7 +504,7 @@ Value *HardwareLoop::InitLoopCount() { Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) { IRBuilder<> Builder(BeginBB->getTerminator()); Type *Ty = LoopCountInit->getType(); - bool UsePhi = UsePHICounter || ForceHardwareLoopPHI; + bool UsePhi = UsePHICounter || Opts.ForcePhi; Intrinsic::ID ID = UseLoopGuard ? (UsePhi ? Intrinsic::test_start_loop_iterations : Intrinsic::test_set_loop_iterations) @@ -533,11 +596,11 @@ void HardwareLoop::UpdateBranch(Value *EltsRem) { RecursivelyDeleteTriviallyDeadInstructions(OldCond); } -INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false) +INITIALIZE_PASS_BEGIN(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) -INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false) +INITIALIZE_PASS_END(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false) -FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); } +FunctionPass *llvm::createHardwareLoopsLegacyPass() { return new HardwareLoopsLegacy(); } -- cgit v1.2.3