diff options
Diffstat (limited to 'lib/Transforms/Scalar/LoopLoadElimination.cpp')
-rw-r--r-- | lib/Transforms/Scalar/LoopLoadElimination.cpp | 75 |
1 files changed, 57 insertions, 18 deletions
diff --git a/lib/Transforms/Scalar/LoopLoadElimination.cpp b/lib/Transforms/Scalar/LoopLoadElimination.cpp index 1064d088514d5..f29228c7659e2 100644 --- a/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -28,6 +28,7 @@ #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/LoopVersioning.h" #include <forward_list> @@ -61,7 +62,8 @@ struct StoreToLoadForwardingCandidate { /// \brief Return true if the dependence from the store to the load has a /// distance of one. E.g. A[i+1] = A[i] - bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE) const { + bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE, + Loop *L) const { Value *LoadPtr = Load->getPointerOperand(); Value *StorePtr = Store->getPointerOperand(); Type *LoadPtrType = LoadPtr->getType(); @@ -72,6 +74,13 @@ struct StoreToLoadForwardingCandidate { LoadType == StorePtr->getType()->getPointerElementType() && "Should be a known dependence"); + // Currently we only support accesses with unit stride. FIXME: we should be + // able to handle non unit stirde as well as long as the stride is equal to + // the dependence distance. + if (getPtrStride(PSE, LoadPtr, L) != 1 || + getPtrStride(PSE, StorePtr, L) != 1) + return false; + auto &DL = Load->getParent()->getModule()->getDataLayout(); unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType)); @@ -83,7 +92,7 @@ struct StoreToLoadForwardingCandidate { auto *Dist = cast<SCEVConstant>( PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV)); const APInt &Val = Dist->getAPInt(); - return Val.abs() == TypeByteSize; + return Val == TypeByteSize; } Value *getLoadPtr() const { return Load->getPointerOperand(); } @@ -110,12 +119,17 @@ bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L, }); } +/// \brief Return true if the load is not executed on all paths in the loop. +static bool isLoadConditional(LoadInst *Load, Loop *L) { + return Load->getParent() != L->getHeader(); +} + /// \brief The per-loop class that does most of the work. class LoadEliminationForLoop { public: LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI, DominatorTree *DT) - : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.PSE) {} + : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.getPSE()) {} /// \brief Look through the loop-carried and loop-independent dependences in /// this loop and find store->load dependences. @@ -162,6 +176,12 @@ public: auto *Load = dyn_cast<LoadInst>(Destination); if (!Load) continue; + + // Only progagate the value if they are of the same type. + if (Store->getPointerOperand()->getType() != + Load->getPointerOperand()->getType()) + continue; + Candidates.emplace_front(Load, Store); } @@ -219,12 +239,12 @@ public: if (OtherCand == nullptr) continue; - // Handle the very basic of case when the two stores are in the same - // block so deciding which one forwards is easy. The later one forwards - // as long as they both have a dependence distance of one to the load. + // Handle the very basic case when the two stores are in the same block + // so deciding which one forwards is easy. The later one forwards as + // long as they both have a dependence distance of one to the load. if (Cand.Store->getParent() == OtherCand->Store->getParent() && - Cand.isDependenceDistanceOfOne(PSE) && - OtherCand->isDependenceDistanceOfOne(PSE)) { + Cand.isDependenceDistanceOfOne(PSE, L) && + OtherCand->isDependenceDistanceOfOne(PSE, L)) { // They are in the same block, the later one will forward to the load. if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store)) OtherCand = &Cand; @@ -429,14 +449,21 @@ public: unsigned NumForwarding = 0; for (const StoreToLoadForwardingCandidate Cand : StoreToLoadDependences) { DEBUG(dbgs() << "Candidate " << Cand); + // Make sure that the stored values is available everywhere in the loop in // the next iteration. if (!doesStoreDominatesAllLatches(Cand.Store->getParent(), L, DT)) continue; + // If the load is conditional we can't hoist its 0-iteration instance to + // the preheader because that would make it unconditional. Thus we would + // access a memory location that the original loop did not access. + if (isLoadConditional(Cand.Load, L)) + continue; + // Check whether the SCEV difference is the same as the induction step, // thus we load the value in the next iteration. - if (!Cand.isDependenceDistanceOfOne(PSE)) + if (!Cand.isDependenceDistanceOfOne(PSE, L)) continue; ++NumForwarding; @@ -459,18 +486,25 @@ public: return false; } - if (LAI.PSE.getUnionPredicate().getComplexity() > + if (LAI.getPSE().getUnionPredicate().getComplexity() > LoadElimSCEVCheckThreshold) { DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n"); return false; } - // Point of no-return, start the transformation. First, version the loop if - // necessary. - if (!Checks.empty() || !LAI.PSE.getUnionPredicate().isAlwaysTrue()) { + if (!Checks.empty() || !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) { + if (L->getHeader()->getParent()->optForSize()) { + DEBUG(dbgs() << "Versioning is needed but not allowed when optimizing " + "for size.\n"); + return false; + } + + // Point of no-return, start the transformation. First, version the loop + // if necessary. + LoopVersioning LV(LAI, L, LI, DT, PSE.getSE(), false); LV.setAliasChecks(std::move(Checks)); - LV.setSCEVChecks(LAI.PSE.getUnionPredicate()); + LV.setSCEVChecks(LAI.getPSE().getUnionPredicate()); LV.versionLoop(); } @@ -508,8 +542,11 @@ public: } bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - auto *LAA = &getAnalysis<LoopAccessAnalysis>(); + auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>(); auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); // Build up a worklist of inner-loops to vectorize. This is necessary as the @@ -526,7 +563,7 @@ public: // Now walk the identified inner loops. bool Changed = false; for (Loop *L : Worklist) { - const LoopAccessInfo &LAI = LAA->getInfo(L, ValueToValueMap()); + const LoopAccessInfo &LAI = LAA->getInfo(L); // The actual work is performed by LoadEliminationForLoop. LoadEliminationForLoop LEL(L, LI, LAI, DT); Changed |= LEL.processLoop(); @@ -537,9 +574,10 @@ public: } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequiredID(LoopSimplifyID); AU.addRequired<LoopInfoWrapperPass>(); AU.addPreserved<LoopInfoWrapperPass>(); - AU.addRequired<LoopAccessAnalysis>(); + AU.addRequired<LoopAccessLegacyAnalysis>(); AU.addRequired<ScalarEvolutionWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); @@ -554,9 +592,10 @@ static const char LLE_name[] = "Loop Load Elimination"; INITIALIZE_PASS_BEGIN(LoopLoadElimination, LLE_OPTION, LLE_name, false, false) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis) +INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false) namespace llvm { |