diff options
Diffstat (limited to 'llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp')
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp | 94 |
1 files changed, 19 insertions, 75 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp index b615a0a0a9c0..179ccde8d035 100644 --- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -46,13 +46,10 @@ #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopVersioning.h" @@ -91,8 +88,9 @@ struct StoreToLoadForwardingCandidate { StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store) : Load(Load), Store(Store) {} - /// Return true if the dependence from the store to the load has a - /// distance of one. E.g. A[i+1] = A[i] + /// Return true if the dependence from the store to the load has an + /// absolute distance of one. + /// E.g. A[i+1] = A[i] (or A[i-1] = A[i] for descending loop) bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE, Loop *L) const { Value *LoadPtr = Load->getPointerOperand(); @@ -106,11 +104,19 @@ struct StoreToLoadForwardingCandidate { DL.getTypeSizeInBits(getLoadStoreType(Store)) && "Should be a known dependence"); - // Currently we only support accesses with unit stride. FIXME: we should be - // able to handle non unit stirde as well as long as the stride is equal to - // the dependence distance. - if (getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0) != 1 || - getPtrStride(PSE, LoadType, StorePtr, L).value_or(0) != 1) + int64_t StrideLoad = getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0); + int64_t StrideStore = getPtrStride(PSE, LoadType, StorePtr, L).value_or(0); + if (!StrideLoad || !StrideStore || StrideLoad != StrideStore) + return false; + + // TODO: This check for stride values other than 1 and -1 can be eliminated. + // However, doing so may cause the LoopAccessAnalysis to overcompensate, + // generating numerous non-wrap runtime checks that may undermine the + // benefits of load elimination. To safely implement support for non-unit + // strides, we would need to ensure either that the processed case does not + // require these additional checks, or improve the LAA to handle them more + // efficiently, or potentially both. + if (std::abs(StrideLoad) != 1) return false; unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType)); @@ -123,7 +129,7 @@ struct StoreToLoadForwardingCandidate { auto *Dist = cast<SCEVConstant>( PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV)); const APInt &Val = Dist->getAPInt(); - return Val == TypeByteSize; + return Val == TypeByteSize * StrideLoad; } Value *getLoadPtr() const { return Load->getPointerOperand(); } @@ -658,70 +664,6 @@ static bool eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, return Changed; } -namespace { - -/// The pass. Most of the work is delegated to the per-loop -/// LoadEliminationForLoop class. -class LoopLoadElimination : public FunctionPass { -public: - static char ID; - - LoopLoadElimination() : FunctionPass(ID) { - initializeLoopLoadEliminationPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override { - if (skipFunction(F)) - return false; - - auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - auto &LAIs = getAnalysis<LoopAccessLegacyAnalysis>().getLAIs(); - auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); - auto *BFI = (PSI && PSI->hasProfileSummary()) ? - &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() : - nullptr; - auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - - // Process each loop nest in the function. - return eliminateLoadsAcrossLoops(F, LI, DT, BFI, PSI, SE, /*AC*/ nullptr, - LAIs); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequiredID(LoopSimplifyID); - AU.addRequired<LoopInfoWrapperPass>(); - AU.addPreserved<LoopInfoWrapperPass>(); - AU.addRequired<LoopAccessLegacyAnalysis>(); - AU.addRequired<ScalarEvolutionWrapperPass>(); - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addPreserved<GlobalsAAWrapperPass>(); - AU.addRequired<ProfileSummaryInfoWrapperPass>(); - LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); - } -}; - -} // end anonymous namespace - -char LoopLoadElimination::ID; - -static const char LLE_name[] = "Loop Load Elimination"; - -INITIALIZE_PASS_BEGIN(LoopLoadElimination, LLE_OPTION, LLE_name, false, false) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopSimplify) -INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass) -INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false) - -FunctionPass *llvm::createLoopLoadEliminationPass() { - return new LoopLoadElimination(); -} - PreservedAnalyses LoopLoadEliminationPass::run(Function &F, FunctionAnalysisManager &AM) { auto &LI = AM.getResult<LoopAnalysis>(F); @@ -744,5 +686,7 @@ PreservedAnalyses LoopLoadEliminationPass::run(Function &F, return PreservedAnalyses::all(); PreservedAnalyses PA; + PA.preserve<DominatorTreeAnalysis>(); + PA.preserve<LoopAnalysis>(); return PA; } |