aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
diff options
context:
space:
mode:
author: Dimitry Andric <dim@FreeBSD.org> 2023-07-26 19:03:47 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2023-07-26 19:04:23 +0000
commit: 7fa27ce4a07f19b07799a767fc29416f3b625afb (patch)
tree: 27825c83636c4de341eb09a74f49f5d38a15d165 /llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
parent: e3b557809604d036af6e00c60f012c2025b59a5e (diff)
Diffstat (limited to 'llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp')
-rw-r--r-- llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp 94
1 files changed, 19 insertions, 75 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index b615a0a0a9c0..179ccde8d035 100644
--- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -46,13 +46,10 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
@@ -91,8 +88,9 @@ struct StoreToLoadForwardingCandidate {
StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store)
: Load(Load), Store(Store) {}
- /// Return true if the dependence from the store to the load has a
- /// distance of one. E.g. A[i+1] = A[i]
+ /// Return true if the dependence from the store to the load has an
+ /// absolute distance of one.
+ /// E.g. A[i+1] = A[i] (or A[i-1] = A[i] for descending loop)
bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
Loop *L) const {
Value *LoadPtr = Load->getPointerOperand();
@@ -106,11 +104,19 @@ struct StoreToLoadForwardingCandidate {
DL.getTypeSizeInBits(getLoadStoreType(Store)) &&
"Should be a known dependence");
- // Currently we only support accesses with unit stride. FIXME: we should be
- // able to handle non unit stirde as well as long as the stride is equal to
- // the dependence distance.
- if (getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0) != 1 ||
- getPtrStride(PSE, LoadType, StorePtr, L).value_or(0) != 1)
+ int64_t StrideLoad = getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0);
+ int64_t StrideStore = getPtrStride(PSE, LoadType, StorePtr, L).value_or(0);
+ if (!StrideLoad || !StrideStore || StrideLoad != StrideStore)
+ return false;
+
+ // TODO: This check for stride values other than 1 and -1 can be eliminated.
+ // However, doing so may cause the LoopAccessAnalysis to overcompensate,
+ // generating numerous non-wrap runtime checks that may undermine the
+ // benefits of load elimination. To safely implement support for non-unit
+ // strides, we would need to ensure either that the processed case does not
+ // require these additional checks, or improve the LAA to handle them more
+ // efficiently, or potentially both.
+ if (std::abs(StrideLoad) != 1)
return false;
unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));
@@ -123,7 +129,7 @@ struct StoreToLoadForwardingCandidate {
auto *Dist = cast<SCEVConstant>(
PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
const APInt &Val = Dist->getAPInt();
- return Val == TypeByteSize;
+ return Val == TypeByteSize * StrideLoad;
}
Value *getLoadPtr() const { return Load->getPointerOperand(); }
@@ -658,70 +664,6 @@ static bool eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI,
return Changed;
}
-namespace {
-
-/// The pass. Most of the work is delegated to the per-loop
-/// LoadEliminationForLoop class.
-class LoopLoadElimination : public FunctionPass {
-public:
- static char ID;
-
- LoopLoadElimination() : FunctionPass(ID) {
- initializeLoopLoadEliminationPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
-
- auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto &LAIs = getAnalysis<LoopAccessLegacyAnalysis>().getLAIs();
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- auto *BFI = (PSI && PSI->hasProfileSummary()) ?
- &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
- nullptr;
- auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
-
- // Process each loop nest in the function.
- return eliminateLoadsAcrossLoops(F, LI, DT, BFI, PSI, SE, /*AC*/ nullptr,
- LAIs);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequired<LoopAccessLegacyAnalysis>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addRequired<ProfileSummaryInfoWrapperPass>();
- LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
- }
-};
-
-} // end anonymous namespace
-
-char LoopLoadElimination::ID;
-
-static const char LLE_name[] = "Loop Load Elimination";
-
-INITIALIZE_PASS_BEGIN(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
-INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
-
-FunctionPass *llvm::createLoopLoadEliminationPass() {
- return new LoopLoadElimination();
-}
-
PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &LI = AM.getResult<LoopAnalysis>(F);
@@ -744,5 +686,7 @@ PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
return PreservedAnalyses::all();
PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<LoopAnalysis>();
return PA;
}