summaryrefslogtreecommitdiff
path: root/lib/Transforms/Scalar/LoopLoadElimination.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Scalar/LoopLoadElimination.cpp')
-rw-r--r--lib/Transforms/Scalar/LoopLoadElimination.cpp75
1 files changed, 57 insertions, 18 deletions
diff --git a/lib/Transforms/Scalar/LoopLoadElimination.cpp b/lib/Transforms/Scalar/LoopLoadElimination.cpp
index 1064d088514d5..f29228c7659e2 100644
--- a/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -28,6 +28,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include <forward_list>
@@ -61,7 +62,8 @@ struct StoreToLoadForwardingCandidate {
/// \brief Return true if the dependence from the store to the load has a
/// distance of one. E.g. A[i+1] = A[i]
- bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE) const {
+ bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
+ Loop *L) const {
Value *LoadPtr = Load->getPointerOperand();
Value *StorePtr = Store->getPointerOperand();
Type *LoadPtrType = LoadPtr->getType();
@@ -72,6 +74,13 @@ struct StoreToLoadForwardingCandidate {
LoadType == StorePtr->getType()->getPointerElementType() &&
"Should be a known dependence");
+ // Currently we only support accesses with unit stride. FIXME: we should be
+ // able to handle non unit stirde as well as long as the stride is equal to
+ // the dependence distance.
+ if (getPtrStride(PSE, LoadPtr, L) != 1 ||
+ getPtrStride(PSE, StorePtr, L) != 1)
+ return false;
+
auto &DL = Load->getParent()->getModule()->getDataLayout();
unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));
@@ -83,7 +92,7 @@ struct StoreToLoadForwardingCandidate {
auto *Dist = cast<SCEVConstant>(
PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
const APInt &Val = Dist->getAPInt();
- return Val.abs() == TypeByteSize;
+ return Val == TypeByteSize;
}
Value *getLoadPtr() const { return Load->getPointerOperand(); }
@@ -110,12 +119,17 @@ bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L,
});
}
+/// \brief Return true if the load is not executed on all paths in the loop.
+static bool isLoadConditional(LoadInst *Load, Loop *L) {
+ return Load->getParent() != L->getHeader();
+}
+
/// \brief The per-loop class that does most of the work.
class LoadEliminationForLoop {
public:
LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
DominatorTree *DT)
- : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.PSE) {}
+ : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.getPSE()) {}
/// \brief Look through the loop-carried and loop-independent dependences in
/// this loop and find store->load dependences.
@@ -162,6 +176,12 @@ public:
auto *Load = dyn_cast<LoadInst>(Destination);
if (!Load)
continue;
+
+ // Only progagate the value if they are of the same type.
+ if (Store->getPointerOperand()->getType() !=
+ Load->getPointerOperand()->getType())
+ continue;
+
Candidates.emplace_front(Load, Store);
}
@@ -219,12 +239,12 @@ public:
if (OtherCand == nullptr)
continue;
- // Handle the very basic of case when the two stores are in the same
- // block so deciding which one forwards is easy. The later one forwards
- // as long as they both have a dependence distance of one to the load.
+ // Handle the very basic case when the two stores are in the same block
+ // so deciding which one forwards is easy. The later one forwards as
+ // long as they both have a dependence distance of one to the load.
if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
- Cand.isDependenceDistanceOfOne(PSE) &&
- OtherCand->isDependenceDistanceOfOne(PSE)) {
+ Cand.isDependenceDistanceOfOne(PSE, L) &&
+ OtherCand->isDependenceDistanceOfOne(PSE, L)) {
// They are in the same block, the later one will forward to the load.
if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store))
OtherCand = &Cand;
@@ -429,14 +449,21 @@ public:
unsigned NumForwarding = 0;
for (const StoreToLoadForwardingCandidate Cand : StoreToLoadDependences) {
DEBUG(dbgs() << "Candidate " << Cand);
+
// Make sure that the stored values is available everywhere in the loop in
// the next iteration.
if (!doesStoreDominatesAllLatches(Cand.Store->getParent(), L, DT))
continue;
+ // If the load is conditional we can't hoist its 0-iteration instance to
+ // the preheader because that would make it unconditional. Thus we would
+ // access a memory location that the original loop did not access.
+ if (isLoadConditional(Cand.Load, L))
+ continue;
+
// Check whether the SCEV difference is the same as the induction step,
// thus we load the value in the next iteration.
- if (!Cand.isDependenceDistanceOfOne(PSE))
+ if (!Cand.isDependenceDistanceOfOne(PSE, L))
continue;
++NumForwarding;
@@ -459,18 +486,25 @@ public:
return false;
}
- if (LAI.PSE.getUnionPredicate().getComplexity() >
+ if (LAI.getPSE().getUnionPredicate().getComplexity() >
LoadElimSCEVCheckThreshold) {
DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
return false;
}
- // Point of no-return, start the transformation. First, version the loop if
- // necessary.
- if (!Checks.empty() || !LAI.PSE.getUnionPredicate().isAlwaysTrue()) {
+ if (!Checks.empty() || !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) {
+ if (L->getHeader()->getParent()->optForSize()) {
+ DEBUG(dbgs() << "Versioning is needed but not allowed when optimizing "
+ "for size.\n");
+ return false;
+ }
+
+ // Point of no-return, start the transformation. First, version the loop
+ // if necessary.
+
LoopVersioning LV(LAI, L, LI, DT, PSE.getSE(), false);
LV.setAliasChecks(std::move(Checks));
- LV.setSCEVChecks(LAI.PSE.getUnionPredicate());
+ LV.setSCEVChecks(LAI.getPSE().getUnionPredicate());
LV.versionLoop();
}
@@ -508,8 +542,11 @@ public:
}
bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+
auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto *LAA = &getAnalysis<LoopAccessAnalysis>();
+ auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
// Build up a worklist of inner-loops to vectorize. This is necessary as the
@@ -526,7 +563,7 @@ public:
// Now walk the identified inner loops.
bool Changed = false;
for (Loop *L : Worklist) {
- const LoopAccessInfo &LAI = LAA->getInfo(L, ValueToValueMap());
+ const LoopAccessInfo &LAI = LAA->getInfo(L);
// The actual work is performed by LoadEliminationForLoop.
LoadEliminationForLoop LEL(L, LI, LAI, DT);
Changed |= LEL.processLoop();
@@ -537,9 +574,10 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequiredID(LoopSimplifyID);
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequired<LoopAccessAnalysis>();
+ AU.addRequired<LoopAccessLegacyAnalysis>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
@@ -554,9 +592,10 @@ static const char LLE_name[] = "Loop Load Elimination";
INITIALIZE_PASS_BEGIN(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
namespace llvm {