summaryrefslogtreecommitdiff
path: root/lib/Transforms/Scalar/LoopInterchange.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Scalar/LoopInterchange.cpp')
-rw-r--r--lib/Transforms/Scalar/LoopInterchange.cpp356
1 files changed, 192 insertions, 164 deletions
diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp
index 2e0d8e0374c0..4f8dafef230a 100644
--- a/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1,4 +1,4 @@
-//===- LoopInterchange.cpp - Loop interchange pass------------------------===//
+//===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,33 +13,38 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include <cassert>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -51,10 +56,12 @@ static cl::opt<int> LoopInterchangeCostThreshold(
namespace {
-typedef SmallVector<Loop *, 8> LoopVector;
+using LoopVector = SmallVector<Loop *, 8>;
// TODO: Check if we can use a sparse matrix here.
-typedef std::vector<std::vector<char>> CharMatrix;
+using CharMatrix = std::vector<std::vector<char>>;
+
+} // end anonymous namespace
// Maximum number of dependencies that can be handled in the dependency matrix.
static const unsigned MaxMemInstrCount = 100;
@@ -62,14 +69,11 @@ static const unsigned MaxMemInstrCount = 100;
// Maximum loop depth supported.
static const unsigned MaxLoopNestDepth = 10;
-struct LoopInterchange;
-
#ifdef DUMP_DEP_MATRICIES
-void printDepMatrix(CharMatrix &DepMatrix) {
- for (auto I = DepMatrix.begin(), E = DepMatrix.end(); I != E; ++I) {
- std::vector<char> Vec = *I;
- for (auto II = Vec.begin(), EE = Vec.end(); II != EE; ++II)
- DEBUG(dbgs() << *II << " ");
+static void printDepMatrix(CharMatrix &DepMatrix) {
+ for (auto &Row : DepMatrix) {
+ for (auto D : Row)
+ DEBUG(dbgs() << D << " ");
DEBUG(dbgs() << "\n");
}
}
@@ -77,25 +81,24 @@ void printDepMatrix(CharMatrix &DepMatrix) {
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
Loop *L, DependenceInfo *DI) {
- typedef SmallVector<Value *, 16> ValueVector;
+ using ValueVector = SmallVector<Value *, 16>;
+
ValueVector MemInstr;
// For each block.
- for (Loop::block_iterator BB = L->block_begin(), BE = L->block_end();
- BB != BE; ++BB) {
+ for (BasicBlock *BB : L->blocks()) {
// Scan the BB and collect legal loads and stores.
- for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E;
- ++I) {
+ for (Instruction &I : *BB) {
if (!isa<Instruction>(I))
return false;
- if (LoadInst *Ld = dyn_cast<LoadInst>(I)) {
+ if (auto *Ld = dyn_cast<LoadInst>(&I)) {
if (!Ld->isSimple())
return false;
- MemInstr.push_back(&*I);
- } else if (StoreInst *St = dyn_cast<StoreInst>(I)) {
+ MemInstr.push_back(&I);
+ } else if (auto *St = dyn_cast<StoreInst>(&I)) {
if (!St->isSimple())
return false;
- MemInstr.push_back(&*I);
+ MemInstr.push_back(&I);
}
}
}
@@ -171,7 +174,7 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
}
// We don't have a DepMatrix to check legality return false.
- if (DepMatrix.size() == 0)
+ if (DepMatrix.empty())
return false;
return true;
}
@@ -216,7 +219,6 @@ static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
unsigned OuterLoopId, char InnerDep,
char OuterDep) {
-
if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
return false;
@@ -255,7 +257,6 @@ static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
unsigned InnerLoopId,
unsigned OuterLoopId) {
-
unsigned NumRows = DepMatrix.size();
// For each row check if it is valid to interchange.
for (unsigned Row = 0; Row < NumRows; ++Row) {
@@ -270,7 +271,6 @@ static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
}
static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) {
-
DEBUG(dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() << " Loop: %"
<< L.getHeader()->getName() << '\n');
@@ -320,6 +320,8 @@ static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {
return nullptr;
}
+namespace {
+
/// LoopInterchangeLegality checks if it is legal to interchange the loop.
class LoopInterchangeLegality {
public:
@@ -327,11 +329,12 @@ public:
LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA,
OptimizationRemarkEmitter *ORE)
: OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
- PreserveLCSSA(PreserveLCSSA), ORE(ORE), InnerLoopHasReduction(false) {}
+ PreserveLCSSA(PreserveLCSSA), ORE(ORE) {}
/// Check if the loops can be interchanged.
bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
CharMatrix &DepMatrix);
+
/// Check if the loop structure is understood. We do not handle triangular
/// loops for now.
bool isLoopStructureUnderstood(PHINode *InnerInductionVar);
@@ -348,6 +351,7 @@ private:
bool findInductionAndReductions(Loop *L,
SmallVector<PHINode *, 8> &Inductions,
SmallVector<PHINode *, 8> &Reductions);
+
Loop *OuterLoop;
Loop *InnerLoop;
@@ -355,10 +359,11 @@ private:
LoopInfo *LI;
DominatorTree *DT;
bool PreserveLCSSA;
+
/// Interface to emit optimization remarks.
OptimizationRemarkEmitter *ORE;
- bool InnerLoopHasReduction;
+ bool InnerLoopHasReduction = false;
};
/// LoopInterchangeProfitability checks if it is profitable to interchange the
@@ -381,6 +386,7 @@ private:
/// Scev analysis.
ScalarEvolution *SE;
+
/// Interface to emit optimization remarks.
OptimizationRemarkEmitter *ORE;
};
@@ -415,6 +421,7 @@ private:
/// Scev analysis.
ScalarEvolution *SE;
+
LoopInfo *LI;
DominatorTree *DT;
BasicBlock *LoopExit;
@@ -424,16 +431,16 @@ private:
// Main LoopInterchange Pass.
struct LoopInterchange : public FunctionPass {
static char ID;
- ScalarEvolution *SE;
- LoopInfo *LI;
- DependenceInfo *DI;
- DominatorTree *DT;
+ ScalarEvolution *SE = nullptr;
+ LoopInfo *LI = nullptr;
+ DependenceInfo *DI = nullptr;
+ DominatorTree *DT = nullptr;
bool PreserveLCSSA;
+
/// Interface to emit optimization remarks.
OptimizationRemarkEmitter *ORE;
- LoopInterchange()
- : FunctionPass(ID), SE(nullptr), LI(nullptr), DI(nullptr), DT(nullptr) {
+ LoopInterchange() : FunctionPass(ID) {
initializeLoopInterchangePass(*PassRegistry::getPassRegistry());
}
@@ -501,7 +508,6 @@ struct LoopInterchange : public FunctionPass {
}
bool processLoopList(LoopVector LoopList, Function &F) {
-
bool Changed = false;
unsigned LoopNestDepth = LoopList.size();
if (LoopNestDepth < 2) {
@@ -580,7 +586,6 @@ struct LoopInterchange : public FunctionPass {
bool processLoop(LoopVector LoopList, unsigned InnerLoopId,
unsigned OuterLoopId, BasicBlock *LoopNestExit,
std::vector<std::vector<char>> &DependencyMatrix) {
-
DEBUG(dbgs() << "Processing Inner Loop Id = " << InnerLoopId
<< " and OuterLoopId = " << OuterLoopId << "\n");
Loop *InnerLoop = LoopList[InnerLoopId];
@@ -599,10 +604,12 @@ struct LoopInterchange : public FunctionPass {
return false;
}
- ORE->emit(OptimizationRemark(DEBUG_TYPE, "Interchanged",
- InnerLoop->getStartLoc(),
- InnerLoop->getHeader())
- << "Loop interchanged with enclosing loop.");
+ ORE->emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "Interchanged",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Loop interchanged with enclosing loop.";
+ });
LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT,
LoopNestExit, LIL.hasInnerLoopReduction());
@@ -612,9 +619,10 @@ struct LoopInterchange : public FunctionPass {
}
};
-} // end of namespace
+} // end anonymous namespace
+
bool LoopInterchangeLegality::areAllUsesReductions(Instruction *Ins, Loop *L) {
- return none_of(Ins->users(), [=](User *U) -> bool {
+ return llvm::none_of(Ins->users(), [=](User *U) -> bool {
auto *UserIns = dyn_cast<PHINode>(U);
RecurrenceDescriptor RD;
return !UserIns || !RecurrenceDescriptor::isReductionPHI(UserIns, L, RD);
@@ -664,11 +672,9 @@ bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
if (!OuterLoopHeaderBI)
return false;
- for (unsigned i = 0, e = OuterLoopHeaderBI->getNumSuccessors(); i < e; ++i) {
- if (OuterLoopHeaderBI->getSuccessor(i) != InnerLoopPreHeader &&
- OuterLoopHeaderBI->getSuccessor(i) != OuterLoopLatch)
+ for (BasicBlock *Succ : OuterLoopHeaderBI->successors())
+ if (Succ != InnerLoopPreHeader && Succ != OuterLoopLatch)
return false;
- }
DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
// We do not have any basic block in between now make sure the outer header
@@ -682,10 +688,8 @@ bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
return true;
}
-
bool LoopInterchangeLegality::isLoopStructureUnderstood(
PHINode *InnerInduction) {
-
unsigned Num = InnerInduction->getNumOperands();
BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
for (unsigned i = 0; i < Num; ++i) {
@@ -750,12 +754,12 @@ static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock) {
static BasicBlock *getLoopLatchExitBlock(BasicBlock *LatchBlock,
BasicBlock *LoopHeader) {
if (BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator())) {
- unsigned Num = BI->getNumSuccessors();
- assert(Num == 2);
- for (unsigned i = 0; i < Num; ++i) {
- if (BI->getSuccessor(i) == LoopHeader)
+ assert(BI->getNumSuccessors() == 2 &&
+ "Branch leaving loop latch must have 2 successors");
+ for (BasicBlock *Succ : BI->successors()) {
+ if (Succ == LoopHeader)
continue;
- return BI->getSuccessor(i);
+ return Succ;
}
}
return nullptr;
@@ -764,7 +768,6 @@ static BasicBlock *getLoopLatchExitBlock(BasicBlock *LatchBlock,
// This function indicates the current limitations in the transform as a result
// of which we do not proceed.
bool LoopInterchangeLegality::currentLimitations() {
-
BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
@@ -777,12 +780,13 @@ bool LoopInterchangeLegality::currentLimitations() {
if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) {
DEBUG(dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n");
- ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
- "UnsupportedPHIInner",
- InnerLoop->getStartLoc(),
- InnerLoop->getHeader())
- << "Only inner loops with induction or reduction PHI nodes can be"
- " interchange currently.");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Only inner loops with induction or reduction PHI nodes can be"
+ " interchange currently.";
+ });
return true;
}
@@ -790,12 +794,13 @@ bool LoopInterchangeLegality::currentLimitations() {
if (Inductions.size() != 1) {
DEBUG(dbgs() << "We currently only support loops with 1 induction variable."
<< "Failed to interchange due to current limitation\n");
- ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
- "MultiInductionInner",
- InnerLoop->getStartLoc(),
- InnerLoop->getHeader())
- << "Only inner loops with 1 induction variable can be "
- "interchanged currently.");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "MultiInductionInner",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Only inner loops with 1 induction variable can be "
+ "interchanged currently.";
+ });
return true;
}
if (Reductions.size() > 0)
@@ -806,12 +811,13 @@ bool LoopInterchangeLegality::currentLimitations() {
if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) {
DEBUG(dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n");
- ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
- "UnsupportedPHIOuter",
- OuterLoop->getStartLoc(),
- OuterLoop->getHeader())
- << "Only outer loops with induction or reduction PHI nodes can be"
- " interchanged currently.");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
+ OuterLoop->getStartLoc(),
+ OuterLoop->getHeader())
+ << "Only outer loops with induction or reduction PHI nodes can be"
+ " interchanged currently.";
+ });
return true;
}
@@ -820,35 +826,38 @@ bool LoopInterchangeLegality::currentLimitations() {
if (!Reductions.empty()) {
DEBUG(dbgs() << "Outer loops with reductions are not supported "
<< "currently.\n");
- ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
- "ReductionsOuter",
- OuterLoop->getStartLoc(),
- OuterLoop->getHeader())
- << "Outer loops with reductions cannot be interchangeed "
- "currently.");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "ReductionsOuter",
+ OuterLoop->getStartLoc(),
+ OuterLoop->getHeader())
+ << "Outer loops with reductions cannot be interchangeed "
+ "currently.";
+ });
return true;
}
// TODO: Currently we handle only loops with 1 induction variable.
if (Inductions.size() != 1) {
DEBUG(dbgs() << "Loops with more than 1 induction variables are not "
<< "supported currently.\n");
- ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
- "MultiIndutionOuter",
- OuterLoop->getStartLoc(),
- OuterLoop->getHeader())
- << "Only outer loops with 1 induction variable can be "
- "interchanged currently.");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "MultiIndutionOuter",
+ OuterLoop->getStartLoc(),
+ OuterLoop->getHeader())
+ << "Only outer loops with 1 induction variable can be "
+ "interchanged currently.";
+ });
return true;
}
// TODO: Triangular loops are not handled for now.
if (!isLoopStructureUnderstood(InnerInductionVar)) {
DEBUG(dbgs() << "Loop structure not understood by pass\n");
- ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
- "UnsupportedStructureInner",
- InnerLoop->getStartLoc(),
- InnerLoop->getHeader())
- << "Inner loop structure not understood currently.");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Inner loop structure not understood currently.";
+ });
return true;
}
@@ -857,24 +866,26 @@ bool LoopInterchangeLegality::currentLimitations() {
getLoopLatchExitBlock(OuterLoopLatch, OuterLoopHeader);
if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true)) {
DEBUG(dbgs() << "Can only handle LCSSA PHIs in outer loops currently.\n");
- ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
- "NoLCSSAPHIOuter",
- OuterLoop->getStartLoc(),
- OuterLoop->getHeader())
- << "Only outer loops with LCSSA PHIs can be interchange "
- "currently.");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NoLCSSAPHIOuter",
+ OuterLoop->getStartLoc(),
+ OuterLoop->getHeader())
+ << "Only outer loops with LCSSA PHIs can be interchange "
+ "currently.";
+ });
return true;
}
LoopExitBlock = getLoopLatchExitBlock(InnerLoopLatch, InnerLoopHeader);
if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false)) {
DEBUG(dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n");
- ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
- "NoLCSSAPHIOuterInner",
- InnerLoop->getStartLoc(),
- InnerLoop->getHeader())
- << "Only inner loops with LCSSA PHIs can be interchange "
- "currently.");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NoLCSSAPHIOuterInner",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Only inner loops with LCSSA PHIs can be interchange "
+ "currently.";
+ });
return true;
}
@@ -899,11 +910,12 @@ bool LoopInterchangeLegality::currentLimitations() {
if (!InnerIndexVarInc) {
DEBUG(dbgs() << "Did not find an instruction to increment the induction "
<< "variable.\n");
- ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
- "NoIncrementInInner",
- InnerLoop->getStartLoc(),
- InnerLoop->getHeader())
- << "The inner loop does not increment the induction variable.");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NoIncrementInInner",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "The inner loop does not increment the induction variable.";
+ });
return true;
}
@@ -912,8 +924,9 @@ bool LoopInterchangeLegality::currentLimitations() {
// instruction.
bool FoundInduction = false;
- for (const Instruction &I : reverse(*InnerLoopLatch)) {
- if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I))
+ for (const Instruction &I : llvm::reverse(*InnerLoopLatch)) {
+ if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I) ||
+ isa<ZExtInst>(I))
continue;
// We found an instruction. If this is not induction variable then it is not
@@ -921,12 +934,13 @@ bool LoopInterchangeLegality::currentLimitations() {
if (!I.isIdenticalTo(InnerIndexVarInc)) {
DEBUG(dbgs() << "Found unsupported instructions between induction "
<< "variable increment and branch.\n");
- ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
- "UnsupportedInsBetweenInduction",
- InnerLoop->getStartLoc(),
- InnerLoop->getHeader())
- << "Found unsupported instruction between induction variable "
- "increment and branch.");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(
+ DEBUG_TYPE, "UnsupportedInsBetweenInduction",
+ InnerLoop->getStartLoc(), InnerLoop->getHeader())
+ << "Found unsupported instruction between induction variable "
+ "increment and branch.";
+ });
return true;
}
@@ -937,11 +951,12 @@ bool LoopInterchangeLegality::currentLimitations() {
// current limitation.
if (!FoundInduction) {
DEBUG(dbgs() << "Did not find the induction variable.\n");
- ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
- "NoIndutionVariable",
- InnerLoop->getStartLoc(),
- InnerLoop->getHeader())
- << "Did not find the induction variable.");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NoIndutionVariable",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Did not find the induction variable.";
+ });
return true;
}
return false;
@@ -950,19 +965,31 @@ bool LoopInterchangeLegality::currentLimitations() {
bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
unsigned OuterLoopId,
CharMatrix &DepMatrix) {
-
if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
<< " and OuterLoopId = " << OuterLoopId
<< " due to dependence\n");
- ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
- "Dependence",
- InnerLoop->getStartLoc(),
- InnerLoop->getHeader())
- << "Cannot interchange loops due to dependences.");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Cannot interchange loops due to dependences.";
+ });
return false;
}
+ // Check if outer and inner loop contain legal instructions only.
+ for (auto *BB : OuterLoop->blocks())
+ for (Instruction &I : *BB)
+ if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+ // readnone functions do not prevent interchanging.
+ if (CI->doesNotReadMemory())
+ continue;
+ DEBUG(dbgs() << "Loops with call instructions cannot be interchanged "
+ << "safely.");
+ return false;
+ }
+
// Create unique Preheaders if we already do not have one.
BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
@@ -995,12 +1022,13 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
// Check if the loops are tightly nested.
if (!tightlyNested(OuterLoop, InnerLoop)) {
DEBUG(dbgs() << "Loops not tightly nested\n");
- ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
- "NotTightlyNested",
- InnerLoop->getStartLoc(),
- InnerLoop->getHeader())
- << "Cannot interchange loops because they are not tightly "
- "nested.");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Cannot interchange loops because they are not tightly "
+ "nested.";
+ });
return false;
}
@@ -1010,9 +1038,8 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
int LoopInterchangeProfitability::getInstrOrderCost() {
unsigned GoodOrder, BadOrder;
BadOrder = GoodOrder = 0;
- for (auto BI = InnerLoop->block_begin(), BE = InnerLoop->block_end();
- BI != BE; ++BI) {
- for (Instruction &Ins : **BI) {
+ for (BasicBlock *BB : InnerLoop->blocks()) {
+ for (Instruction &Ins : *BB) {
if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
unsigned NumOp = GEP->getNumOperands();
bool FoundInnerInduction = false;
@@ -1064,12 +1091,11 @@ static bool isProfitableForVectorization(unsigned InnerLoopId,
// TODO: Improve this heuristic to catch more cases.
// If the inner loop is loop independent or doesn't carry any dependency it is
// profitable to move this to outer position.
- unsigned Row = DepMatrix.size();
- for (unsigned i = 0; i < Row; ++i) {
- if (DepMatrix[i][InnerLoopId] != 'S' && DepMatrix[i][InnerLoopId] != 'I')
+ for (auto &Row : DepMatrix) {
+ if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I')
return false;
// TODO: We need to improve this heuristic.
- if (DepMatrix[i][OuterLoopId] != '=')
+ if (Row[OuterLoopId] != '=')
return false;
}
// If outer loop has dependence and inner loop is loop independent then it is
@@ -1080,7 +1106,6 @@ static bool isProfitableForVectorization(unsigned InnerLoopId,
bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
unsigned OuterLoopId,
CharMatrix &DepMatrix) {
-
// TODO: Add better profitability checks.
// e.g
// 1) Construct dependency matrix and move the one with no loop carried dep
@@ -1099,14 +1124,15 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
return true;
- ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
- "InterchangeNotProfitable",
- InnerLoop->getStartLoc(),
- InnerLoop->getHeader())
- << "Interchanging loops is too costly (cost="
- << ore::NV("Cost", Cost) << ", threshold="
- << ore::NV("Threshold", LoopInterchangeCostThreshold) <<
- ") and it does not improve parallelism.");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Interchanging loops is too costly (cost="
+ << ore::NV("Cost", Cost) << ", threshold="
+ << ore::NV("Threshold", LoopInterchangeCostThreshold)
+ << ") and it does not improve parallelism.";
+ });
return false;
}
@@ -1145,7 +1171,7 @@ bool LoopInterchangeTransform::transform() {
bool Transformed = false;
Instruction *InnerIndexVar;
- if (InnerLoop->getSubLoops().size() == 0) {
+ if (InnerLoop->getSubLoops().empty()) {
BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
DEBUG(dbgs() << "Calling Split Inner Loop\n");
PHINode *InductionPHI = getInductionVariable(InnerLoop, SE);
@@ -1159,7 +1185,11 @@ bool LoopInterchangeTransform::transform() {
else
InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0));
- //
+ // Ensure that InductionPHI is the first Phi node as required by
+ // splitInnerLoopHeader
+ if (&InductionPHI->getParent()->front() != InductionPHI)
+ InductionPHI->moveBefore(&InductionPHI->getParent()->front());
+
// Split at the place were the induction variable is
// incremented/decremented.
// TODO: This splitting logic may not work always. Fix this.
@@ -1188,13 +1218,12 @@ void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) {
}
void LoopInterchangeTransform::splitInnerLoopHeader() {
-
// Split the inner loop header out. Here make sure that the reduction PHI's
// stay in the innerloop body.
BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
if (InnerLoopHasReduction) {
- // FIXME: Check if the induction PHI will always be the first PHI.
+ // Note: The induction PHI must be the first PHI for this to work
BasicBlock *New = InnerLoopHeader->splitBasicBlock(
++(InnerLoopHeader->begin()), InnerLoopHeader->getName() + ".split");
if (LI)
@@ -1244,7 +1273,6 @@ void LoopInterchangeTransform::updateIncomingBlock(BasicBlock *CurrBlock,
}
bool LoopInterchangeTransform::adjustLoopBranches() {
-
DEBUG(dbgs() << "adjustLoopBranches called\n");
// Adjust the loop preheader
BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
@@ -1352,8 +1380,8 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
return true;
}
-void LoopInterchangeTransform::adjustLoopPreheaders() {
+void LoopInterchangeTransform::adjustLoopPreheaders() {
// We have interchanged the preheaders so we need to interchange the data in
// the preheader as well.
// This is because the content of inner preheader was previously executed
@@ -1373,7 +1401,6 @@ void LoopInterchangeTransform::adjustLoopPreheaders() {
}
bool LoopInterchangeTransform::adjustLoopLinks() {
-
// Adjust all branches in the inner and outer loop.
bool Changed = adjustLoopBranches();
if (Changed)
@@ -1382,6 +1409,7 @@ bool LoopInterchangeTransform::adjustLoopLinks() {
}
char LoopInterchange::ID = 0;
+
INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange",
"Interchanges loops for cache reuse", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)