diff options
Diffstat (limited to 'lib/Transforms/Scalar/LoopInterchange.cpp')
-rw-r--r-- | lib/Transforms/Scalar/LoopInterchange.cpp | 356 |
1 files changed, 192 insertions, 164 deletions
diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp index 2e0d8e0374c0..4f8dafef230a 100644 --- a/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/lib/Transforms/Scalar/LoopInterchange.cpp @@ -1,4 +1,4 @@ -//===- LoopInterchange.cpp - Loop interchange pass------------------------===// +//===- LoopInterchange.cpp - Loop interchange pass-------------------------===// // // The LLVM Compiler Infrastructure // @@ -13,33 +13,38 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopIterator.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include <cassert> +#include <utility> +#include <vector> using namespace llvm; @@ -51,10 +56,12 @@ static cl::opt<int> LoopInterchangeCostThreshold( namespace { -typedef SmallVector<Loop *, 8> LoopVector; +using LoopVector = SmallVector<Loop *, 8>; // TODO: Check if we can use a sparse matrix here. -typedef std::vector<std::vector<char>> CharMatrix; +using CharMatrix = std::vector<std::vector<char>>; + +} // end anonymous namespace // Maximum number of dependencies that can be handled in the dependency matrix. static const unsigned MaxMemInstrCount = 100; @@ -62,14 +69,11 @@ static const unsigned MaxMemInstrCount = 100; // Maximum loop depth supported. static const unsigned MaxLoopNestDepth = 10; -struct LoopInterchange; - #ifdef DUMP_DEP_MATRICIES -void printDepMatrix(CharMatrix &DepMatrix) { - for (auto I = DepMatrix.begin(), E = DepMatrix.end(); I != E; ++I) { - std::vector<char> Vec = *I; - for (auto II = Vec.begin(), EE = Vec.end(); II != EE; ++II) - DEBUG(dbgs() << *II << " "); +static void printDepMatrix(CharMatrix &DepMatrix) { + for (auto &Row : DepMatrix) { + for (auto D : Row) + DEBUG(dbgs() << D << " "); DEBUG(dbgs() << "\n"); } } @@ -77,25 +81,24 @@ void printDepMatrix(CharMatrix &DepMatrix) { static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI) { - typedef SmallVector<Value *, 16> ValueVector; + using ValueVector = SmallVector<Value *, 16>; + ValueVector MemInstr; // For each block. - for (Loop::block_iterator BB = L->block_begin(), BE = L->block_end(); - BB != BE; ++BB) { + for (BasicBlock *BB : L->blocks()) { // Scan the BB and collect legal loads and stores. - for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; - ++I) { + for (Instruction &I : *BB) { if (!isa<Instruction>(I)) return false; - if (LoadInst *Ld = dyn_cast<LoadInst>(I)) { + if (auto *Ld = dyn_cast<LoadInst>(&I)) { if (!Ld->isSimple()) return false; - MemInstr.push_back(&*I); - } else if (StoreInst *St = dyn_cast<StoreInst>(I)) { + MemInstr.push_back(&I); + } else if (auto *St = dyn_cast<StoreInst>(&I)) { if (!St->isSimple()) return false; - MemInstr.push_back(&*I); + MemInstr.push_back(&I); } } } @@ -171,7 +174,7 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, } // We don't have a DepMatrix to check legality return false. - if (DepMatrix.size() == 0) + if (DepMatrix.empty()) return false; return true; } @@ -216,7 +219,6 @@ static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row, static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row, unsigned OuterLoopId, char InnerDep, char OuterDep) { - if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId)) return false; @@ -255,7 +257,6 @@ static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row, static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId) { - unsigned NumRows = DepMatrix.size(); // For each row check if it is valid to interchange. for (unsigned Row = 0; Row < NumRows; ++Row) { @@ -270,7 +271,6 @@ static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, } static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) { - DEBUG(dbgs() << "Calling populateWorklist on Func: " << L.getHeader()->getParent()->getName() << " Loop: %" << L.getHeader()->getName() << '\n'); @@ -320,6 +320,8 @@ static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) { return nullptr; } +namespace { + /// LoopInterchangeLegality checks if it is legal to interchange the loop. class LoopInterchangeLegality { public: @@ -327,11 +329,12 @@ public: LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA, OptimizationRemarkEmitter *ORE) : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), - PreserveLCSSA(PreserveLCSSA), ORE(ORE), InnerLoopHasReduction(false) {} + PreserveLCSSA(PreserveLCSSA), ORE(ORE) {} /// Check if the loops can be interchanged. bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix); + /// Check if the loop structure is understood. We do not handle triangular /// loops for now. bool isLoopStructureUnderstood(PHINode *InnerInductionVar); @@ -348,6 +351,7 @@ private: bool findInductionAndReductions(Loop *L, SmallVector<PHINode *, 8> &Inductions, SmallVector<PHINode *, 8> &Reductions); + Loop *OuterLoop; Loop *InnerLoop; @@ -355,10 +359,11 @@ private: LoopInfo *LI; DominatorTree *DT; bool PreserveLCSSA; + /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; - bool InnerLoopHasReduction; + bool InnerLoopHasReduction = false; }; /// LoopInterchangeProfitability checks if it is profitable to interchange the @@ -381,6 +386,7 @@ private: /// Scev analysis. ScalarEvolution *SE; + /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; }; @@ -415,6 +421,7 @@ private: /// Scev analysis. ScalarEvolution *SE; + LoopInfo *LI; DominatorTree *DT; BasicBlock *LoopExit; @@ -424,16 +431,16 @@ private: // Main LoopInterchange Pass. struct LoopInterchange : public FunctionPass { static char ID; - ScalarEvolution *SE; - LoopInfo *LI; - DependenceInfo *DI; - DominatorTree *DT; + ScalarEvolution *SE = nullptr; + LoopInfo *LI = nullptr; + DependenceInfo *DI = nullptr; + DominatorTree *DT = nullptr; bool PreserveLCSSA; + /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; - LoopInterchange() - : FunctionPass(ID), SE(nullptr), LI(nullptr), DI(nullptr), DT(nullptr) { + LoopInterchange() : FunctionPass(ID) { initializeLoopInterchangePass(*PassRegistry::getPassRegistry()); } @@ -501,7 +508,6 @@ struct LoopInterchange : public FunctionPass { } bool processLoopList(LoopVector LoopList, Function &F) { - bool Changed = false; unsigned LoopNestDepth = LoopList.size(); if (LoopNestDepth < 2) { @@ -580,7 +586,6 @@ struct LoopInterchange : public FunctionPass { bool processLoop(LoopVector LoopList, unsigned InnerLoopId, unsigned OuterLoopId, BasicBlock *LoopNestExit, std::vector<std::vector<char>> &DependencyMatrix) { - DEBUG(dbgs() << "Processing Inner Loop Id = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << "\n"); Loop *InnerLoop = LoopList[InnerLoopId]; @@ -599,10 +604,12 @@ struct LoopInterchange : public FunctionPass { return false; } - ORE->emit(OptimizationRemark(DEBUG_TYPE, "Interchanged", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Loop interchanged with enclosing loop."); + ORE->emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "Interchanged", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Loop interchanged with enclosing loop."; + }); LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LoopNestExit, LIL.hasInnerLoopReduction()); @@ -612,9 +619,10 @@ struct LoopInterchange : public FunctionPass { } }; -} // end of namespace +} // end anonymous namespace + bool LoopInterchangeLegality::areAllUsesReductions(Instruction *Ins, Loop *L) { - return none_of(Ins->users(), [=](User *U) -> bool { + return llvm::none_of(Ins->users(), [=](User *U) -> bool { auto *UserIns = dyn_cast<PHINode>(U); RecurrenceDescriptor RD; return !UserIns || !RecurrenceDescriptor::isReductionPHI(UserIns, L, RD); @@ -664,11 +672,9 @@ bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) { if (!OuterLoopHeaderBI) return false; - for (unsigned i = 0, e = OuterLoopHeaderBI->getNumSuccessors(); i < e; ++i) { - if (OuterLoopHeaderBI->getSuccessor(i) != InnerLoopPreHeader && - OuterLoopHeaderBI->getSuccessor(i) != OuterLoopLatch) + for (BasicBlock *Succ : OuterLoopHeaderBI->successors()) + if (Succ != InnerLoopPreHeader && Succ != OuterLoopLatch) return false; - } DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n"); // We do not have any basic block in between now make sure the outer header @@ -682,10 +688,8 @@ bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) { return true; } - bool LoopInterchangeLegality::isLoopStructureUnderstood( PHINode *InnerInduction) { - unsigned Num = InnerInduction->getNumOperands(); BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader(); for (unsigned i = 0; i < Num; ++i) { @@ -750,12 +754,12 @@ static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock) { static BasicBlock *getLoopLatchExitBlock(BasicBlock *LatchBlock, BasicBlock *LoopHeader) { if (BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator())) { - unsigned Num = BI->getNumSuccessors(); - assert(Num == 2); - for (unsigned i = 0; i < Num; ++i) { - if (BI->getSuccessor(i) == LoopHeader) + assert(BI->getNumSuccessors() == 2 && + "Branch leaving loop latch must have 2 successors"); + for (BasicBlock *Succ : BI->successors()) { + if (Succ == LoopHeader) continue; - return BI->getSuccessor(i); + return Succ; } } return nullptr; @@ -764,7 +768,6 @@ static BasicBlock *getLoopLatchExitBlock(BasicBlock *LatchBlock, // This function indicates the current limitations in the transform as a result // of which we do not proceed. bool LoopInterchangeLegality::currentLimitations() { - BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); @@ -777,12 +780,13 @@ bool LoopInterchangeLegality::currentLimitations() { if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) { DEBUG(dbgs() << "Only inner loops with induction or reduction PHI nodes " << "are supported currently.\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "UnsupportedPHIInner", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Only inner loops with induction or reduction PHI nodes can be" - " interchange currently."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Only inner loops with induction or reduction PHI nodes can be" + " interchange currently."; + }); return true; } @@ -790,12 +794,13 @@ bool LoopInterchangeLegality::currentLimitations() { if (Inductions.size() != 1) { DEBUG(dbgs() << "We currently only support loops with 1 induction variable." << "Failed to interchange due to current limitation\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "MultiInductionInner", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Only inner loops with 1 induction variable can be " - "interchanged currently."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "MultiInductionInner", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Only inner loops with 1 induction variable can be " + "interchanged currently."; + }); return true; } if (Reductions.size() > 0) @@ -806,12 +811,13 @@ bool LoopInterchangeLegality::currentLimitations() { if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) { DEBUG(dbgs() << "Only outer loops with induction or reduction PHI nodes " << "are supported currently.\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "UnsupportedPHIOuter", - OuterLoop->getStartLoc(), - OuterLoop->getHeader()) - << "Only outer loops with induction or reduction PHI nodes can be" - " interchanged currently."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter", + OuterLoop->getStartLoc(), + OuterLoop->getHeader()) + << "Only outer loops with induction or reduction PHI nodes can be" + " interchanged currently."; + }); return true; } @@ -820,35 +826,38 @@ bool LoopInterchangeLegality::currentLimitations() { if (!Reductions.empty()) { DEBUG(dbgs() << "Outer loops with reductions are not supported " << "currently.\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "ReductionsOuter", - OuterLoop->getStartLoc(), - OuterLoop->getHeader()) - << "Outer loops with reductions cannot be interchangeed " - "currently."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "ReductionsOuter", + OuterLoop->getStartLoc(), + OuterLoop->getHeader()) + << "Outer loops with reductions cannot be interchangeed " + "currently."; + }); return true; } // TODO: Currently we handle only loops with 1 induction variable. if (Inductions.size() != 1) { DEBUG(dbgs() << "Loops with more than 1 induction variables are not " << "supported currently.\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "MultiIndutionOuter", - OuterLoop->getStartLoc(), - OuterLoop->getHeader()) - << "Only outer loops with 1 induction variable can be " - "interchanged currently."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "MultiIndutionOuter", + OuterLoop->getStartLoc(), + OuterLoop->getHeader()) + << "Only outer loops with 1 induction variable can be " + "interchanged currently."; + }); return true; } // TODO: Triangular loops are not handled for now. if (!isLoopStructureUnderstood(InnerInductionVar)) { DEBUG(dbgs() << "Loop structure not understood by pass\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "UnsupportedStructureInner", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Inner loop structure not understood currently."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Inner loop structure not understood currently."; + }); return true; } @@ -857,24 +866,26 @@ bool LoopInterchangeLegality::currentLimitations() { getLoopLatchExitBlock(OuterLoopLatch, OuterLoopHeader); if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true)) { DEBUG(dbgs() << "Can only handle LCSSA PHIs in outer loops currently.\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "NoLCSSAPHIOuter", - OuterLoop->getStartLoc(), - OuterLoop->getHeader()) - << "Only outer loops with LCSSA PHIs can be interchange " - "currently."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NoLCSSAPHIOuter", + OuterLoop->getStartLoc(), + OuterLoop->getHeader()) + << "Only outer loops with LCSSA PHIs can be interchange " + "currently."; + }); return true; } LoopExitBlock = getLoopLatchExitBlock(InnerLoopLatch, InnerLoopHeader); if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false)) { DEBUG(dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "NoLCSSAPHIOuterInner", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Only inner loops with LCSSA PHIs can be interchange " - "currently."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NoLCSSAPHIOuterInner", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Only inner loops with LCSSA PHIs can be interchange " + "currently."; + }); return true; } @@ -899,11 +910,12 @@ bool LoopInterchangeLegality::currentLimitations() { if (!InnerIndexVarInc) { DEBUG(dbgs() << "Did not find an instruction to increment the induction " << "variable.\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "NoIncrementInInner", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "The inner loop does not increment the induction variable."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NoIncrementInInner", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "The inner loop does not increment the induction variable."; + }); return true; } @@ -912,8 +924,9 @@ bool LoopInterchangeLegality::currentLimitations() { // instruction. bool FoundInduction = false; - for (const Instruction &I : reverse(*InnerLoopLatch)) { - if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I)) + for (const Instruction &I : llvm::reverse(*InnerLoopLatch)) { + if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I) || + isa<ZExtInst>(I)) continue; // We found an instruction. If this is not induction variable then it is not @@ -921,12 +934,13 @@ bool LoopInterchangeLegality::currentLimitations() { if (!I.isIdenticalTo(InnerIndexVarInc)) { DEBUG(dbgs() << "Found unsupported instructions between induction " << "variable increment and branch.\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "UnsupportedInsBetweenInduction", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Found unsupported instruction between induction variable " - "increment and branch."); + ORE->emit([&]() { + return OptimizationRemarkMissed( + DEBUG_TYPE, "UnsupportedInsBetweenInduction", + InnerLoop->getStartLoc(), InnerLoop->getHeader()) + << "Found unsupported instruction between induction variable " + "increment and branch."; + }); return true; } @@ -937,11 +951,12 @@ bool LoopInterchangeLegality::currentLimitations() { // current limitation. if (!FoundInduction) { DEBUG(dbgs() << "Did not find the induction variable.\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "NoIndutionVariable", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Did not find the induction variable."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NoIndutionVariable", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Did not find the induction variable."; + }); return true; } return false; @@ -950,19 +965,31 @@ bool LoopInterchangeLegality::currentLimitations() { bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) { - if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) { DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << " due to dependence\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "Dependence", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Cannot interchange loops due to dependences."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Cannot interchange loops due to dependences."; + }); return false; } + // Check if outer and inner loop contain legal instructions only. + for (auto *BB : OuterLoop->blocks()) + for (Instruction &I : *BB) + if (CallInst *CI = dyn_cast<CallInst>(&I)) { + // readnone functions do not prevent interchanging. + if (CI->doesNotReadMemory()) + continue; + DEBUG(dbgs() << "Loops with call instructions cannot be interchanged " + << "safely."); + return false; + } + // Create unique Preheaders if we already do not have one. BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); @@ -995,12 +1022,13 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, // Check if the loops are tightly nested. if (!tightlyNested(OuterLoop, InnerLoop)) { DEBUG(dbgs() << "Loops not tightly nested\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "NotTightlyNested", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Cannot interchange loops because they are not tightly " - "nested."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Cannot interchange loops because they are not tightly " + "nested."; + }); return false; } @@ -1010,9 +1038,8 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, int LoopInterchangeProfitability::getInstrOrderCost() { unsigned GoodOrder, BadOrder; BadOrder = GoodOrder = 0; - for (auto BI = InnerLoop->block_begin(), BE = InnerLoop->block_end(); - BI != BE; ++BI) { - for (Instruction &Ins : **BI) { + for (BasicBlock *BB : InnerLoop->blocks()) { + for (Instruction &Ins : *BB) { if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) { unsigned NumOp = GEP->getNumOperands(); bool FoundInnerInduction = false; @@ -1064,12 +1091,11 @@ static bool isProfitableForVectorization(unsigned InnerLoopId, // TODO: Improve this heuristic to catch more cases. // If the inner loop is loop independent or doesn't carry any dependency it is // profitable to move this to outer position. - unsigned Row = DepMatrix.size(); - for (unsigned i = 0; i < Row; ++i) { - if (DepMatrix[i][InnerLoopId] != 'S' && DepMatrix[i][InnerLoopId] != 'I') + for (auto &Row : DepMatrix) { + if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I') return false; // TODO: We need to improve this heuristic. - if (DepMatrix[i][OuterLoopId] != '=') + if (Row[OuterLoopId] != '=') return false; } // If outer loop has dependence and inner loop is loop independent then it is @@ -1080,7 +1106,6 @@ static bool isProfitableForVectorization(unsigned InnerLoopId, bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) { - // TODO: Add better profitability checks. // e.g // 1) Construct dependency matrix and move the one with no loop carried dep @@ -1099,14 +1124,15 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId, if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix)) return true; - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "InterchangeNotProfitable", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Interchanging loops is too costly (cost=" - << ore::NV("Cost", Cost) << ", threshold=" - << ore::NV("Threshold", LoopInterchangeCostThreshold) << - ") and it does not improve parallelism."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Interchanging loops is too costly (cost=" + << ore::NV("Cost", Cost) << ", threshold=" + << ore::NV("Threshold", LoopInterchangeCostThreshold) + << ") and it does not improve parallelism."; + }); return false; } @@ -1145,7 +1171,7 @@ bool LoopInterchangeTransform::transform() { bool Transformed = false; Instruction *InnerIndexVar; - if (InnerLoop->getSubLoops().size() == 0) { + if (InnerLoop->getSubLoops().empty()) { BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); DEBUG(dbgs() << "Calling Split Inner Loop\n"); PHINode *InductionPHI = getInductionVariable(InnerLoop, SE); @@ -1159,7 +1185,11 @@ bool LoopInterchangeTransform::transform() { else InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0)); - // + // Ensure that InductionPHI is the first Phi node as required by + // splitInnerLoopHeader + if (&InductionPHI->getParent()->front() != InductionPHI) + InductionPHI->moveBefore(&InductionPHI->getParent()->front()); + // Split at the place were the induction variable is // incremented/decremented. // TODO: This splitting logic may not work always. Fix this. @@ -1188,13 +1218,12 @@ void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) { } void LoopInterchangeTransform::splitInnerLoopHeader() { - // Split the inner loop header out. Here make sure that the reduction PHI's // stay in the innerloop body. BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); if (InnerLoopHasReduction) { - // FIXME: Check if the induction PHI will always be the first PHI. + // Note: The induction PHI must be the first PHI for this to work BasicBlock *New = InnerLoopHeader->splitBasicBlock( ++(InnerLoopHeader->begin()), InnerLoopHeader->getName() + ".split"); if (LI) @@ -1244,7 +1273,6 @@ void LoopInterchangeTransform::updateIncomingBlock(BasicBlock *CurrBlock, } bool LoopInterchangeTransform::adjustLoopBranches() { - DEBUG(dbgs() << "adjustLoopBranches called\n"); // Adjust the loop preheader BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); @@ -1352,8 +1380,8 @@ bool LoopInterchangeTransform::adjustLoopBranches() { return true; } -void LoopInterchangeTransform::adjustLoopPreheaders() { +void LoopInterchangeTransform::adjustLoopPreheaders() { // We have interchanged the preheaders so we need to interchange the data in // the preheader as well. // This is because the content of inner preheader was previously executed @@ -1373,7 +1401,6 @@ void LoopInterchangeTransform::adjustLoopPreheaders() { } bool LoopInterchangeTransform::adjustLoopLinks() { - // Adjust all branches in the inner and outer loop. bool Changed = adjustLoopBranches(); if (Changed) @@ -1382,6 +1409,7 @@ bool LoopInterchangeTransform::adjustLoopLinks() { } char LoopInterchange::ID = 0; + INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange", "Interchanges loops for cache reuse", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) |