diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 |
commit | d8e91e46262bc44006913e6796843909f1ac7bcd (patch) | |
tree | 7d0c143d9b38190e0fa0180805389da22cd834c5 /lib/Transforms/Scalar/LoopInterchange.cpp | |
parent | b7eb8e35e481a74962664b63dfb09483b200209a (diff) |
Notes
Diffstat (limited to 'lib/Transforms/Scalar/LoopInterchange.cpp')
-rw-r--r-- | lib/Transforms/Scalar/LoopInterchange.cpp | 424 |
1 files changed, 213 insertions, 211 deletions
diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp index 2978165ed8a9..766e39b439a0 100644 --- a/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/lib/Transforms/Scalar/LoopInterchange.cpp @@ -17,9 +17,9 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -271,7 +271,7 @@ static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, return true; } -static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) { +static LoopVector populateWorklist(Loop &L) { LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: " << L.getHeader()->getParent()->getName() << " Loop: %" << L.getHeader()->getName() << '\n'); @@ -282,16 +282,15 @@ static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) { // The current loop has multiple subloops in it hence it is not tightly // nested. // Discard all loops above it added into Worklist. - if (Vec->size() != 1) { - LoopList.clear(); - return; - } + if (Vec->size() != 1) + return {}; + LoopList.push_back(CurrentLoop); CurrentLoop = Vec->front(); Vec = &CurrentLoop->getSubLoops(); } LoopList.push_back(CurrentLoop); - V.push_back(std::move(LoopList)); + return LoopList; } static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) { @@ -327,10 +326,8 @@ namespace { class LoopInterchangeLegality { public: LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE, - LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA, OptimizationRemarkEmitter *ORE) - : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), - PreserveLCSSA(PreserveLCSSA), ORE(ORE) {} + : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {} /// Check if the loops can be interchanged. bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, @@ -342,29 +339,33 @@ public: bool currentLimitations(); - bool hasInnerLoopReduction() { return InnerLoopHasReduction; } + const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const { + return OuterInnerReductions; + } private: bool tightlyNested(Loop *Outer, Loop *Inner); - bool containsUnsafeInstructionsInHeader(BasicBlock *BB); - bool areAllUsesReductions(Instruction *Ins, Loop *L); - bool containsUnsafeInstructionsInLatch(BasicBlock *BB); + bool containsUnsafeInstructions(BasicBlock *BB); + + /// Discover induction and reduction PHIs in the header of \p L. Induction + /// PHIs are added to \p Inductions, reductions are added to + /// OuterInnerReductions. When the outer loop is passed, the inner loop needs + /// to be passed as \p InnerLoop. bool findInductionAndReductions(Loop *L, SmallVector<PHINode *, 8> &Inductions, - SmallVector<PHINode *, 8> &Reductions); + Loop *InnerLoop); Loop *OuterLoop; Loop *InnerLoop; ScalarEvolution *SE; - LoopInfo *LI; - DominatorTree *DT; - bool PreserveLCSSA; /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; - bool InnerLoopHasReduction = false; + /// Set of reduction PHIs taking part of a reduction across the inner and + /// outer loop. + SmallPtrSet<PHINode *, 4> OuterInnerReductions; }; /// LoopInterchangeProfitability checks if it is profitable to interchange the @@ -398,10 +399,9 @@ public: LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, BasicBlock *LoopNestExit, - bool InnerLoopContainsReductions) + const LoopInterchangeLegality &LIL) : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), - LoopExit(LoopNestExit), - InnerLoopHasReduction(InnerLoopContainsReductions) {} + LoopExit(LoopNestExit), LIL(LIL) {} /// Interchange OuterLoop and InnerLoop. bool transform(); @@ -416,8 +416,6 @@ private: bool adjustLoopLinks(); void adjustLoopPreheaders(); bool adjustLoopBranches(); - void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred, - BasicBlock *NewPred); Loop *OuterLoop; Loop *InnerLoop; @@ -428,41 +426,34 @@ private: LoopInfo *LI; DominatorTree *DT; BasicBlock *LoopExit; - bool InnerLoopHasReduction; + + const LoopInterchangeLegality &LIL; }; // Main LoopInterchange Pass. -struct LoopInterchange : public FunctionPass { +struct LoopInterchange : public LoopPass { static char ID; ScalarEvolution *SE = nullptr; LoopInfo *LI = nullptr; DependenceInfo *DI = nullptr; DominatorTree *DT = nullptr; - bool PreserveLCSSA; /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; - LoopInterchange() : FunctionPass(ID) { + LoopInterchange() : LoopPass(ID) { initializeLoopInterchangePass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<ScalarEvolutionWrapperPass>(); - AU.addRequired<AAResultsWrapperPass>(); - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<LoopInfoWrapperPass>(); AU.addRequired<DependenceAnalysisWrapperPass>(); - AU.addRequiredID(LoopSimplifyID); - AU.addRequiredID(LCSSAID); AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addPreserved<LoopInfoWrapperPass>(); + getLoopAnalysisUsage(AU); } - bool runOnFunction(Function &F) override { - if (skipFunction(F)) + bool runOnLoop(Loop *L, LPPassManager &LPM) override { + if (skipLoop(L) || L->getParentLoop()) return false; SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); @@ -470,21 +461,8 @@ struct LoopInterchange : public FunctionPass { DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); - PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); - - // Build up a worklist of loop pairs to analyze. - SmallVector<LoopVector, 8> Worklist; - - for (Loop *L : *LI) - populateWorklist(*L, Worklist); - LLVM_DEBUG(dbgs() << "Worklist size = " << Worklist.size() << "\n"); - bool Changed = true; - while (!Worklist.empty()) { - LoopVector LoopList = Worklist.pop_back_val(); - Changed = processLoopList(LoopList, F); - } - return Changed; + return processLoopList(populateWorklist(*L)); } bool isComputableLoopNest(LoopVector LoopList) { @@ -512,7 +490,7 @@ struct LoopInterchange : public FunctionPass { return LoopList.size() - 1; } - bool processLoopList(LoopVector LoopList, Function &F) { + bool processLoopList(LoopVector LoopList) { bool Changed = false; unsigned LoopNestDepth = LoopList.size(); if (LoopNestDepth < 2) { @@ -580,8 +558,7 @@ struct LoopInterchange : public FunctionPass { Loop *InnerLoop = LoopList[InnerLoopId]; Loop *OuterLoop = LoopList[OuterLoopId]; - LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, LI, DT, - PreserveLCSSA, ORE); + LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE); if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) { LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n"); return false; @@ -600,8 +577,8 @@ struct LoopInterchange : public FunctionPass { << "Loop interchanged with enclosing loop."; }); - LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, - LoopNestExit, LIL.hasInnerLoopReduction()); + LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LoopNestExit, + LIL); LIT.transform(); LLVM_DEBUG(dbgs() << "Loops interchanged.\n"); LoopsInterchanged++; @@ -611,42 +588,12 @@ struct LoopInterchange : public FunctionPass { } // end anonymous namespace -bool LoopInterchangeLegality::areAllUsesReductions(Instruction *Ins, Loop *L) { - return llvm::none_of(Ins->users(), [=](User *U) -> bool { - auto *UserIns = dyn_cast<PHINode>(U); - RecurrenceDescriptor RD; - return !UserIns || !RecurrenceDescriptor::isReductionPHI(UserIns, L, RD); +bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) { + return any_of(*BB, [](const Instruction &I) { + return I.mayHaveSideEffects() || I.mayReadFromMemory(); }); } -bool LoopInterchangeLegality::containsUnsafeInstructionsInHeader( - BasicBlock *BB) { - for (Instruction &I : *BB) { - // Load corresponding to reduction PHI's are safe while concluding if - // tightly nested. - if (LoadInst *L = dyn_cast<LoadInst>(&I)) { - if (!areAllUsesReductions(L, InnerLoop)) - return true; - } else if (I.mayHaveSideEffects() || I.mayReadFromMemory()) - return true; - } - return false; -} - -bool LoopInterchangeLegality::containsUnsafeInstructionsInLatch( - BasicBlock *BB) { - for (Instruction &I : *BB) { - // Stores corresponding to reductions are safe while concluding if tightly - // nested. - if (StoreInst *L = dyn_cast<StoreInst>(&I)) { - if (!isa<PHINode>(L->getOperand(0))) - return true; - } else if (I.mayHaveSideEffects() || I.mayReadFromMemory()) - return true; - } - return false; -} - bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) { BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); @@ -662,15 +609,16 @@ bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) { if (!OuterLoopHeaderBI) return false; - for (BasicBlock *Succ : OuterLoopHeaderBI->successors()) - if (Succ != InnerLoopPreHeader && Succ != OuterLoopLatch) + for (BasicBlock *Succ : successors(OuterLoopHeaderBI)) + if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() && + Succ != OuterLoopLatch) return false; LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n"); // We do not have any basic block in between now make sure the outer header // and outer loop latch doesn't contain any unsafe instructions. - if (containsUnsafeInstructionsInHeader(OuterLoopHeader) || - containsUnsafeInstructionsInLatch(OuterLoopLatch)) + if (containsUnsafeInstructions(OuterLoopHeader) || + containsUnsafeInstructions(OuterLoopLatch)) return false; LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n"); @@ -702,9 +650,36 @@ bool LoopInterchangeLegality::isLoopStructureUnderstood( return true; } +// If SV is a LCSSA PHI node with a single incoming value, return the incoming +// value. +static Value *followLCSSA(Value *SV) { + PHINode *PHI = dyn_cast<PHINode>(SV); + if (!PHI) + return SV; + + if (PHI->getNumIncomingValues() != 1) + return SV; + return followLCSSA(PHI->getIncomingValue(0)); +} + +// Check V's users to see if it is involved in a reduction in L. +static PHINode *findInnerReductionPhi(Loop *L, Value *V) { + for (Value *User : V->users()) { + if (PHINode *PHI = dyn_cast<PHINode>(User)) { + if (PHI->getNumIncomingValues() == 1) + continue; + RecurrenceDescriptor RD; + if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) + return PHI; + return nullptr; + } + } + + return nullptr; +} + bool LoopInterchangeLegality::findInductionAndReductions( - Loop *L, SmallVector<PHINode *, 8> &Inductions, - SmallVector<PHINode *, 8> &Reductions) { + Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) { if (!L->getLoopLatch() || !L->getLoopPredecessor()) return false; for (PHINode &PHI : L->getHeader()->phis()) { @@ -712,12 +687,33 @@ bool LoopInterchangeLegality::findInductionAndReductions( InductionDescriptor ID; if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID)) Inductions.push_back(&PHI); - else if (RecurrenceDescriptor::isReductionPHI(&PHI, L, RD)) - Reductions.push_back(&PHI); else { - LLVM_DEBUG( - dbgs() << "Failed to recognize PHI as an induction or reduction.\n"); - return false; + // PHIs in inner loops need to be part of a reduction in the outer loop, + // discovered when checking the PHIs of the outer loop earlier. + if (!InnerLoop) { + if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end()) { + LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions " + "across the outer loop.\n"); + return false; + } + } else { + assert(PHI.getNumIncomingValues() == 2 && + "Phis in loop header should have exactly 2 incoming values"); + // Check if we have a PHI node in the outer loop that has a reduction + // result from the inner loop as an incoming value. + Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch())); + PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V); + if (!InnerRedPhi || + !llvm::any_of(InnerRedPhi->incoming_values(), + [&PHI](Value *V) { return V == &PHI; })) { + LLVM_DEBUG( + dbgs() + << "Failed to recognize PHI as an induction or reduction.\n"); + return false; + } + OuterInnerReductions.insert(&PHI); + OuterInnerReductions.insert(InnerRedPhi); + } } } return true; @@ -766,81 +762,64 @@ bool LoopInterchangeLegality::currentLimitations() { PHINode *InnerInductionVar; SmallVector<PHINode *, 8> Inductions; - SmallVector<PHINode *, 8> Reductions; - if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) { + if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) { LLVM_DEBUG( - dbgs() << "Only inner loops with induction or reduction PHI nodes " + dbgs() << "Only outer loops with induction or reduction PHI nodes " << "are supported currently.\n"); ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Only inner loops with induction or reduction PHI nodes can be" - " interchange currently."; + return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter", + OuterLoop->getStartLoc(), + OuterLoop->getHeader()) + << "Only outer loops with induction or reduction PHI nodes can be" + " interchanged currently."; }); return true; } // TODO: Currently we handle only loops with 1 induction variable. if (Inductions.size() != 1) { - LLVM_DEBUG( - dbgs() << "We currently only support loops with 1 induction variable." - << "Failed to interchange due to current limitation\n"); + LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not " + << "supported currently.\n"); ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "MultiInductionInner", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Only inner loops with 1 induction variable can be " + return OptimizationRemarkMissed(DEBUG_TYPE, "MultiIndutionOuter", + OuterLoop->getStartLoc(), + OuterLoop->getHeader()) + << "Only outer loops with 1 induction variable can be " "interchanged currently."; }); return true; } - if (Reductions.size() > 0) - InnerLoopHasReduction = true; - InnerInductionVar = Inductions.pop_back_val(); - Reductions.clear(); - if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) { + Inductions.clear(); + if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) { LLVM_DEBUG( - dbgs() << "Only outer loops with induction or reduction PHI nodes " + dbgs() << "Only inner loops with induction or reduction PHI nodes " << "are supported currently.\n"); ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter", - OuterLoop->getStartLoc(), - OuterLoop->getHeader()) - << "Only outer loops with induction or reduction PHI nodes can be" - " interchanged currently."; + return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Only inner loops with induction or reduction PHI nodes can be" + " interchange currently."; }); return true; } - // Outer loop cannot have reduction because then loops will not be tightly - // nested. - if (!Reductions.empty()) { - LLVM_DEBUG(dbgs() << "Outer loops with reductions are not supported " - << "currently.\n"); - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "ReductionsOuter", - OuterLoop->getStartLoc(), - OuterLoop->getHeader()) - << "Outer loops with reductions cannot be interchangeed " - "currently."; - }); - return true; - } // TODO: Currently we handle only loops with 1 induction variable. if (Inductions.size() != 1) { - LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not " - << "supported currently.\n"); + LLVM_DEBUG( + dbgs() << "We currently only support loops with 1 induction variable." + << "Failed to interchange due to current limitation\n"); ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "MultiIndutionOuter", - OuterLoop->getStartLoc(), - OuterLoop->getHeader()) - << "Only outer loops with 1 induction variable can be " + return OptimizationRemarkMissed(DEBUG_TYPE, "MultiInductionInner", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Only inner loops with 1 induction variable can be " "interchanged currently."; }); return true; } + InnerInductionVar = Inductions.pop_back_val(); // TODO: Triangular loops are not handled for now. if (!isLoopStructureUnderstood(InnerInductionVar)) { @@ -1016,28 +995,6 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, return false; } - // Create unique Preheaders if we already do not have one. - BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); - BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); - - // Create a unique outer preheader - - // 1) If OuterLoop preheader is not present. - // 2) If OuterLoop Preheader is same as OuterLoop Header - // 3) If OuterLoop Preheader is same as Header of the previous loop. - // 4) If OuterLoop Preheader is Entry node. - if (!OuterLoopPreHeader || OuterLoopPreHeader == OuterLoop->getHeader() || - isa<PHINode>(OuterLoopPreHeader->begin()) || - !OuterLoopPreHeader->getUniquePredecessor()) { - OuterLoopPreHeader = - InsertPreheaderForLoop(OuterLoop, DT, LI, PreserveLCSSA); - } - - if (!InnerLoopPreHeader || InnerLoopPreHeader == InnerLoop->getHeader() || - InnerLoopPreHeader == OuterLoop->getHeader()) { - InnerLoopPreHeader = - InsertPreheaderForLoop(InnerLoop, DT, LI, PreserveLCSSA); - } - // TODO: The loops could not be interchanged due to current limitations in the // transform module. if (currentLimitations()) { @@ -1258,6 +1215,10 @@ void LoopInterchangeTransform::restructureLoops( // outer loop. NewOuter->addBlockEntry(OrigOuterPreHeader); LI->changeLoopFor(OrigOuterPreHeader, NewOuter); + + // Tell SE that we move the loops around. + SE->forgetLoop(NewOuter); + SE->forgetLoop(NewInner); } bool LoopInterchangeTransform::transform() { @@ -1319,9 +1280,8 @@ static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) { FromBB->getTerminator()->getIterator()); } -void LoopInterchangeTransform::updateIncomingBlock(BasicBlock *CurrBlock, - BasicBlock *OldPred, - BasicBlock *NewPred) { +static void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred, + BasicBlock *NewPred) { for (PHINode &PHI : CurrBlock->phis()) { unsigned Num = PHI.getNumIncomingValues(); for (unsigned i = 0; i < Num; ++i) { @@ -1336,7 +1296,7 @@ void LoopInterchangeTransform::updateIncomingBlock(BasicBlock *CurrBlock, static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector<DominatorTree::UpdateType> &DTUpdates) { - assert(llvm::count_if(BI->successors(), + assert(llvm::count_if(successors(BI), [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 && "BI must jump to OldBB at most once."); for (unsigned i = 0, e = BI->getNumSuccessors(); i < e; ++i) { @@ -1352,17 +1312,77 @@ static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, } } +// Move Lcssa PHIs to the right place. +static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerLatch, + BasicBlock *OuterLatch) { + SmallVector<PHINode *, 8> LcssaInnerExit; + for (PHINode &P : InnerExit->phis()) + LcssaInnerExit.push_back(&P); + + SmallVector<PHINode *, 8> LcssaInnerLatch; + for (PHINode &P : InnerLatch->phis()) + LcssaInnerLatch.push_back(&P); + + // Lcssa PHIs for values used outside the inner loop are in InnerExit. + // If a PHI node has users outside of InnerExit, it has a use outside the + // interchanged loop and we have to preserve it. We move these to + // InnerLatch, which will become the new exit block for the innermost + // loop after interchanging. For PHIs only used in InnerExit, we can just + // replace them with the incoming value. + for (PHINode *P : LcssaInnerExit) { + bool hasUsersOutside = false; + for (auto UI = P->use_begin(), E = P->use_end(); UI != E;) { + Use &U = *UI; + ++UI; + auto *Usr = cast<Instruction>(U.getUser()); + if (Usr->getParent() != InnerExit) { + hasUsersOutside = true; + continue; + } + U.set(P->getIncomingValueForBlock(InnerLatch)); + } + if (hasUsersOutside) + P->moveBefore(InnerLatch->getFirstNonPHI()); + else + P->eraseFromParent(); + } + + // If the inner loop latch contains LCSSA PHIs, those come from a child loop + // and we have to move them to the new inner latch. + for (PHINode *P : LcssaInnerLatch) + P->moveBefore(InnerExit->getFirstNonPHI()); + + // Now adjust the incoming blocks for the LCSSA PHIs. + // For PHIs moved from Inner's exit block, we need to replace Inner's latch + // with the new latch. + updateIncomingBlock(InnerLatch, InnerLatch, OuterLatch); +} + bool LoopInterchangeTransform::adjustLoopBranches() { LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n"); std::vector<DominatorTree::UpdateType> DTUpdates; + BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); + BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); + + assert(OuterLoopPreHeader != OuterLoop->getHeader() && + InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && + InnerLoopPreHeader && "Guaranteed by loop-simplify form"); + // Ensure that both preheaders do not contain PHI nodes and have single + // predecessors. This allows us to move them easily. We use + // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing + // preheaders do not satisfy those conditions. + if (isa<PHINode>(OuterLoopPreHeader->begin()) || + !OuterLoopPreHeader->getUniquePredecessor()) + OuterLoopPreHeader = InsertPreheaderForLoop(OuterLoop, DT, LI, true); + if (InnerLoopPreHeader == OuterLoop->getHeader()) + InnerLoopPreHeader = InsertPreheaderForLoop(InnerLoop, DT, LI, true); + // Adjust the loop preheader BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); - BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); - BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor(); BasicBlock *InnerLoopLatchPredecessor = InnerLoopLatch->getUniquePredecessor(); @@ -1417,17 +1437,6 @@ bool LoopInterchangeTransform::adjustLoopBranches() { updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch, InnerLoopLatchSuccessor, DTUpdates); - // Adjust PHI nodes in InnerLoopLatchSuccessor. Update all uses of PHI with - // the value and remove this PHI node from inner loop. - SmallVector<PHINode *, 8> LcssaVec; - for (PHINode &P : InnerLoopLatchSuccessor->phis()) - LcssaVec.push_back(&P); - - for (PHINode *P : LcssaVec) { - Value *Incoming = P->getIncomingValueForBlock(InnerLoopLatch); - P->replaceAllUsesWith(Incoming); - P->eraseFromParent(); - } if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader) OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1); @@ -1439,12 +1448,15 @@ bool LoopInterchangeTransform::adjustLoopBranches() { updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch, DTUpdates); - updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch); - DT->applyUpdates(DTUpdates); restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader, OuterLoopPreHeader); + moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopLatch, OuterLoopLatch); + // For PHIs in the exit block of the outer loop, outer's latch has been + // replaced by Inners'. + updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch); + // Now update the reduction PHIs in the inner and outer loop headers. SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs; for (PHINode &PHI : drop_begin(InnerLoopHeader->phis(), 1)) @@ -1452,26 +1464,21 @@ bool LoopInterchangeTransform::adjustLoopBranches() { for (PHINode &PHI : drop_begin(OuterLoopHeader->phis(), 1)) OuterLoopPHIs.push_back(cast<PHINode>(&PHI)); - for (PHINode *PHI : OuterLoopPHIs) - PHI->moveBefore(InnerLoopHeader->getFirstNonPHI()); + auto &OuterInnerReductions = LIL.getOuterInnerReductions(); + (void)OuterInnerReductions; - // Move the PHI nodes from the inner loop header to the outer loop header. - // We have to deal with one kind of PHI nodes: - // 1) PHI nodes that are part of inner loop-only reductions. - // We only have to move the PHI node and update the incoming blocks. + // Now move the remaining reduction PHIs from outer to inner loop header and + // vice versa. The PHI nodes must be part of a reduction across the inner and + // outer loop and all the remains to do is and updating the incoming blocks. + for (PHINode *PHI : OuterLoopPHIs) { + PHI->moveBefore(InnerLoopHeader->getFirstNonPHI()); + assert(OuterInnerReductions.find(PHI) != OuterInnerReductions.end() && + "Expected a reduction PHI node"); + } for (PHINode *PHI : InnerLoopPHIs) { PHI->moveBefore(OuterLoopHeader->getFirstNonPHI()); - for (BasicBlock *InBB : PHI->blocks()) { - if (InnerLoop->contains(InBB)) - continue; - - assert(!isa<PHINode>(PHI->getIncomingValueForBlock(InBB)) && - "Unexpected incoming PHI node, reductions in outer loop are not " - "supported yet"); - PHI->replaceAllUsesWith(PHI->getIncomingValueForBlock(InBB)); - PHI->eraseFromParent(); - break; - } + assert(OuterInnerReductions.find(PHI) != OuterInnerReductions.end() && + "Expected a reduction PHI node"); } // Update the incoming blocks for moved PHI nodes. @@ -1514,13 +1521,8 @@ char LoopInterchange::ID = 0; INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange", "Interchanges loops for cache reuse", false, false) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopSimplify) -INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_END(LoopInterchange, "loop-interchange", |