diff options
Diffstat (limited to 'llvm/lib/CodeGen/MachineLICM.cpp')
| -rw-r--r-- | llvm/lib/CodeGen/MachineLICM.cpp | 344 |
1 files changed, 236 insertions, 108 deletions
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 4e80e9b58c06..efc19f8fdbf8 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -72,6 +72,11 @@ static cl::opt<bool> HoistConstStores("hoist-const-stores", cl::desc("Hoist invariant stores"), cl::init(true), cl::Hidden); + +static cl::opt<bool> HoistConstLoads("hoist-const-loads", + cl::desc("Hoist invariant loads"), + cl::init(true), cl::Hidden); + // The default threshold of 100 (i.e. if target block is 100 times hotter) // is based on empirical data on a single target and is subject to tuning. static cl::opt<unsigned> @@ -110,6 +115,7 @@ STATISTIC(NumNotHoistedDueToHotness, "Number of instructions not hoisted due to block frequency"); namespace { + enum HoistResult { NotHoisted = 1, Hoisted = 2, ErasedMI = 4 }; class MachineLICMBase : public MachineFunctionPass { const TargetInstrInfo *TII = nullptr; @@ -130,13 +136,21 @@ namespace { // State that is updated as we process loops bool Changed = false; // True if a loop is changed. bool FirstInLoop = false; // True if it's the first LICM in the loop. - MachineLoop *CurLoop = nullptr; // The current loop we are working on. - MachineBasicBlock *CurPreheader = nullptr; // The preheader for CurLoop. - // Exit blocks for CurLoop. - SmallVector<MachineBasicBlock *, 8> ExitBlocks; + // Holds information about whether it is allowed to move load instructions + // out of the loop + SmallDenseMap<MachineLoop *, bool> AllowedToHoistLoads; + + // Exit blocks of each Loop. + DenseMap<MachineLoop *, SmallVector<MachineBasicBlock *, 8>> ExitBlockMap; + + bool isExitBlock(MachineLoop *CurLoop, const MachineBasicBlock *MBB) { + if (ExitBlockMap.contains(CurLoop)) + return is_contained(ExitBlockMap[CurLoop], MBB); - bool isExitBlock(const MachineBasicBlock *MBB) const { + SmallVector<MachineBasicBlock *, 8> ExitBlocks; + CurLoop->getExitBlocks(ExitBlocks); + ExitBlockMap[CurLoop] = ExitBlocks; return is_contained(ExitBlocks, MBB); } @@ -151,8 +165,10 @@ namespace { // Register pressure on path leading from loop preheader to current BB. SmallVector<SmallVector<unsigned, 8>, 16> BackTrace; - // For each opcode, keep a list of potential CSE instructions. - DenseMap<unsigned, std::vector<MachineInstr *>> CSEMap; + // For each opcode per preheader, keep a list of potential CSE instructions. + DenseMap<MachineBasicBlock *, + DenseMap<unsigned, std::vector<MachineInstr *>>> + CSEMap; enum { SpeculateFalse = 0, @@ -187,6 +203,7 @@ namespace { RegLimit.clear(); BackTrace.clear(); CSEMap.clear(); + ExitBlockMap.clear(); } private: @@ -200,24 +217,27 @@ namespace { : MI(mi), Def(def), FI(fi) {} }; - void HoistRegionPostRA(); + void HoistRegionPostRA(MachineLoop *CurLoop, + MachineBasicBlock *CurPreheader); - void HoistPostRA(MachineInstr *MI, unsigned Def); + void HoistPostRA(MachineInstr *MI, unsigned Def, MachineLoop *CurLoop, + MachineBasicBlock *CurPreheader); void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs, - SmallVectorImpl<CandidateInfo> &Candidates); + SmallVectorImpl<CandidateInfo> &Candidates, + MachineLoop *CurLoop); - void AddToLiveIns(MCRegister Reg); + void AddToLiveIns(MCRegister Reg, MachineLoop *CurLoop); - bool IsLICMCandidate(MachineInstr &I); + bool IsLICMCandidate(MachineInstr &I, MachineLoop *CurLoop); - bool IsLoopInvariantInst(MachineInstr &I); + bool IsLoopInvariantInst(MachineInstr &I, MachineLoop *CurLoop); - bool HasLoopPHIUse(const MachineInstr *MI) const; + bool HasLoopPHIUse(const MachineInstr *MI, MachineLoop *CurLoop); - bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, - Register Reg) const; + bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, Register Reg, + MachineLoop *CurLoop) const; bool IsCheapInstruction(MachineInstr &MI) const; @@ -226,9 +246,9 @@ namespace { void UpdateBackTraceRegPressure(const MachineInstr *MI); - bool IsProfitableToHoist(MachineInstr &MI); + bool IsProfitableToHoist(MachineInstr &MI, MachineLoop *CurLoop); - bool IsGuaranteedToExecute(MachineBasicBlock *BB); + bool IsGuaranteedToExecute(MachineBasicBlock *BB, MachineLoop *CurLoop); bool isTriviallyReMaterializable(const MachineInstr &MI) const; @@ -241,7 +261,8 @@ namespace { DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren, const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap); - void HoistOutOfLoop(MachineDomTreeNode *HeaderN); + void HoistOutOfLoop(MachineDomTreeNode *HeaderN, MachineLoop *CurLoop, + MachineBasicBlock *CurPreheader); void InitRegPressure(MachineBasicBlock *BB); @@ -252,7 +273,7 @@ namespace { void UpdateRegPressure(const MachineInstr *MI, bool ConsiderUnseenAsDef = false); - MachineInstr *ExtractHoistableLoad(MachineInstr *MI); + MachineInstr *ExtractHoistableLoad(MachineInstr *MI, MachineLoop *CurLoop); MachineInstr *LookForDuplicate(const MachineInstr *MI, std::vector<MachineInstr *> &PrevMIs); @@ -263,13 +284,17 @@ namespace { bool MayCSE(MachineInstr *MI); - bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader); + unsigned Hoist(MachineInstr *MI, MachineBasicBlock *Preheader, + MachineLoop *CurLoop); void InitCSEMap(MachineBasicBlock *BB); + void InitializeLoadsHoistableLoops(); + bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock, MachineBasicBlock *TgtBlock); - MachineBasicBlock *getCurPreheader(); + MachineBasicBlock *getCurPreheader(MachineLoop *CurLoop, + MachineBasicBlock *CurPreheader); }; class MachineLICM : public MachineLICMBase { @@ -314,19 +339,6 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm", "Early Machine Loop Invariant Code Motion", false, false) -/// Test if the given loop is the outer-most loop that has a unique predecessor. -static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { - // Check whether this loop even has a unique predecessor. - if (!CurLoop->getLoopPredecessor()) - return false; - // Ok, now check to see if any of its outer loops do. - for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop()) - if (L->getLoopPredecessor()) - return false; - // None of them did, so this is the outermost with a unique predecessor. - return true; -} - bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -366,29 +378,22 @@ bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) { DT = &getAnalysis<MachineDominatorTree>(); AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + if (HoistConstLoads) + InitializeLoadsHoistableLoops(); + SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end()); while (!Worklist.empty()) { - CurLoop = Worklist.pop_back_val(); - CurPreheader = nullptr; - ExitBlocks.clear(); - - // If this is done before regalloc, only visit outer-most preheader-sporting - // loops. - if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) { - Worklist.append(CurLoop->begin(), CurLoop->end()); - continue; - } - - CurLoop->getExitBlocks(ExitBlocks); + MachineLoop *CurLoop = Worklist.pop_back_val(); + MachineBasicBlock *CurPreheader = nullptr; if (!PreRegAlloc) - HoistRegionPostRA(); + HoistRegionPostRA(CurLoop, CurPreheader); else { // CSEMap is initialized for loop header when the first instruction is // being hoisted. MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); FirstInLoop = true; - HoistOutOfLoop(N); + HoistOutOfLoop(N, CurLoop, CurPreheader); CSEMap.clear(); } } @@ -420,11 +425,11 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { /// Examine the instruction for potentai LICM candidate. Also /// gather register def and frame object update information. -void MachineLICMBase::ProcessMI(MachineInstr *MI, - BitVector &PhysRegDefs, +void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs, - SmallVectorImpl<CandidateInfo> &Candidates) { + SmallVectorImpl<CandidateInfo> &Candidates, + MachineLoop *CurLoop) { bool RuledOut = false; bool HasNonInvariantUse = false; unsigned Def = 0; @@ -502,7 +507,7 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, // operands. FIXME: Consider unfold load folding instructions. if (Def && !RuledOut) { int FI = std::numeric_limits<int>::min(); - if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) || + if ((!HasNonInvariantUse && IsLICMCandidate(*MI, CurLoop)) || (TII->isLoadFromStackSlot(*MI, FI) && MFI->isSpillSlotObjectIndex(FI))) Candidates.push_back(CandidateInfo(MI, Def, FI)); } @@ -510,8 +515,9 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, /// Walk the specified region of the CFG and hoist loop invariants out to the /// preheader. -void MachineLICMBase::HoistRegionPostRA() { - MachineBasicBlock *Preheader = getCurPreheader(); +void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop, + MachineBasicBlock *CurPreheader) { + MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader); if (!Preheader) return; @@ -538,9 +544,14 @@ void MachineLICMBase::HoistRegionPostRA() { PhysRegDefs.set(*AI); } + // Funclet entry blocks will clobber all registers + if (const uint32_t *Mask = BB->getBeginClobberMask(TRI)) + PhysRegClobbers.setBitsNotInMask(Mask); + SpeculationState = SpeculateUnknown; for (MachineInstr &MI : *BB) - ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates); + ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates, + CurLoop); } // Gather the registers read / clobbered by the terminator. @@ -588,14 +599,14 @@ void MachineLICMBase::HoistRegionPostRA() { } } if (Safe) - HoistPostRA(MI, Candidate.Def); + HoistPostRA(MI, Candidate.Def, CurLoop, CurPreheader); } } } /// Add register 'Reg' to the livein sets of BBs in the current loop, and make /// sure it is not killed by any instructions in the loop. -void MachineLICMBase::AddToLiveIns(MCRegister Reg) { +void MachineLICMBase::AddToLiveIns(MCRegister Reg, MachineLoop *CurLoop) { for (MachineBasicBlock *BB : CurLoop->getBlocks()) { if (!BB->isLiveIn(Reg)) BB->addLiveIn(Reg); @@ -603,7 +614,7 @@ void MachineLICMBase::AddToLiveIns(MCRegister Reg) { for (MachineOperand &MO : MI.all_uses()) { if (!MO.getReg()) continue; - if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg())) + if (TRI->regsOverlap(Reg, MO.getReg())) MO.setIsKill(false); } } @@ -612,8 +623,10 @@ void MachineLICMBase::AddToLiveIns(MCRegister Reg) { /// When an instruction is found to only use loop invariant operands that is /// safe to hoist, this instruction is called to do the dirty work. -void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def) { - MachineBasicBlock *Preheader = getCurPreheader(); +void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def, + MachineLoop *CurLoop, + MachineBasicBlock *CurPreheader) { + MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader); // Now move the instructions to the predecessor, inserting it before any // terminator instructions. @@ -634,7 +647,7 @@ void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def) { // Add register to livein list to all the BBs in the current loop since a // loop invariant must be kept live throughout the whole loop. This is // important to ensure later passes do not scavenge the def register. - AddToLiveIns(Def); + AddToLiveIns(Def, CurLoop); ++NumPostRAHoisted; Changed = true; @@ -642,7 +655,8 @@ void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def) { /// Check if this mbb is guaranteed to execute. If not then a load from this mbb /// may not be safe to hoist. -bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) { +bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB, + MachineLoop *CurLoop) { if (SpeculationState != SpeculateUnknown) return SpeculationState == SpeculateFalse; @@ -713,8 +727,10 @@ void MachineLICMBase::ExitScopeIfDone(MachineDomTreeNode *Node, /// specified header block, and that are in the current loop) in depth first /// order w.r.t the DominatorTree. This allows us to visit definitions before /// uses, allowing us to hoist a loop body in one pass without iteration. -void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { - MachineBasicBlock *Preheader = getCurPreheader(); +void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN, + MachineLoop *CurLoop, + MachineBasicBlock *CurPreheader) { + MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader); if (!Preheader) return; @@ -778,10 +794,31 @@ void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { // Process the block SpeculationState = SpeculateUnknown; for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) { - if (!Hoist(&MI, Preheader)) - UpdateRegPressure(&MI); - // If we have hoisted an instruction that may store, it can only be a - // constant store. + unsigned HoistRes = HoistResult::NotHoisted; + HoistRes = Hoist(&MI, Preheader, CurLoop); + if (HoistRes & HoistResult::NotHoisted) { + // We have failed to hoist MI to outermost loop's preheader. If MI is in + // a subloop, try to hoist it to subloop's preheader. + SmallVector<MachineLoop *> InnerLoopWorkList; + for (MachineLoop *L = MLI->getLoopFor(MI.getParent()); L != CurLoop; + L = L->getParentLoop()) + InnerLoopWorkList.push_back(L); + + while (!InnerLoopWorkList.empty()) { + MachineLoop *InnerLoop = InnerLoopWorkList.pop_back_val(); + MachineBasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader(); + if (InnerLoopPreheader) { + HoistRes = Hoist(&MI, InnerLoopPreheader, InnerLoop); + if (HoistRes & HoistResult::Hoisted) + break; + } + } + } + + if (HoistRes & HoistResult::ErasedMI) + continue; + + UpdateRegPressure(&MI); } // If it's a leaf node, it's done. Traverse upwards to pop ancestors. @@ -966,9 +1003,9 @@ static bool isCopyFeedingInvariantStore(const MachineInstr &MI, /// Returns true if the instruction may be a suitable candidate for LICM. /// e.g. If the instruction is a call, then it's obviously not safe to hoist it. -bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) { +bool MachineLICMBase::IsLICMCandidate(MachineInstr &I, MachineLoop *CurLoop) { // Check if it's safe to move the instruction. - bool DontMoveAcrossStore = true; + bool DontMoveAcrossStore = !HoistConstLoads || !AllowedToHoistLoads[CurLoop]; if ((!I.isSafeToMove(AA, DontMoveAcrossStore)) && !(HoistConstStores && isInvariantStore(I, TRI, MRI))) { LLVM_DEBUG(dbgs() << "LICM: Instruction not safe to move.\n"); @@ -982,7 +1019,7 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) { // from a jump table. // Stores and side effects are already checked by isSafeToMove. if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) && - !IsGuaranteedToExecute(I.getParent())) { + !IsGuaranteedToExecute(I.getParent(), CurLoop)) { LLVM_DEBUG(dbgs() << "LICM: Load not guaranteed to execute.\n"); return false; } @@ -1001,8 +1038,9 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) { } /// Returns true if the instruction is loop invariant. -bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) { - if (!IsLICMCandidate(I)) { +bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I, + MachineLoop *CurLoop) { + if (!IsLICMCandidate(I, CurLoop)) { LLVM_DEBUG(dbgs() << "LICM: Instruction not a LICM candidate\n"); return false; } @@ -1011,8 +1049,9 @@ bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) { /// Return true if the specified instruction is used by a phi node and hoisting /// it could cause a copy to be inserted. -bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const { - SmallVector<const MachineInstr*, 8> Work(1, MI); +bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI, + MachineLoop *CurLoop) { + SmallVector<const MachineInstr *, 8> Work(1, MI); do { MI = Work.pop_back_val(); for (const MachineOperand &MO : MI->all_defs()) { @@ -1029,7 +1068,7 @@ bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const { // A PHI in an exit block can cause a copy to be inserted if the PHI // has multiple predecessors in the loop with different values. // For now, approximate by rejecting all exit blocks. - if (isExitBlock(UseMI.getParent())) + if (isExitBlock(CurLoop, UseMI.getParent())) return true; continue; } @@ -1045,7 +1084,8 @@ bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const { /// Compute operand latency between a def of 'Reg' and an use in the current /// loop, return true if the target considered it high. bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, - Register Reg) const { + Register Reg, + MachineLoop *CurLoop) const { if (MRI->use_nodbg_empty(Reg)) return false; @@ -1140,7 +1180,8 @@ void MachineLICMBase::UpdateBackTraceRegPressure(const MachineInstr *MI) { /// Return true if it is potentially profitable to hoist the given loop /// invariant. -bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) { +bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI, + MachineLoop *CurLoop) { if (MI.isImplicitDef()) return true; @@ -1160,7 +1201,7 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) { return true; bool CheapInstr = IsCheapInstruction(MI); - bool CreatesCopy = HasLoopPHIUse(&MI); + bool CreatesCopy = HasLoopPHIUse(&MI, CurLoop); // Don't hoist a cheap instruction if it would create a copy in the loop. if (CheapInstr && CreatesCopy) { @@ -1182,7 +1223,7 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) { Register Reg = MO.getReg(); if (!Reg.isVirtual()) continue; - if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) { + if (MO.isDef() && HasHighOperandLatency(MI, i, Reg, CurLoop)) { LLVM_DEBUG(dbgs() << "Hoist High Latency: " << MI); ++NumHighLatency; return true; @@ -1216,11 +1257,23 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) { // instruction is not guaranteed to be executed in the loop, it's best to be // conservative. if (AvoidSpeculation && - (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) { + (!IsGuaranteedToExecute(MI.getParent(), CurLoop) && !MayCSE(&MI))) { LLVM_DEBUG(dbgs() << "Won't speculate: " << MI); return false; } + // If we have a COPY with other uses in the loop, hoist to allow the users to + // also be hoisted. + if (MI.isCopy() && MI.getOperand(0).isReg() && + MI.getOperand(0).getReg().isVirtual() && MI.getOperand(1).isReg() && + MI.getOperand(1).getReg().isVirtual() && + IsLoopInvariantInst(MI, CurLoop) && + any_of(MRI->use_nodbg_instructions(MI.getOperand(0).getReg()), + [&CurLoop](MachineInstr &UseMI) { + return CurLoop->contains(&UseMI); + })) + return true; + // High register pressure situation, only hoist if the instruction is going // to be remat'ed. if (!isTriviallyReMaterializable(MI) && @@ -1235,7 +1288,8 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) { /// Unfold a load from the given machineinstr if the load itself could be /// hoisted. Return the unfolded and hoistable load, or null if the load /// couldn't be unfolded or if it wouldn't be hoistable. -MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) { +MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI, + MachineLoop *CurLoop) { // Don't unfold simple loads. if (MI->canFoldAsLoad()) return nullptr; @@ -1276,7 +1330,8 @@ MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) { MBB->insert(Pos, NewMIs[1]); // If unfolding produced a load that wasn't loop-invariant or profitable to // hoist, discard the new instructions and bail. - if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) { + if (!IsLoopInvariantInst(*NewMIs[0], CurLoop) || + !IsProfitableToHoist(*NewMIs[0], CurLoop)) { NewMIs[0]->eraseFromParent(); NewMIs[1]->eraseFromParent(); return nullptr; @@ -1300,7 +1355,47 @@ MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) { /// out of the loop. void MachineLICMBase::InitCSEMap(MachineBasicBlock *BB) { for (MachineInstr &MI : *BB) - CSEMap[MI.getOpcode()].push_back(&MI); + CSEMap[BB][MI.getOpcode()].push_back(&MI); +} + +/// Initialize AllowedToHoistLoads with information about whether invariant +/// loads can be moved outside a given loop +void MachineLICMBase::InitializeLoadsHoistableLoops() { + SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end()); + SmallVector<MachineLoop *, 8> LoopsInPreOrder; + + // Mark all loops as hoistable initially and prepare a list of loops in + // pre-order DFS. + while (!Worklist.empty()) { + auto *L = Worklist.pop_back_val(); + AllowedToHoistLoads[L] = true; + LoopsInPreOrder.push_back(L); + Worklist.insert(Worklist.end(), L->getSubLoops().begin(), + L->getSubLoops().end()); + } + + // Going from the innermost to outermost loops, check if a loop has + // instructions preventing invariant load hoisting. If such instruction is + // found, mark this loop and its parent as non-hoistable and continue + // investigating the next loop. + // Visiting in a reversed pre-ordered DFS manner + // allows us to not process all the instructions of the outer loop if the + // inner loop is proved to be non-load-hoistable. + for (auto *Loop : reverse(LoopsInPreOrder)) { + for (auto *MBB : Loop->blocks()) { + // If this loop has already been marked as non-hoistable, skip it. + if (!AllowedToHoistLoads[Loop]) + continue; + for (auto &MI : *MBB) { + if (!MI.mayStore() && !MI.isCall() && + !(MI.mayLoad() && MI.hasOrderedMemoryRef())) + continue; + for (MachineLoop *L = Loop; L != nullptr; L = L->getParentLoop()) + AllowedToHoistLoads[L] = false; + break; + } + } + } } /// Find an instruction amount PrevMIs that is a duplicate of MI. @@ -1324,7 +1419,12 @@ bool MachineLICMBase::EliminateCSE( DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI) { // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate // the undef property onto uses. - if (CI == CSEMap.end() || MI->isImplicitDef()) + if (MI->isImplicitDef()) + return false; + + // Do not CSE normal loads because between them could be store instructions + // that change the loaded value + if (MI->mayLoad() && !MI->isDereferenceableInvariantLoad()) return false; if (MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { @@ -1380,21 +1480,32 @@ bool MachineLICMBase::EliminateCSE( /// Return true if the given instruction will be CSE'd if it's hoisted out of /// the loop. bool MachineLICMBase::MayCSE(MachineInstr *MI) { - unsigned Opcode = MI->getOpcode(); - DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI = - CSEMap.find(Opcode); - // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate - // the undef property onto uses. - if (CI == CSEMap.end() || MI->isImplicitDef()) + if (MI->mayLoad() && !MI->isDereferenceableInvariantLoad()) return false; - return LookForDuplicate(MI, CI->second) != nullptr; + unsigned Opcode = MI->getOpcode(); + for (auto &Map : CSEMap) { + // Check this CSEMap's preheader dominates MI's basic block. + if (DT->dominates(Map.first, MI->getParent())) { + DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI = + Map.second.find(Opcode); + // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate + // the undef property onto uses. + if (CI == Map.second.end() || MI->isImplicitDef()) + continue; + if (LookForDuplicate(MI, CI->second) != nullptr) + return true; + } + } + + return false; } /// When an instruction is found to use only loop invariant operands /// that are safe to hoist, this instruction is called to do the dirty work. /// It returns true if the instruction is hoisted. -bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { +unsigned MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader, + MachineLoop *CurLoop) { MachineBasicBlock *SrcBlock = MI->getParent(); // Disable the instruction hoisting due to block hotness @@ -1402,13 +1513,17 @@ bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { (DisableHoistingToHotterBlocks == UseBFI::PGO && HasProfileData)) && isTgtHotterThanSrc(SrcBlock, Preheader)) { ++NumNotHoistedDueToHotness; - return false; + return HoistResult::NotHoisted; } // First check whether we should hoist this instruction. - if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { + bool HasExtractHoistableLoad = false; + if (!IsLoopInvariantInst(*MI, CurLoop) || + !IsProfitableToHoist(*MI, CurLoop)) { // If not, try unfolding a hoistable load. - MI = ExtractHoistableLoad(MI); - if (!MI) return false; + MI = ExtractHoistableLoad(MI, CurLoop); + if (!MI) + return HoistResult::NotHoisted; + HasExtractHoistableLoad = true; } // If we have hoisted an instruction that may store, it can only be a constant @@ -1436,9 +1551,22 @@ bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { // Look for opportunity to CSE the hoisted instruction. unsigned Opcode = MI->getOpcode(); - DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI = - CSEMap.find(Opcode); - if (!EliminateCSE(MI, CI)) { + bool HasCSEDone = false; + for (auto &Map : CSEMap) { + // Check this CSEMap's preheader dominates MI's basic block. + if (DT->dominates(Map.first, MI->getParent())) { + DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI = + Map.second.find(Opcode); + if (CI != Map.second.end()) { + if (EliminateCSE(MI, CI)) { + HasCSEDone = true; + break; + } + } + } + } + + if (!HasCSEDone) { // Otherwise, splice the instruction to the preheader. Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI); @@ -1458,21 +1586,21 @@ bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { if (!MO.isDead()) MRI->clearKillFlags(MO.getReg()); - // Add to the CSE map. - if (CI != CSEMap.end()) - CI->second.push_back(MI); - else - CSEMap[Opcode].push_back(MI); + CSEMap[Preheader][Opcode].push_back(MI); } ++NumHoisted; Changed = true; - return true; + if (HasCSEDone || HasExtractHoistableLoad) + return HoistResult::Hoisted | HoistResult::ErasedMI; + return HoistResult::Hoisted; } /// Get the preheader for the current loop, splitting a critical edge if needed. -MachineBasicBlock *MachineLICMBase::getCurPreheader() { +MachineBasicBlock * +MachineLICMBase::getCurPreheader(MachineLoop *CurLoop, + MachineBasicBlock *CurPreheader) { // Determine the block to which to hoist instructions. If we can't find a // suitable loop predecessor, we can't do any hoisting. |
