aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen/MachineLICM.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/CodeGen/MachineLICM.cpp')
-rw-r--r--llvm/lib/CodeGen/MachineLICM.cpp344
1 files changed, 236 insertions, 108 deletions
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 4e80e9b58c06..efc19f8fdbf8 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -72,6 +72,11 @@ static cl::opt<bool>
HoistConstStores("hoist-const-stores",
cl::desc("Hoist invariant stores"),
cl::init(true), cl::Hidden);
+
+static cl::opt<bool> HoistConstLoads("hoist-const-loads",
+ cl::desc("Hoist invariant loads"),
+ cl::init(true), cl::Hidden);
+
// The default threshold of 100 (i.e. if target block is 100 times hotter)
// is based on empirical data on a single target and is subject to tuning.
static cl::opt<unsigned>
@@ -110,6 +115,7 @@ STATISTIC(NumNotHoistedDueToHotness,
"Number of instructions not hoisted due to block frequency");
namespace {
+ enum HoistResult { NotHoisted = 1, Hoisted = 2, ErasedMI = 4 };
class MachineLICMBase : public MachineFunctionPass {
const TargetInstrInfo *TII = nullptr;
@@ -130,13 +136,21 @@ namespace {
// State that is updated as we process loops
bool Changed = false; // True if a loop is changed.
bool FirstInLoop = false; // True if it's the first LICM in the loop.
- MachineLoop *CurLoop = nullptr; // The current loop we are working on.
- MachineBasicBlock *CurPreheader = nullptr; // The preheader for CurLoop.
- // Exit blocks for CurLoop.
- SmallVector<MachineBasicBlock *, 8> ExitBlocks;
+ // Holds information about whether it is allowed to move load instructions
+ // out of the loop
+ SmallDenseMap<MachineLoop *, bool> AllowedToHoistLoads;
+
+ // Exit blocks of each Loop.
+ DenseMap<MachineLoop *, SmallVector<MachineBasicBlock *, 8>> ExitBlockMap;
+
+ bool isExitBlock(MachineLoop *CurLoop, const MachineBasicBlock *MBB) {
+ if (ExitBlockMap.contains(CurLoop))
+ return is_contained(ExitBlockMap[CurLoop], MBB);
- bool isExitBlock(const MachineBasicBlock *MBB) const {
+ SmallVector<MachineBasicBlock *, 8> ExitBlocks;
+ CurLoop->getExitBlocks(ExitBlocks);
+ ExitBlockMap[CurLoop] = ExitBlocks;
return is_contained(ExitBlocks, MBB);
}
@@ -151,8 +165,10 @@ namespace {
// Register pressure on path leading from loop preheader to current BB.
SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
- // For each opcode, keep a list of potential CSE instructions.
- DenseMap<unsigned, std::vector<MachineInstr *>> CSEMap;
+ // For each opcode per preheader, keep a list of potential CSE instructions.
+ DenseMap<MachineBasicBlock *,
+ DenseMap<unsigned, std::vector<MachineInstr *>>>
+ CSEMap;
enum {
SpeculateFalse = 0,
@@ -187,6 +203,7 @@ namespace {
RegLimit.clear();
BackTrace.clear();
CSEMap.clear();
+ ExitBlockMap.clear();
}
private:
@@ -200,24 +217,27 @@ namespace {
: MI(mi), Def(def), FI(fi) {}
};
- void HoistRegionPostRA();
+ void HoistRegionPostRA(MachineLoop *CurLoop,
+ MachineBasicBlock *CurPreheader);
- void HoistPostRA(MachineInstr *MI, unsigned Def);
+ void HoistPostRA(MachineInstr *MI, unsigned Def, MachineLoop *CurLoop,
+ MachineBasicBlock *CurPreheader);
void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs,
- SmallVectorImpl<CandidateInfo> &Candidates);
+ SmallVectorImpl<CandidateInfo> &Candidates,
+ MachineLoop *CurLoop);
- void AddToLiveIns(MCRegister Reg);
+ void AddToLiveIns(MCRegister Reg, MachineLoop *CurLoop);
- bool IsLICMCandidate(MachineInstr &I);
+ bool IsLICMCandidate(MachineInstr &I, MachineLoop *CurLoop);
- bool IsLoopInvariantInst(MachineInstr &I);
+ bool IsLoopInvariantInst(MachineInstr &I, MachineLoop *CurLoop);
- bool HasLoopPHIUse(const MachineInstr *MI) const;
+ bool HasLoopPHIUse(const MachineInstr *MI, MachineLoop *CurLoop);
- bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
- Register Reg) const;
+ bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, Register Reg,
+ MachineLoop *CurLoop) const;
bool IsCheapInstruction(MachineInstr &MI) const;
@@ -226,9 +246,9 @@ namespace {
void UpdateBackTraceRegPressure(const MachineInstr *MI);
- bool IsProfitableToHoist(MachineInstr &MI);
+ bool IsProfitableToHoist(MachineInstr &MI, MachineLoop *CurLoop);
- bool IsGuaranteedToExecute(MachineBasicBlock *BB);
+ bool IsGuaranteedToExecute(MachineBasicBlock *BB, MachineLoop *CurLoop);
bool isTriviallyReMaterializable(const MachineInstr &MI) const;
@@ -241,7 +261,8 @@ namespace {
DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);
- void HoistOutOfLoop(MachineDomTreeNode *HeaderN);
+ void HoistOutOfLoop(MachineDomTreeNode *HeaderN, MachineLoop *CurLoop,
+ MachineBasicBlock *CurPreheader);
void InitRegPressure(MachineBasicBlock *BB);
@@ -252,7 +273,7 @@ namespace {
void UpdateRegPressure(const MachineInstr *MI,
bool ConsiderUnseenAsDef = false);
- MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
+ MachineInstr *ExtractHoistableLoad(MachineInstr *MI, MachineLoop *CurLoop);
MachineInstr *LookForDuplicate(const MachineInstr *MI,
std::vector<MachineInstr *> &PrevMIs);
@@ -263,13 +284,17 @@ namespace {
bool MayCSE(MachineInstr *MI);
- bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
+ unsigned Hoist(MachineInstr *MI, MachineBasicBlock *Preheader,
+ MachineLoop *CurLoop);
void InitCSEMap(MachineBasicBlock *BB);
+ void InitializeLoadsHoistableLoops();
+
bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
MachineBasicBlock *TgtBlock);
- MachineBasicBlock *getCurPreheader();
+ MachineBasicBlock *getCurPreheader(MachineLoop *CurLoop,
+ MachineBasicBlock *CurPreheader);
};
class MachineLICM : public MachineLICMBase {
@@ -314,19 +339,6 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm",
"Early Machine Loop Invariant Code Motion", false, false)
-/// Test if the given loop is the outer-most loop that has a unique predecessor.
-static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
- // Check whether this loop even has a unique predecessor.
- if (!CurLoop->getLoopPredecessor())
- return false;
- // Ok, now check to see if any of its outer loops do.
- for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
- if (L->getLoopPredecessor())
- return false;
- // None of them did, so this is the outermost with a unique predecessor.
- return true;
-}
-
bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -366,29 +378,22 @@ bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
DT = &getAnalysis<MachineDominatorTree>();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ if (HoistConstLoads)
+ InitializeLoadsHoistableLoops();
+
SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
while (!Worklist.empty()) {
- CurLoop = Worklist.pop_back_val();
- CurPreheader = nullptr;
- ExitBlocks.clear();
-
- // If this is done before regalloc, only visit outer-most preheader-sporting
- // loops.
- if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
- Worklist.append(CurLoop->begin(), CurLoop->end());
- continue;
- }
-
- CurLoop->getExitBlocks(ExitBlocks);
+ MachineLoop *CurLoop = Worklist.pop_back_val();
+ MachineBasicBlock *CurPreheader = nullptr;
if (!PreRegAlloc)
- HoistRegionPostRA();
+ HoistRegionPostRA(CurLoop, CurPreheader);
else {
// CSEMap is initialized for loop header when the first instruction is
// being hoisted.
MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
FirstInLoop = true;
- HoistOutOfLoop(N);
+ HoistOutOfLoop(N, CurLoop, CurPreheader);
CSEMap.clear();
}
}
@@ -420,11 +425,11 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
/// Examine the instruction for potentai LICM candidate. Also
/// gather register def and frame object update information.
-void MachineLICMBase::ProcessMI(MachineInstr *MI,
- BitVector &PhysRegDefs,
+void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
BitVector &PhysRegClobbers,
SmallSet<int, 32> &StoredFIs,
- SmallVectorImpl<CandidateInfo> &Candidates) {
+ SmallVectorImpl<CandidateInfo> &Candidates,
+ MachineLoop *CurLoop) {
bool RuledOut = false;
bool HasNonInvariantUse = false;
unsigned Def = 0;
@@ -502,7 +507,7 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI,
// operands. FIXME: Consider unfold load folding instructions.
if (Def && !RuledOut) {
int FI = std::numeric_limits<int>::min();
- if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
+ if ((!HasNonInvariantUse && IsLICMCandidate(*MI, CurLoop)) ||
(TII->isLoadFromStackSlot(*MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
Candidates.push_back(CandidateInfo(MI, Def, FI));
}
@@ -510,8 +515,9 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI,
/// Walk the specified region of the CFG and hoist loop invariants out to the
/// preheader.
-void MachineLICMBase::HoistRegionPostRA() {
- MachineBasicBlock *Preheader = getCurPreheader();
+void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop,
+ MachineBasicBlock *CurPreheader) {
+ MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader);
if (!Preheader)
return;
@@ -538,9 +544,14 @@ void MachineLICMBase::HoistRegionPostRA() {
PhysRegDefs.set(*AI);
}
+ // Funclet entry blocks will clobber all registers
+ if (const uint32_t *Mask = BB->getBeginClobberMask(TRI))
+ PhysRegClobbers.setBitsNotInMask(Mask);
+
SpeculationState = SpeculateUnknown;
for (MachineInstr &MI : *BB)
- ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
+ ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates,
+ CurLoop);
}
// Gather the registers read / clobbered by the terminator.
@@ -588,14 +599,14 @@ void MachineLICMBase::HoistRegionPostRA() {
}
}
if (Safe)
- HoistPostRA(MI, Candidate.Def);
+ HoistPostRA(MI, Candidate.Def, CurLoop, CurPreheader);
}
}
}
/// Add register 'Reg' to the livein sets of BBs in the current loop, and make
/// sure it is not killed by any instructions in the loop.
-void MachineLICMBase::AddToLiveIns(MCRegister Reg) {
+void MachineLICMBase::AddToLiveIns(MCRegister Reg, MachineLoop *CurLoop) {
for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
if (!BB->isLiveIn(Reg))
BB->addLiveIn(Reg);
@@ -603,7 +614,7 @@ void MachineLICMBase::AddToLiveIns(MCRegister Reg) {
for (MachineOperand &MO : MI.all_uses()) {
if (!MO.getReg())
continue;
- if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
+ if (TRI->regsOverlap(Reg, MO.getReg()))
MO.setIsKill(false);
}
}
@@ -612,8 +623,10 @@ void MachineLICMBase::AddToLiveIns(MCRegister Reg) {
/// When an instruction is found to only use loop invariant operands that is
/// safe to hoist, this instruction is called to do the dirty work.
-void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def) {
- MachineBasicBlock *Preheader = getCurPreheader();
+void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def,
+ MachineLoop *CurLoop,
+ MachineBasicBlock *CurPreheader) {
+ MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader);
// Now move the instructions to the predecessor, inserting it before any
// terminator instructions.
@@ -634,7 +647,7 @@ void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def) {
// Add register to livein list to all the BBs in the current loop since a
// loop invariant must be kept live throughout the whole loop. This is
// important to ensure later passes do not scavenge the def register.
- AddToLiveIns(Def);
+ AddToLiveIns(Def, CurLoop);
++NumPostRAHoisted;
Changed = true;
@@ -642,7 +655,8 @@ void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def) {
/// Check if this mbb is guaranteed to execute. If not then a load from this mbb
/// may not be safe to hoist.
-bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) {
+bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB,
+ MachineLoop *CurLoop) {
if (SpeculationState != SpeculateUnknown)
return SpeculationState == SpeculateFalse;
@@ -713,8 +727,10 @@ void MachineLICMBase::ExitScopeIfDone(MachineDomTreeNode *Node,
/// specified header block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
/// uses, allowing us to hoist a loop body in one pass without iteration.
-void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
- MachineBasicBlock *Preheader = getCurPreheader();
+void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN,
+ MachineLoop *CurLoop,
+ MachineBasicBlock *CurPreheader) {
+ MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader);
if (!Preheader)
return;
@@ -778,10 +794,31 @@ void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
// Process the block
SpeculationState = SpeculateUnknown;
for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
- if (!Hoist(&MI, Preheader))
- UpdateRegPressure(&MI);
- // If we have hoisted an instruction that may store, it can only be a
- // constant store.
+ unsigned HoistRes = HoistResult::NotHoisted;
+ HoistRes = Hoist(&MI, Preheader, CurLoop);
+ if (HoistRes & HoistResult::NotHoisted) {
+ // We have failed to hoist MI to outermost loop's preheader. If MI is in
+ // a subloop, try to hoist it to subloop's preheader.
+ SmallVector<MachineLoop *> InnerLoopWorkList;
+ for (MachineLoop *L = MLI->getLoopFor(MI.getParent()); L != CurLoop;
+ L = L->getParentLoop())
+ InnerLoopWorkList.push_back(L);
+
+ while (!InnerLoopWorkList.empty()) {
+ MachineLoop *InnerLoop = InnerLoopWorkList.pop_back_val();
+ MachineBasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
+ if (InnerLoopPreheader) {
+ HoistRes = Hoist(&MI, InnerLoopPreheader, InnerLoop);
+ if (HoistRes & HoistResult::Hoisted)
+ break;
+ }
+ }
+ }
+
+ if (HoistRes & HoistResult::ErasedMI)
+ continue;
+
+ UpdateRegPressure(&MI);
}
// If it's a leaf node, it's done. Traverse upwards to pop ancestors.
@@ -966,9 +1003,9 @@ static bool isCopyFeedingInvariantStore(const MachineInstr &MI,
/// Returns true if the instruction may be a suitable candidate for LICM.
/// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
-bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
+bool MachineLICMBase::IsLICMCandidate(MachineInstr &I, MachineLoop *CurLoop) {
// Check if it's safe to move the instruction.
- bool DontMoveAcrossStore = true;
+ bool DontMoveAcrossStore = !HoistConstLoads || !AllowedToHoistLoads[CurLoop];
if ((!I.isSafeToMove(AA, DontMoveAcrossStore)) &&
!(HoistConstStores && isInvariantStore(I, TRI, MRI))) {
LLVM_DEBUG(dbgs() << "LICM: Instruction not safe to move.\n");
@@ -982,7 +1019,7 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
// from a jump table.
// Stores and side effects are already checked by isSafeToMove.
if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) &&
- !IsGuaranteedToExecute(I.getParent())) {
+ !IsGuaranteedToExecute(I.getParent(), CurLoop)) {
LLVM_DEBUG(dbgs() << "LICM: Load not guaranteed to execute.\n");
return false;
}
@@ -1001,8 +1038,9 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
}
/// Returns true if the instruction is loop invariant.
-bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) {
- if (!IsLICMCandidate(I)) {
+bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I,
+ MachineLoop *CurLoop) {
+ if (!IsLICMCandidate(I, CurLoop)) {
LLVM_DEBUG(dbgs() << "LICM: Instruction not a LICM candidate\n");
return false;
}
@@ -1011,8 +1049,9 @@ bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) {
/// Return true if the specified instruction is used by a phi node and hoisting
/// it could cause a copy to be inserted.
-bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const {
- SmallVector<const MachineInstr*, 8> Work(1, MI);
+bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI,
+ MachineLoop *CurLoop) {
+ SmallVector<const MachineInstr *, 8> Work(1, MI);
do {
MI = Work.pop_back_val();
for (const MachineOperand &MO : MI->all_defs()) {
@@ -1029,7 +1068,7 @@ bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const {
// A PHI in an exit block can cause a copy to be inserted if the PHI
// has multiple predecessors in the loop with different values.
// For now, approximate by rejecting all exit blocks.
- if (isExitBlock(UseMI.getParent()))
+ if (isExitBlock(CurLoop, UseMI.getParent()))
return true;
continue;
}
@@ -1045,7 +1084,8 @@ bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const {
/// Compute operand latency between a def of 'Reg' and an use in the current
/// loop, return true if the target considered it high.
bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
- Register Reg) const {
+ Register Reg,
+ MachineLoop *CurLoop) const {
if (MRI->use_nodbg_empty(Reg))
return false;
@@ -1140,7 +1180,8 @@ void MachineLICMBase::UpdateBackTraceRegPressure(const MachineInstr *MI) {
/// Return true if it is potentially profitable to hoist the given loop
/// invariant.
-bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
+bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI,
+ MachineLoop *CurLoop) {
if (MI.isImplicitDef())
return true;
@@ -1160,7 +1201,7 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
return true;
bool CheapInstr = IsCheapInstruction(MI);
- bool CreatesCopy = HasLoopPHIUse(&MI);
+ bool CreatesCopy = HasLoopPHIUse(&MI, CurLoop);
// Don't hoist a cheap instruction if it would create a copy in the loop.
if (CheapInstr && CreatesCopy) {
@@ -1182,7 +1223,7 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
Register Reg = MO.getReg();
if (!Reg.isVirtual())
continue;
- if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) {
+ if (MO.isDef() && HasHighOperandLatency(MI, i, Reg, CurLoop)) {
LLVM_DEBUG(dbgs() << "Hoist High Latency: " << MI);
++NumHighLatency;
return true;
@@ -1216,11 +1257,23 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
// instruction is not guaranteed to be executed in the loop, it's best to be
// conservative.
if (AvoidSpeculation &&
- (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
+ (!IsGuaranteedToExecute(MI.getParent(), CurLoop) && !MayCSE(&MI))) {
LLVM_DEBUG(dbgs() << "Won't speculate: " << MI);
return false;
}
+ // If we have a COPY with other uses in the loop, hoist to allow the users to
+ // also be hoisted.
+ if (MI.isCopy() && MI.getOperand(0).isReg() &&
+ MI.getOperand(0).getReg().isVirtual() && MI.getOperand(1).isReg() &&
+ MI.getOperand(1).getReg().isVirtual() &&
+ IsLoopInvariantInst(MI, CurLoop) &&
+ any_of(MRI->use_nodbg_instructions(MI.getOperand(0).getReg()),
+ [&CurLoop](MachineInstr &UseMI) {
+ return CurLoop->contains(&UseMI);
+ }))
+ return true;
+
// High register pressure situation, only hoist if the instruction is going
// to be remat'ed.
if (!isTriviallyReMaterializable(MI) &&
@@ -1235,7 +1288,8 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
/// Unfold a load from the given machineinstr if the load itself could be
/// hoisted. Return the unfolded and hoistable load, or null if the load
/// couldn't be unfolded or if it wouldn't be hoistable.
-MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) {
+MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI,
+ MachineLoop *CurLoop) {
// Don't unfold simple loads.
if (MI->canFoldAsLoad())
return nullptr;
@@ -1276,7 +1330,8 @@ MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) {
MBB->insert(Pos, NewMIs[1]);
// If unfolding produced a load that wasn't loop-invariant or profitable to
// hoist, discard the new instructions and bail.
- if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
+ if (!IsLoopInvariantInst(*NewMIs[0], CurLoop) ||
+ !IsProfitableToHoist(*NewMIs[0], CurLoop)) {
NewMIs[0]->eraseFromParent();
NewMIs[1]->eraseFromParent();
return nullptr;
@@ -1300,7 +1355,47 @@ MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) {
/// out of the loop.
void MachineLICMBase::InitCSEMap(MachineBasicBlock *BB) {
for (MachineInstr &MI : *BB)
- CSEMap[MI.getOpcode()].push_back(&MI);
+ CSEMap[BB][MI.getOpcode()].push_back(&MI);
+}
+
+/// Initialize AllowedToHoistLoads with information about whether invariant
+/// loads can be moved outside a given loop
+void MachineLICMBase::InitializeLoadsHoistableLoops() {
+ SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
+ SmallVector<MachineLoop *, 8> LoopsInPreOrder;
+
+ // Mark all loops as hoistable initially and prepare a list of loops in
+ // pre-order DFS.
+ while (!Worklist.empty()) {
+ auto *L = Worklist.pop_back_val();
+ AllowedToHoistLoads[L] = true;
+ LoopsInPreOrder.push_back(L);
+ Worklist.insert(Worklist.end(), L->getSubLoops().begin(),
+ L->getSubLoops().end());
+ }
+
+ // Going from the innermost to outermost loops, check if a loop has
+ // instructions preventing invariant load hoisting. If such instruction is
+ // found, mark this loop and its parent as non-hoistable and continue
+ // investigating the next loop.
+ // Visiting in a reversed pre-ordered DFS manner
+ // allows us to not process all the instructions of the outer loop if the
+ // inner loop is proved to be non-load-hoistable.
+ for (auto *Loop : reverse(LoopsInPreOrder)) {
+ for (auto *MBB : Loop->blocks()) {
+ // If this loop has already been marked as non-hoistable, skip it.
+ if (!AllowedToHoistLoads[Loop])
+ continue;
+ for (auto &MI : *MBB) {
+ if (!MI.mayStore() && !MI.isCall() &&
+ !(MI.mayLoad() && MI.hasOrderedMemoryRef()))
+ continue;
+ for (MachineLoop *L = Loop; L != nullptr; L = L->getParentLoop())
+ AllowedToHoistLoads[L] = false;
+ break;
+ }
+ }
+ }
}
/// Find an instruction amount PrevMIs that is a duplicate of MI.
@@ -1324,7 +1419,12 @@ bool MachineLICMBase::EliminateCSE(
DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI) {
// Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
// the undef property onto uses.
- if (CI == CSEMap.end() || MI->isImplicitDef())
+ if (MI->isImplicitDef())
+ return false;
+
+ // Do not CSE normal loads because between them could be store instructions
+ // that change the loaded value
+ if (MI->mayLoad() && !MI->isDereferenceableInvariantLoad())
return false;
if (MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
@@ -1380,21 +1480,32 @@ bool MachineLICMBase::EliminateCSE(
/// Return true if the given instruction will be CSE'd if it's hoisted out of
/// the loop.
bool MachineLICMBase::MayCSE(MachineInstr *MI) {
- unsigned Opcode = MI->getOpcode();
- DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
- CSEMap.find(Opcode);
- // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
- // the undef property onto uses.
- if (CI == CSEMap.end() || MI->isImplicitDef())
+ if (MI->mayLoad() && !MI->isDereferenceableInvariantLoad())
return false;
- return LookForDuplicate(MI, CI->second) != nullptr;
+ unsigned Opcode = MI->getOpcode();
+ for (auto &Map : CSEMap) {
+ // Check this CSEMap's preheader dominates MI's basic block.
+ if (DT->dominates(Map.first, MI->getParent())) {
+ DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
+ Map.second.find(Opcode);
+ // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
+ // the undef property onto uses.
+ if (CI == Map.second.end() || MI->isImplicitDef())
+ continue;
+ if (LookForDuplicate(MI, CI->second) != nullptr)
+ return true;
+ }
+ }
+
+ return false;
}
/// When an instruction is found to use only loop invariant operands
/// that are safe to hoist, this instruction is called to do the dirty work.
/// It returns true if the instruction is hoisted.
-bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
+unsigned MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader,
+ MachineLoop *CurLoop) {
MachineBasicBlock *SrcBlock = MI->getParent();
// Disable the instruction hoisting due to block hotness
@@ -1402,13 +1513,17 @@ bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
(DisableHoistingToHotterBlocks == UseBFI::PGO && HasProfileData)) &&
isTgtHotterThanSrc(SrcBlock, Preheader)) {
++NumNotHoistedDueToHotness;
- return false;
+ return HoistResult::NotHoisted;
}
// First check whether we should hoist this instruction.
- if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
+ bool HasExtractHoistableLoad = false;
+ if (!IsLoopInvariantInst(*MI, CurLoop) ||
+ !IsProfitableToHoist(*MI, CurLoop)) {
// If not, try unfolding a hoistable load.
- MI = ExtractHoistableLoad(MI);
- if (!MI) return false;
+ MI = ExtractHoistableLoad(MI, CurLoop);
+ if (!MI)
+ return HoistResult::NotHoisted;
+ HasExtractHoistableLoad = true;
}
// If we have hoisted an instruction that may store, it can only be a constant
@@ -1436,9 +1551,22 @@ bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
// Look for opportunity to CSE the hoisted instruction.
unsigned Opcode = MI->getOpcode();
- DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
- CSEMap.find(Opcode);
- if (!EliminateCSE(MI, CI)) {
+ bool HasCSEDone = false;
+ for (auto &Map : CSEMap) {
+ // Check this CSEMap's preheader dominates MI's basic block.
+ if (DT->dominates(Map.first, MI->getParent())) {
+ DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
+ Map.second.find(Opcode);
+ if (CI != Map.second.end()) {
+ if (EliminateCSE(MI, CI)) {
+ HasCSEDone = true;
+ break;
+ }
+ }
+ }
+ }
+
+ if (!HasCSEDone) {
// Otherwise, splice the instruction to the preheader.
Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
@@ -1458,21 +1586,21 @@ bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
if (!MO.isDead())
MRI->clearKillFlags(MO.getReg());
- // Add to the CSE map.
- if (CI != CSEMap.end())
- CI->second.push_back(MI);
- else
- CSEMap[Opcode].push_back(MI);
+ CSEMap[Preheader][Opcode].push_back(MI);
}
++NumHoisted;
Changed = true;
- return true;
+ if (HasCSEDone || HasExtractHoistableLoad)
+ return HoistResult::Hoisted | HoistResult::ErasedMI;
+ return HoistResult::Hoisted;
}
/// Get the preheader for the current loop, splitting a critical edge if needed.
-MachineBasicBlock *MachineLICMBase::getCurPreheader() {
+MachineBasicBlock *
+MachineLICMBase::getCurPreheader(MachineLoop *CurLoop,
+ MachineBasicBlock *CurPreheader) {
// Determine the block to which to hoist instructions. If we can't find a
// suitable loop predecessor, we can't do any hoisting.