diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp | 125 |
1 files changed, 91 insertions, 34 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp index e13e33ed5457..2ae3157bab49 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp @@ -112,8 +112,10 @@ public: SmallVectorImpl<Register> &CandidateRegs) const; void collectWaterfallCandidateRegisters( - MachineBasicBlock *Loop, - SmallSetVector<Register, 16> &CandidateRegs) const; + MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd, + SmallSetVector<Register, 16> &CandidateRegs, + SmallSetVector<MachineBasicBlock *, 2> &Blocks, + SmallVectorImpl<MachineInstr *> &Instructions) const; void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB, SmallVectorImpl<MachineInstr *> &Uses) const; @@ -131,7 +133,10 @@ public: MachineBasicBlock *Flow, MachineBasicBlock *Endif, SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const; - void optimizeWaterfallLiveRange(Register Reg, MachineBasicBlock *If) const; + void optimizeWaterfallLiveRange( + Register Reg, MachineBasicBlock *LoopHeader, + SmallSetVector<MachineBasicBlock *, 2> &LoopBlocks, + SmallVectorImpl<MachineInstr *> &Instructions) const; SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {} @@ -323,12 +328,34 @@ void SIOptimizeVGPRLiveRange::collectCandidateRegisters( /// Collect the registers used in the waterfall loop block that are defined /// before. void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters( - MachineBasicBlock *Loop, - SmallSetVector<Register, 16> &CandidateRegs) const { + MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd, + SmallSetVector<Register, 16> &CandidateRegs, + SmallSetVector<MachineBasicBlock *, 2> &Blocks, + SmallVectorImpl<MachineInstr *> &Instructions) const { - for (auto &MI : Loop->instrs()) { - if (MI.isDebugInstr()) - continue; + // Collect loop instructions, potentially spanning multiple blocks + auto *MBB = LoopHeader; + for (;;) { + Blocks.insert(MBB); + for (auto &MI : *MBB) { + if (MI.isDebugInstr()) + continue; + Instructions.push_back(&MI); + } + if (MBB == LoopEnd) + break; + + if ((MBB != LoopHeader && MBB->pred_size() != 1) || + (MBB == LoopHeader && MBB->pred_size() != 2) || MBB->succ_size() != 1) { + LLVM_DEBUG(dbgs() << "Unexpected edges in CFG, ignoring loop\n"); + return; + } + + MBB = *MBB->succ_begin(); + } + + for (auto *I : Instructions) { + auto &MI = *I; for (auto &MO : MI.operands()) { if (!MO.isReg() || !MO.getReg() || MO.isDef()) @@ -340,16 +367,17 @@ void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters( continue; if (MO.readsReg()) { - const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent(); + MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent(); // Make sure the value is defined before the LOOP block - if (DefMBB != Loop && !CandidateRegs.contains(MOReg)) { + if (!Blocks.contains(DefMBB) && !CandidateRegs.contains(MOReg)) { // If the variable is used after the loop, the register coalescer will // merge the newly created register and remove the phi node again. // Just do nothing in that case. LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg); bool IsUsed = false; - for (auto *Succ : Loop->successors()) { - if (Succ != Loop && OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) { + for (auto *Succ : LoopEnd->successors()) { + if (!Blocks.contains(Succ) && + OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) { IsUsed = true; break; } @@ -513,7 +541,9 @@ void SIOptimizeVGPRLiveRange::optimizeLiveRange( } void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange( - Register Reg, MachineBasicBlock *Loop) const { + Register Reg, MachineBasicBlock *LoopHeader, + SmallSetVector<MachineBasicBlock *, 2> &Blocks, + SmallVectorImpl<MachineInstr *> &Instructions) const { // Insert a new PHI, marking the value from the last loop iteration undef. LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n'); const auto *RC = MRI->getRegClass(Reg); @@ -525,15 +555,16 @@ void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange( for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) { auto *UseMI = O.getParent(); auto *UseBlock = UseMI->getParent(); - // Replace uses in Loop block - if (UseBlock == Loop) + // Replace uses in Loop blocks + if (Blocks.contains(UseBlock)) O.setReg(NewReg); } - MachineInstrBuilder PHI = BuildMI(*Loop, Loop->getFirstNonPHI(), DebugLoc(), - TII->get(TargetOpcode::PHI), NewReg); - for (auto *Pred : Loop->predecessors()) { - if (Pred == Loop) + MachineInstrBuilder PHI = + BuildMI(*LoopHeader, LoopHeader->getFirstNonPHI(), DebugLoc(), + TII->get(TargetOpcode::PHI), NewReg); + for (auto *Pred : LoopHeader->predecessors()) { + if (Blocks.contains(Pred)) PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred); else PHI.addReg(Reg).addMBB(Pred); @@ -542,21 +573,36 @@ void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange( LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg); LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); - // collectWaterfallCandidateRegisters only collects registers that are dead - // after the loop. So we know that the old reg is not live throughout the - // whole block anymore. - OldVarInfo.AliveBlocks.reset(Loop->getNumber()); - - // Mark the last use as kill - for (auto &MI : reverse(Loop->instrs())) { - if (MI.readsRegister(NewReg, TRI)) { - MI.addRegisterKilled(NewReg, TRI); - NewVarInfo.Kills.push_back(&MI); + // Find last use and mark as kill + MachineInstr *Kill = nullptr; + for (auto *MI : reverse(Instructions)) { + if (MI->readsRegister(NewReg, TRI)) { + MI->addRegisterKilled(NewReg, TRI); + NewVarInfo.Kills.push_back(MI); + Kill = MI; break; } } - assert(!NewVarInfo.Kills.empty() && - "Failed to find last usage of register in loop"); + assert(Kill && "Failed to find last usage of register in loop"); + + MachineBasicBlock *KillBlock = Kill->getParent(); + bool PostKillBlock = false; + for (auto *Block : Blocks) { + auto BBNum = Block->getNumber(); + + // collectWaterfallCandidateRegisters only collects registers that are dead + // after the loop. So we know that the old reg is no longer live throughout + // the waterfall loop. + OldVarInfo.AliveBlocks.reset(BBNum); + + // The new register is live up to (and including) the block that kills it. + PostKillBlock |= (Block == KillBlock); + if (PostKillBlock) { + NewVarInfo.AliveBlocks.reset(BBNum); + } else if (Block != LoopHeader) { + NewVarInfo.AliveBlocks.set(BBNum); + } + } } char SIOptimizeVGPRLiveRange::ID = 0; @@ -601,6 +647,10 @@ bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) { if (!Endif) continue; + // Skip unexpected control flow. + if (!MDT->dominates(&MBB, IfTarget) || !MDT->dominates(IfTarget, Endif)) + continue; + SmallSetVector<MachineBasicBlock *, 16> ElseBlocks; SmallVector<Register> CandidateRegs; @@ -620,15 +670,22 @@ bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) { for (auto Reg : CandidateRegs) optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks); } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) { + auto *LoopHeader = MI.getOperand(0).getMBB(); + auto *LoopEnd = &MBB; + LLVM_DEBUG(dbgs() << "Checking Waterfall loop: " - << printMBBReference(MBB) << '\n'); + << printMBBReference(*LoopHeader) << '\n'); SmallSetVector<Register, 16> CandidateRegs; - collectWaterfallCandidateRegisters(&MBB, CandidateRegs); + SmallVector<MachineInstr *, 16> Instructions; + SmallSetVector<MachineBasicBlock *, 2> Blocks; + + collectWaterfallCandidateRegisters(LoopHeader, LoopEnd, CandidateRegs, + Blocks, Instructions); MadeChange |= !CandidateRegs.empty(); // Now we are safe to optimize. for (auto Reg : CandidateRegs) - optimizeWaterfallLiveRange(Reg, &MBB); + optimizeWaterfallLiveRange(Reg, LoopHeader, Blocks, Instructions); } } } |
