diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2023-12-18 20:30:12 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-04-06 20:11:55 +0000 |
commit | 5f757f3ff9144b609b3c433dfd370cc6bdc191ad (patch) | |
tree | 1b4e980b866cd26a00af34c0a653eb640bd09caf /contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp | |
parent | 3e1c8a35f741a5d114d0ba670b15191355711fe9 (diff) | |
parent | 312c0ed19cc5276a17bacf2120097bec4515b0f1 (diff) |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp | 41 |
1 files changed, 33 insertions, 8 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index 04c9a6457944..e3f54d01eb22 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -10,6 +10,7 @@
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIRegisterInfo.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineOperand.h"
@@ -32,6 +33,7 @@ class SIOptimizeExecMasking : public MachineFunctionPass {
 
   DenseMap<MachineInstr *, MachineInstr *> SaveExecVCmpMapping;
   SmallVector<std::pair<MachineInstr *, MachineInstr *>, 1> OrXors;
+  SmallVector<MachineOperand *, 1> KillFlagCandidates;
 
   Register isCopyFromExec(const MachineInstr &MI) const;
   Register isCopyToExec(const MachineInstr &MI) const;
@@ -41,15 +43,16 @@ class SIOptimizeExecMasking : public MachineFunctionPass {
   MachineBasicBlock::reverse_iterator
   findExecCopy(MachineBasicBlock &MBB,
                MachineBasicBlock::reverse_iterator I) const;
-
   bool isRegisterInUseBetween(MachineInstr &Stop, MachineInstr &Start,
                               MCRegister Reg, bool UseLiveOuts = false,
                               bool IgnoreStart = false) const;
   bool isRegisterInUseAfter(MachineInstr &Stop, MCRegister Reg) const;
-  MachineInstr *findInstrBackwards(MachineInstr &Origin,
-                                   std::function<bool(MachineInstr *)> Pred,
-                                   ArrayRef<MCRegister> NonModifiableRegs,
-                                   unsigned MaxInstructions = 20) const;
+  MachineInstr *findInstrBackwards(
+      MachineInstr &Origin, std::function<bool(MachineInstr *)> Pred,
+      ArrayRef<MCRegister> NonModifiableRegs,
+      MachineInstr *Terminator = nullptr,
+      SmallVectorImpl<MachineOperand *> *KillFlagCandidates = nullptr,
+      unsigned MaxInstructions = 20) const;
   bool optimizeExecSequence();
   void tryRecordVCmpxAndSaveexecSequence(MachineInstr &MI);
   bool optimizeVCMPSaveExecSequence(MachineInstr &SaveExecInstr,
@@ -325,11 +328,13 @@ static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) {
 // Backwards-iterate from Origin (for n=MaxInstructions iterations) until either
 // the beginning of the BB is reached or Pred evaluates to true - which can be
 // an arbitrary condition based on the current MachineInstr, for instance an
-// target instruction. Breaks prematurely by returning nullptr if one of the
+// target instruction. Breaks prematurely by returning nullptr if one of the
 // registers given in NonModifiableRegs is modified by the current instruction.
 MachineInstr *SIOptimizeExecMasking::findInstrBackwards(
     MachineInstr &Origin, std::function<bool(MachineInstr *)> Pred,
-    ArrayRef<MCRegister> NonModifiableRegs, unsigned MaxInstructions) const {
+    ArrayRef<MCRegister> NonModifiableRegs, MachineInstr *Terminator,
+    SmallVectorImpl<MachineOperand *> *KillFlagCandidates,
+    unsigned MaxInstructions) const {
   MachineBasicBlock::reverse_iterator A = Origin.getReverseIterator(),
                                       E = Origin.getParent()->rend();
   unsigned CurrentIteration = 0;
@@ -344,6 +349,21 @@ MachineInstr *SIOptimizeExecMasking::findInstrBackwards(
     for (MCRegister Reg : NonModifiableRegs) {
       if (A->modifiesRegister(Reg, TRI))
         return nullptr;
+
+      // Check for kills that appear after the terminator instruction, that
+      // would not be detected by clearKillFlags, since they will cause the
+      // register to be dead at a later place, causing the verifier to fail.
+      // We use the candidates to clear the kill flags later.
+      if (Terminator && KillFlagCandidates && A != Terminator &&
+          A->killsRegister(Reg, TRI)) {
+        for (MachineOperand &MO : A->operands()) {
+          if (MO.isReg() && MO.isKill()) {
+            Register Candidate = MO.getReg();
+            if (Candidate != Reg && TRI->regsOverlap(Candidate, Reg))
+              KillFlagCandidates->push_back(&MO);
+          }
+        }
+      }
     }
 
     ++CurrentIteration;
@@ -599,6 +619,9 @@ bool SIOptimizeExecMasking::optimizeVCMPSaveExecSequence(
   if (Src1->isReg())
     MRI->clearKillFlags(Src1->getReg());
 
+  for (MachineOperand *MO : KillFlagCandidates)
+    MO->setIsKill(false);
+
   SaveExecInstr.eraseFromParent();
   VCmp.eraseFromParent();
 
@@ -690,7 +713,8 @@ void SIOptimizeExecMasking::tryRecordVCmpxAndSaveexecSequence(
     NonDefRegs.push_back(Src1->getReg());
 
   if (!findInstrBackwards(
-          MI, [&](MachineInstr *Check) { return Check == VCmp; }, NonDefRegs))
+          MI, [&](MachineInstr *Check) { return Check == VCmp; }, NonDefRegs,
+          VCmp, &KillFlagCandidates))
     return;
 
   if (VCmp)
@@ -777,6 +801,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
 
   OrXors.clear();
   SaveExecVCmpMapping.clear();
+  KillFlagCandidates.clear();
   static unsigned SearchWindow = 10;
   for (MachineBasicBlock &MBB : MF) {
     unsigned SearchCount = 0;