aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
diff options
context:
space:
mode:
author Dimitry Andric <dim@FreeBSD.org> 2023-12-18 20:30:12 +0000
committer Dimitry Andric <dim@FreeBSD.org> 2024-04-06 20:11:55 +0000
commit 5f757f3ff9144b609b3c433dfd370cc6bdc191ad (patch)
tree 1b4e980b866cd26a00af34c0a653eb640bd09caf /contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
parent 3e1c8a35f741a5d114d0ba670b15191355711fe9 (diff)
parent 312c0ed19cc5276a17bacf2120097bec4515b0f1 (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp')
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp 41
1 files changed, 33 insertions, 8 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index 04c9a6457944..e3f54d01eb22 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -10,6 +10,7 @@
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineOperand.h"
@@ -32,6 +33,7 @@ class SIOptimizeExecMasking : public MachineFunctionPass {
DenseMap<MachineInstr *, MachineInstr *> SaveExecVCmpMapping;
SmallVector<std::pair<MachineInstr *, MachineInstr *>, 1> OrXors;
+ SmallVector<MachineOperand *, 1> KillFlagCandidates;
Register isCopyFromExec(const MachineInstr &MI) const;
Register isCopyToExec(const MachineInstr &MI) const;
@@ -41,15 +43,16 @@ class SIOptimizeExecMasking : public MachineFunctionPass {
MachineBasicBlock::reverse_iterator
findExecCopy(MachineBasicBlock &MBB,
MachineBasicBlock::reverse_iterator I) const;
-
bool isRegisterInUseBetween(MachineInstr &Stop, MachineInstr &Start,
MCRegister Reg, bool UseLiveOuts = false,
bool IgnoreStart = false) const;
bool isRegisterInUseAfter(MachineInstr &Stop, MCRegister Reg) const;
- MachineInstr *findInstrBackwards(MachineInstr &Origin,
- std::function<bool(MachineInstr *)> Pred,
- ArrayRef<MCRegister> NonModifiableRegs,
- unsigned MaxInstructions = 20) const;
+ MachineInstr *findInstrBackwards(
+ MachineInstr &Origin, std::function<bool(MachineInstr *)> Pred,
+ ArrayRef<MCRegister> NonModifiableRegs,
+ MachineInstr *Terminator = nullptr,
+ SmallVectorImpl<MachineOperand *> *KillFlagCandidates = nullptr,
+ unsigned MaxInstructions = 20) const;
bool optimizeExecSequence();
void tryRecordVCmpxAndSaveexecSequence(MachineInstr &MI);
bool optimizeVCMPSaveExecSequence(MachineInstr &SaveExecInstr,
@@ -325,11 +328,13 @@ static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) {
// Backwards-iterate from Origin (for n=MaxInstructions iterations) until either
// the beginning of the BB is reached or Pred evaluates to true - which can be
// an arbitrary condition based on the current MachineInstr, for instance a
-// target instruction. Breaks prematurely by returning nullptr if one of the
+// target instruction. Breaks prematurely by returning nullptr if one of the
// registers given in NonModifiableRegs is modified by the current instruction.
MachineInstr *SIOptimizeExecMasking::findInstrBackwards(
MachineInstr &Origin, std::function<bool(MachineInstr *)> Pred,
- ArrayRef<MCRegister> NonModifiableRegs, unsigned MaxInstructions) const {
+ ArrayRef<MCRegister> NonModifiableRegs, MachineInstr *Terminator,
+ SmallVectorImpl<MachineOperand *> *KillFlagCandidates,
+ unsigned MaxInstructions) const {
MachineBasicBlock::reverse_iterator A = Origin.getReverseIterator(),
E = Origin.getParent()->rend();
unsigned CurrentIteration = 0;
@@ -344,6 +349,21 @@ MachineInstr *SIOptimizeExecMasking::findInstrBackwards(
for (MCRegister Reg : NonModifiableRegs) {
if (A->modifiesRegister(Reg, TRI))
return nullptr;
+
// Check for kills that appear after the terminator instruction and that
// would not be detected by clearKillFlags. Left in place, they would cause
// the register to be dead at a later point, causing the verifier to fail.
// We collect them as candidates so their kill flags can be cleared later.
+ if (Terminator && KillFlagCandidates && A != Terminator &&
+ A->killsRegister(Reg, TRI)) {
+ for (MachineOperand &MO : A->operands()) {
+ if (MO.isReg() && MO.isKill()) {
+ Register Candidate = MO.getReg();
+ if (Candidate != Reg && TRI->regsOverlap(Candidate, Reg))
+ KillFlagCandidates->push_back(&MO);
+ }
+ }
+ }
}
++CurrentIteration;
@@ -599,6 +619,9 @@ bool SIOptimizeExecMasking::optimizeVCMPSaveExecSequence(
if (Src1->isReg())
MRI->clearKillFlags(Src1->getReg());
+ for (MachineOperand *MO : KillFlagCandidates)
+ MO->setIsKill(false);
+
SaveExecInstr.eraseFromParent();
VCmp.eraseFromParent();
@@ -690,7 +713,8 @@ void SIOptimizeExecMasking::tryRecordVCmpxAndSaveexecSequence(
NonDefRegs.push_back(Src1->getReg());
if (!findInstrBackwards(
- MI, [&](MachineInstr *Check) { return Check == VCmp; }, NonDefRegs))
+ MI, [&](MachineInstr *Check) { return Check == VCmp; }, NonDefRegs,
+ VCmp, &KillFlagCandidates))
return;
if (VCmp)
@@ -777,6 +801,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
OrXors.clear();
SaveExecVCmpMapping.clear();
+ KillFlagCandidates.clear();
static unsigned SearchWindow = 10;
for (MachineBasicBlock &MBB : MF) {
unsigned SearchCount = 0;