Diffstat (limited to 'llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp | 49
1 file changed, 25 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 4558ddf6dbfe..2592584b89c6 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -123,10 +123,15 @@ bool GCNDPPCombine::isShrinkable(MachineInstr &MI) const {
LLVM_DEBUG(dbgs() << " Inst hasn't e32 equivalent\n");
return false;
}
+ // Do not shrink True16 instructions pre-RA, to avoid restricting register
+ // allocation to only the lower 128 VGPRs
+ if (AMDGPU::isTrue16Inst(Op))
+ return false;
if (const auto *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) {
- // Give up if there are any uses of the carry-out from instructions like
- // V_ADD_CO_U32. The shrunken form of the instruction would write it to vcc
- // instead of to a virtual register.
+ // Give up if there are any uses of the sdst (carry-out or VOPC result).
+ // The shrunken form of the instruction would write it to vcc instead of to
+ // a virtual register. If we rewrote the uses, the shrinking would be
+ // possible.
if (!MRI->use_nodbg_empty(SDst->getReg()))
return false;
}
@@ -211,10 +216,10 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
const bool MaskAllLanes =
RowMaskOpnd->getImm() == 0xF && BankMaskOpnd->getImm() == 0xF;
(void)MaskAllLanes;
- assert(MaskAllLanes ||
- !(TII->isVOPC(DPPOp) ||
- (TII->isVOP3(DPPOp) && OrigOpE32 != -1 && TII->isVOPC(OrigOpE32))) &&
- "VOPC cannot form DPP unless mask is full");
+ assert((MaskAllLanes ||
+ !(TII->isVOPC(DPPOp) || (TII->isVOP3(DPPOp) && OrigOpE32 != -1 &&
+ TII->isVOPC(OrigOpE32)))) &&
+ "VOPC cannot form DPP unless mask is full");
auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
OrigMI.getDebugLoc(), TII->get(DPPOp))
@@ -267,8 +272,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
DPPInst.addImm(Mod0->getImm());
++NumOperands;
- } else if (AMDGPU::getNamedOperandIdx(DPPOp,
- AMDGPU::OpName::src0_modifiers) != -1) {
+ } else if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src0_modifiers)) {
DPPInst.addImm(0);
++NumOperands;
}
@@ -291,8 +295,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
DPPInst.addImm(Mod1->getImm());
++NumOperands;
- } else if (AMDGPU::getNamedOperandIdx(DPPOp,
- AMDGPU::OpName::src1_modifiers) != -1) {
+ } else if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src1_modifiers)) {
DPPInst.addImm(0);
++NumOperands;
}
@@ -328,18 +331,16 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
}
if (HasVOP3DPP) {
auto *ClampOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::clamp);
- if (ClampOpr &&
- AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::clamp) != -1) {
+ if (ClampOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::clamp)) {
DPPInst.addImm(ClampOpr->getImm());
}
auto *VdstInOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst_in);
if (VdstInOpr &&
- AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::vdst_in) != -1) {
+ AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::vdst_in)) {
DPPInst.add(*VdstInOpr);
}
auto *OmodOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::omod);
- if (OmodOpr &&
- AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::omod) != -1) {
+ if (OmodOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::omod)) {
DPPInst.addImm(OmodOpr->getImm());
}
// Validate OP_SEL has to be set to all 0 and OP_SEL_HI has to be set to
@@ -352,7 +353,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
Fail = true;
break;
}
- if (AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::op_sel) != -1)
+ if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::op_sel))
DPPInst.addImm(OpSel);
}
if (auto *OpSelHiOpr =
@@ -366,17 +367,15 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
Fail = true;
break;
}
- if (AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::op_sel_hi) != -1)
+ if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::op_sel_hi))
DPPInst.addImm(OpSelHi);
}
auto *NegOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_lo);
- if (NegOpr &&
- AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::neg_lo) != -1) {
+ if (NegOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::neg_lo)) {
DPPInst.addImm(NegOpr->getImm());
}
auto *NegHiOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_hi);
- if (NegHiOpr &&
- AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::neg_hi) != -1) {
+ if (NegHiOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::neg_hi)) {
DPPInst.addImm(NegHiOpr->getImm());
}
}
@@ -600,6 +599,8 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
LLVM_DEBUG(dbgs() << " try: " << OrigMI);
auto OrigOp = OrigMI.getOpcode();
+ assert((TII->get(OrigOp).getSize() != 4 || !AMDGPU::isTrue16Inst(OrigOp)) &&
+ "There should not be e32 True16 instructions pre-RA");
if (OrigOp == AMDGPU::REG_SEQUENCE) {
Register FwdReg = OrigMI.getOperand(0).getReg();
unsigned FwdSubReg = 0;
@@ -704,7 +705,7 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
continue;
}
while (!S.second.empty())
- S.first->getOperand(S.second.pop_back_val()).setIsUndef(true);
+ S.first->getOperand(S.second.pop_back_val()).setIsUndef();
}
}
@@ -732,7 +733,7 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
++NumDPPMovsCombined;
} else {
auto Split = TII->expandMovDPP64(MI);
- for (auto M : { Split.first, Split.second }) {
+ for (auto *M : {Split.first, Split.second}) {
if (M && combineDPPMov(*M))
++NumDPPMovsCombined;
}
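
A quick illustration of the refactor pattern running through this diff, which replaces the check "getNamedOperandIdx(Op, Name) != -1" with the boolean wrapper "hasNamedOperand(Op, Name)". The sketch below is standalone and uses assumed stand-in names and a toy operand table rather than the LLVM AMDGPU sources; it only shows how a presence-check helper wraps the index lookup so call sites read as a yes/no question.

  // Standalone sketch (assumed names, not the LLVM sources).
  #include <array>
  #include <cstdio>

  enum OpName { src0_modifiers, src1_modifiers, clamp, omod, NUM_OP_NAMES };

  // Toy operand-index table: -1 means the instruction has no such operand.
  constexpr std::array<int, NUM_OP_NAMES> OperandIdxTable = {1, -1, 3, -1};

  int getNamedOperandIdx(OpName Name) { return OperandIdxTable[Name]; }

  // The helper the call sites migrate to: the same "!= -1" check, but the
  // intent (does this operand exist?) is explicit at each call site.
  bool hasNamedOperand(OpName Name) { return getNamedOperandIdx(Name) != -1; }

  int main() {
    if (hasNamedOperand(clamp))
      std::printf("clamp operand at index %d\n", getNamedOperandIdx(clamp));
    if (!hasNamedOperand(omod))
      std::printf("no omod operand\n");
    return 0;
  }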