diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp | 49 |
1 file changed, 25 insertions, 24 deletions
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp index 4558ddf6dbfe..2592584b89c6 100644 --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -123,10 +123,15 @@ bool GCNDPPCombine::isShrinkable(MachineInstr &MI) const { LLVM_DEBUG(dbgs() << " Inst hasn't e32 equivalent\n"); return false; } + // Do not shrink True16 instructions pre-RA to avoid the restriction in + // register allocation from only being able to use 128 VGPRs + if (AMDGPU::isTrue16Inst(Op)) + return false; if (const auto *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) { - // Give up if there are any uses of the carry-out from instructions like - // V_ADD_CO_U32. The shrunken form of the instruction would write it to vcc - // instead of to a virtual register. + // Give up if there are any uses of the sdst in carry-out or VOPC. + // The shrunken form of the instruction would write it to vcc instead of to + // a virtual register. If we rewrote the uses the shrinking would be + // possible. 
if (!MRI->use_nodbg_empty(SDst->getReg())) return false; } @@ -211,10 +216,10 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF && BankMaskOpnd->getImm() == 0xF; (void)MaskAllLanes; - assert(MaskAllLanes || - !(TII->isVOPC(DPPOp) || - (TII->isVOP3(DPPOp) && OrigOpE32 != -1 && TII->isVOPC(OrigOpE32))) && - "VOPC cannot form DPP unless mask is full"); + assert((MaskAllLanes || + !(TII->isVOPC(DPPOp) || (TII->isVOP3(DPPOp) && OrigOpE32 != -1 && + TII->isVOPC(OrigOpE32)))) && + "VOPC cannot form DPP unless mask is full"); auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI, OrigMI.getDebugLoc(), TII->get(DPPOp)) @@ -267,8 +272,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, (0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)))); DPPInst.addImm(Mod0->getImm()); ++NumOperands; - } else if (AMDGPU::getNamedOperandIdx(DPPOp, - AMDGPU::OpName::src0_modifiers) != -1) { + } else if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src0_modifiers)) { DPPInst.addImm(0); ++NumOperands; } @@ -291,8 +295,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, (0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)))); DPPInst.addImm(Mod1->getImm()); ++NumOperands; - } else if (AMDGPU::getNamedOperandIdx(DPPOp, - AMDGPU::OpName::src1_modifiers) != -1) { + } else if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src1_modifiers)) { DPPInst.addImm(0); ++NumOperands; } @@ -328,18 +331,16 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, } if (HasVOP3DPP) { auto *ClampOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::clamp); - if (ClampOpr && - AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::clamp) != -1) { + if (ClampOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::clamp)) { DPPInst.addImm(ClampOpr->getImm()); } auto *VdstInOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst_in); if (VdstInOpr && - AMDGPU::getNamedOperandIdx(DPPOp, 
AMDGPU::OpName::vdst_in) != -1) { + AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::vdst_in)) { DPPInst.add(*VdstInOpr); } auto *OmodOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::omod); - if (OmodOpr && - AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::omod) != -1) { + if (OmodOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::omod)) { DPPInst.addImm(OmodOpr->getImm()); } // Validate OP_SEL has to be set to all 0 and OP_SEL_HI has to be set to @@ -352,7 +353,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, Fail = true; break; } - if (AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::op_sel) != -1) + if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::op_sel)) DPPInst.addImm(OpSel); } if (auto *OpSelHiOpr = @@ -366,17 +367,15 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, Fail = true; break; } - if (AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::op_sel_hi) != -1) + if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::op_sel_hi)) DPPInst.addImm(OpSelHi); } auto *NegOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_lo); - if (NegOpr && - AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::neg_lo) != -1) { + if (NegOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::neg_lo)) { DPPInst.addImm(NegOpr->getImm()); } auto *NegHiOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_hi); - if (NegHiOpr && - AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::neg_hi) != -1) { + if (NegHiOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::neg_hi)) { DPPInst.addImm(NegHiOpr->getImm()); } } @@ -600,6 +599,8 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { LLVM_DEBUG(dbgs() << " try: " << OrigMI); auto OrigOp = OrigMI.getOpcode(); + assert((TII->get(OrigOp).getSize() != 4 || !AMDGPU::isTrue16Inst(OrigOp)) && + "There should not be e32 True16 instructions pre-RA"); if (OrigOp == AMDGPU::REG_SEQUENCE) { Register FwdReg = OrigMI.getOperand(0).getReg(); unsigned FwdSubReg = 0; @@ -704,7 +705,7 @@ bool 
GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { continue; } while (!S.second.empty()) - S.first->getOperand(S.second.pop_back_val()).setIsUndef(true); + S.first->getOperand(S.second.pop_back_val()).setIsUndef(); } } @@ -732,7 +733,7 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) { ++NumDPPMovsCombined; } else { auto Split = TII->expandMovDPP64(MI); - for (auto M : { Split.first, Split.second }) { + for (auto *M : {Split.first, Split.second}) { if (M && combineDPPMov(*M)) ++NumDPPMovsCombined; } |
