diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp | 127 |
1 files changed, 110 insertions, 17 deletions
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp index 1cd880eaa48e..5d254518c67a 100644 --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -143,13 +143,20 @@ bool GCNDPPCombine::isShrinkable(MachineInstr &MI) const { } int GCNDPPCombine::getDPPOp(unsigned Op, bool IsShrinkable) const { - auto DPP32 = AMDGPU::getDPPOp32(Op); + int DPP32 = AMDGPU::getDPPOp32(Op); if (IsShrinkable) { assert(DPP32 == -1); - auto E32 = AMDGPU::getVOPe32(Op); + int E32 = AMDGPU::getVOPe32(Op); DPP32 = (E32 == -1) ? -1 : AMDGPU::getDPPOp32(E32); } - return (DPP32 == -1 || TII->pseudoToMCOpcode(DPP32) == -1) ? -1 : DPP32; + if (DPP32 != -1 && TII->pseudoToMCOpcode(DPP32) != -1) + return DPP32; + int DPP64 = -1; + if (ST->hasVOP3DPP()) + DPP64 = AMDGPU::getDPPOp64(Op); + if (DPP64 != -1 && TII->pseudoToMCOpcode(DPP64) != -1) + return DPP64; + return -1; } // tracks the register operand definition and returns: @@ -188,6 +195,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp || MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO); + bool HasVOP3DPP = ST->hasVOP3DPP(); auto OrigOp = OrigMI.getOpcode(); auto DPPOp = getDPPOp(OrigOp, IsShrinkable); if (DPPOp == -1) { @@ -201,10 +209,18 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, bool Fail = false; do { - auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst); - assert(Dst); - DPPInst.add(*Dst); - int NumOperands = 1; + int NumOperands = 0; + if (auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst)) { + DPPInst.add(*Dst); + ++NumOperands; + } + if (auto *SDst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::sdst)) { + if (TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, SDst)) { + DPPInst.add(*SDst); + ++NumOperands; + } + // If we shrunk a 64bit vop3b to 32bits, just ignore the sdst + } const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old); if (OldIdx != -1) { @@ -230,7 +246,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, AMDGPU::OpName::src0_modifiers)) { assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::src0_modifiers)); - assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))); + assert(HasVOP3DPP || + (0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)))); DPPInst.addImm(Mod0->getImm()); ++NumOperands; } else if (AMDGPU::getNamedOperandIdx(DPPOp, @@ -253,7 +270,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, AMDGPU::OpName::src1_modifiers)) { assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::src1_modifiers)); - assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))); + assert(HasVOP3DPP || + (0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)))); DPPInst.addImm(Mod1->getImm()); ++NumOperands; } else if (AMDGPU::getNamedOperandIdx(DPPOp, @@ -261,7 +279,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, DPPInst.addImm(0); ++NumOperands; } - if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) { + auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1); + if (Src1) { if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) { LLVM_DEBUG(dbgs() << " failed: src1 is illegal\n"); Fail = true; @@ -270,8 +289,17 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, DPPInst.add(*Src1); ++NumOperands; } - - if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) { + if (auto *Mod2 = + TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2_modifiers)) { + assert(NumOperands == + AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::src2_modifiers)); + assert(HasVOP3DPP || + (0LL == (Mod2->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)))); + DPPInst.addImm(Mod2->getImm()); + ++NumOperands; + } + auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2); + if (Src2) { if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) || !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) { LLVM_DEBUG(dbgs() << " failed: src2 is illegal\n"); @@ -279,8 +307,62 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, break; } DPPInst.add(*Src2); + ++NumOperands; + } + if (HasVOP3DPP) { + auto *ClampOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::clamp); + if (ClampOpr && + AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::clamp) != -1) { + DPPInst.addImm(ClampOpr->getImm()); + } + auto *VdstInOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst_in); + if (VdstInOpr && + AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::vdst_in) != -1) { + DPPInst.add(*VdstInOpr); + } + auto *OmodOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::omod); + if (OmodOpr && + AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::omod) != -1) { + DPPInst.addImm(OmodOpr->getImm()); + } + // Validate OP_SEL has to be set to all 0 and OP_SEL_HI has to be set to + // all 1. + if (auto *OpSelOpr = + TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel)) { + auto OpSel = OpSelOpr->getImm(); + if (OpSel != 0) { + LLVM_DEBUG(dbgs() << " failed: op_sel must be zero\n"); + Fail = true; + break; + } + if (AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::op_sel) != -1) + DPPInst.addImm(OpSel); + } + if (auto *OpSelHiOpr = + TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel_hi)) { + auto OpSelHi = OpSelHiOpr->getImm(); + // Only vop3p has op_sel_hi, and all vop3p have 3 operands, so check + // the bitmask for 3 op_sel_hi bits set + assert(Src2 && "Expected vop3p with 3 operands"); + if (OpSelHi != 7) { + LLVM_DEBUG(dbgs() << " failed: op_sel_hi must be all set to one\n"); + Fail = true; + break; + } + if (AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::op_sel_hi) != -1) + DPPInst.addImm(OpSelHi); + } + auto *NegOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_lo); + if (NegOpr && + AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::neg_lo) != -1) { + DPPInst.addImm(NegOpr->getImm()); + } + auto *NegHiOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_hi); + if (NegHiOpr && + AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::neg_hi) != -1) { + DPPInst.addImm(NegHiOpr->getImm()); + } } - DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl)); DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask)); DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask)); @@ -531,8 +613,16 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { } bool IsShrinkable = isShrinkable(OrigMI); - if (!(IsShrinkable || TII->isVOP1(OrigOp) || TII->isVOP2(OrigOp))) { - LLVM_DEBUG(dbgs() << " failed: not VOP1/2/3\n"); + if (!(IsShrinkable || + ((TII->isVOP3P(OrigOp) || TII->isVOPC(OrigOp) || + TII->isVOP3(OrigOp)) && + ST->hasVOP3DPP()) || + TII->isVOP1(OrigOp) || TII->isVOP2(OrigOp))) { + LLVM_DEBUG(dbgs() << " failed: not VOP1/2/3/3P/C\n"); + break; + } + if (OrigMI.modifiesRegister(AMDGPU::EXEC, ST->getRegisterInfo())) { + LLVM_DEBUG(dbgs() << " failed: can't combine v_cmpx\n"); break; } @@ -543,9 +633,12 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { break; } + auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2); assert(Src0 && "Src1 without Src0?"); - if (Src1 && Src1->isIdenticalTo(*Src0)) { - assert(Src1->isReg()); + if ((Use == Src0 && ((Src1 && Src1->isIdenticalTo(*Src0)) || + (Src2 && Src2->isIdenticalTo(*Src0)))) || + (Use == Src1 && (Src1->isIdenticalTo(*Src0) || + (Src2 && Src2->isIdenticalTo(*Src1))))) { LLVM_DEBUG( dbgs() << " " << OrigMI |
