Diffstat (limited to 'lib/Target/AMDGPU/SIInsertSkips.cpp')
-rw-r--r--   lib/Target/AMDGPU/SIInsertSkips.cpp   125
1 file changed, 106 insertions, 19 deletions
diff --git a/lib/Target/AMDGPU/SIInsertSkips.cpp b/lib/Target/AMDGPU/SIInsertSkips.cpp
index ba346d2fad02..a2f844d7854e 100644
--- a/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -132,6 +132,16 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
         I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
       return true;
 
+    // V_READFIRSTLANE/V_READLANE destination register may be used as operand
+    // by some SALU instruction. If exec mask is zero vector instruction
+    // defining the register that is used by the scalar one is not executed
+    // and scalar instruction will operate on undefined data. For
+    // V_READFIRSTLANE/V_READLANE we should avoid predicated execution.
+    if ((I->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) ||
+        (I->getOpcode() == AMDGPU::V_READLANE_B32)) {
+      return true;
+    }
+
     if (I->isInlineAsm()) {
       const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
       const char *AsmStr = I->getOperand(0).getSymbolName();
@@ -156,7 +166,7 @@ bool SIInsertSkips::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) {
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction *MF = MBB.getParent();
 
-  if (MF->getFunction()->getCallingConv() != CallingConv::AMDGPU_PS ||
+  if (MF->getFunction().getCallingConv() != CallingConv::AMDGPU_PS ||
       !shouldSkip(MBB, MBB.getParent()->back()))
     return false;
 
@@ -190,25 +200,101 @@ bool SIInsertSkips::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) {
 void SIInsertSkips::kill(MachineInstr &MI) {
   MachineBasicBlock &MBB = *MI.getParent();
   DebugLoc DL = MI.getDebugLoc();
-  const MachineOperand &Op = MI.getOperand(0);
-
-#ifndef NDEBUG
-  CallingConv::ID CallConv = MBB.getParent()->getFunction()->getCallingConv();
-  // Kill is only allowed in pixel / geometry shaders.
-  assert(CallConv == CallingConv::AMDGPU_PS ||
-         CallConv == CallingConv::AMDGPU_GS);
-#endif
-  // Clear this thread from the exec mask if the operand is negative.
-  if (Op.isImm()) {
-    // Constant operand: Set exec mask to 0 or do nothing
-    if (Op.getImm() & 0x80000000) {
-      BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
-        .addImm(0);
+
+  switch (MI.getOpcode()) {
+  case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: {
+    unsigned Opcode = 0;
+
+    // The opcodes are inverted because the inline immediate has to be
+    // the first operand, e.g. from "x < imm" to "imm > x"
+    switch (MI.getOperand(2).getImm()) {
+    case ISD::SETOEQ:
+    case ISD::SETEQ:
+      Opcode = AMDGPU::V_CMPX_EQ_F32_e32;
+      break;
+    case ISD::SETOGT:
+    case ISD::SETGT:
+      Opcode = AMDGPU::V_CMPX_LT_F32_e32;
+      break;
+    case ISD::SETOGE:
+    case ISD::SETGE:
+      Opcode = AMDGPU::V_CMPX_LE_F32_e32;
+      break;
+    case ISD::SETOLT:
+    case ISD::SETLT:
+      Opcode = AMDGPU::V_CMPX_GT_F32_e32;
+      break;
+    case ISD::SETOLE:
+    case ISD::SETLE:
+      Opcode = AMDGPU::V_CMPX_GE_F32_e32;
+      break;
+    case ISD::SETONE:
+    case ISD::SETNE:
+      Opcode = AMDGPU::V_CMPX_LG_F32_e32;
+      break;
+    case ISD::SETO:
+      Opcode = AMDGPU::V_CMPX_O_F32_e32;
+      break;
+    case ISD::SETUO:
+      Opcode = AMDGPU::V_CMPX_U_F32_e32;
+      break;
+    case ISD::SETUEQ:
+      Opcode = AMDGPU::V_CMPX_NLG_F32_e32;
+      break;
+    case ISD::SETUGT:
+      Opcode = AMDGPU::V_CMPX_NGE_F32_e32;
+      break;
+    case ISD::SETUGE:
+      Opcode = AMDGPU::V_CMPX_NGT_F32_e32;
+      break;
+    case ISD::SETULT:
+      Opcode = AMDGPU::V_CMPX_NLE_F32_e32;
+      break;
+    case ISD::SETULE:
+      Opcode = AMDGPU::V_CMPX_NLT_F32_e32;
+      break;
+    case ISD::SETUNE:
+      Opcode = AMDGPU::V_CMPX_NEQ_F32_e32;
+      break;
+    default:
+      llvm_unreachable("invalid ISD:SET cond code");
     }
-  } else {
-    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32))
-      .addImm(0)
+
+    // TODO: Allow this:
+    if (!MI.getOperand(0).isReg() ||
+        !TRI->isVGPR(MBB.getParent()->getRegInfo(),
+                     MI.getOperand(0).getReg()))
+      llvm_unreachable("SI_KILL operand should be a VGPR");
+
+    BuildMI(MBB, &MI, DL, TII->get(Opcode))
+      .add(MI.getOperand(1))
+      .add(MI.getOperand(0));
+    break;
+  }
+  case AMDGPU::SI_KILL_I1_TERMINATOR: {
+    const MachineOperand &Op = MI.getOperand(0);
+    int64_t KillVal = MI.getOperand(1).getImm();
+    assert(KillVal == 0 || KillVal == -1);
+
+    // Kill all threads if Op0 is an immediate and equal to the Kill value.
+    if (Op.isImm()) {
+      int64_t Imm = Op.getImm();
+      assert(Imm == 0 || Imm == -1);
+
+      if (Imm == KillVal)
+        BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+          .addImm(0);
+      break;
+    }
+
+    unsigned Opcode = KillVal ? AMDGPU::S_ANDN2_B64 : AMDGPU::S_AND_B64;
+    BuildMI(MBB, &MI, DL, TII->get(Opcode), AMDGPU::EXEC)
+      .addReg(AMDGPU::EXEC)
       .add(Op);
+    break;
+  }
+  default:
+    llvm_unreachable("invalid opcode, expected SI_KILL_*_TERMINATOR");
   }
 }
 
@@ -301,7 +387,8 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
         }
         break;
 
-      case AMDGPU::SI_KILL_TERMINATOR:
+      case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
+      case AMDGPU::SI_KILL_I1_TERMINATOR:
        MadeChange = true;
        kill(MI);
