Diffstat (limited to 'lib/Target/AMDGPU/SIInsertSkips.cpp')
-rw-r--r--  lib/Target/AMDGPU/SIInsertSkips.cpp  125
1 file changed, 106 insertions(+), 19 deletions(-)
diff --git a/lib/Target/AMDGPU/SIInsertSkips.cpp b/lib/Target/AMDGPU/SIInsertSkips.cpp
index ba346d2fad02..a2f844d7854e 100644
--- a/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -132,6 +132,16 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
return true;
+ // The destination register of V_READFIRSTLANE/V_READLANE may be used as an
+ // operand by some SALU instruction. If the exec mask is zero, the vector
+ // instruction defining that register is not executed, and the scalar
+ // instruction would then operate on undefined data. Therefore, avoid
+ // predicated execution for V_READFIRSTLANE/V_READLANE.
+ if ((I->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) ||
+ (I->getOpcode() == AMDGPU::V_READLANE_B32)) {
+ return true;
+ }
+
if (I->isInlineAsm()) {
const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
const char *AsmStr = I->getOperand(0).getSymbolName();
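Roughly speaking, shouldSkip() returning true forces the pass to insert an explicit execz skip branch instead of letting the block run with a zero exec mask. A minimal sketch of the check this hunk adds, written as a standalone predicate (the helper name readsLaneFromVGPR is illustrative and not part of the patch; it assumes the same headers SIInsertSkips.cpp already includes for MachineInstr and the AMDGPU opcode enum):

  // Lane reads execute regardless of EXEC, so a block containing them cannot
  // simply be left to run with EXEC == 0: the VGPR they read was left
  // undefined by the masked-off VALU def, and the SGPR result would then feed
  // SALU instructions with garbage.
  static bool readsLaneFromVGPR(const MachineInstr &MI) {
    switch (MI.getOpcode()) {
    case AMDGPU::V_READFIRSTLANE_B32:
    case AMDGPU::V_READLANE_B32:
      return true;
    default:
      return false;
    }
  }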
@@ -156,7 +166,7 @@ bool SIInsertSkips::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) {
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction *MF = MBB.getParent();
- if (MF->getFunction()->getCallingConv() != CallingConv::AMDGPU_PS ||
+ if (MF->getFunction().getCallingConv() != CallingConv::AMDGPU_PS ||
!shouldSkip(MBB, MBB.getParent()->back()))
return false;
@@ -190,25 +200,101 @@ bool SIInsertSkips::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) {
void SIInsertSkips::kill(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
- const MachineOperand &Op = MI.getOperand(0);
-
-#ifndef NDEBUG
- CallingConv::ID CallConv = MBB.getParent()->getFunction()->getCallingConv();
- // Kill is only allowed in pixel / geometry shaders.
- assert(CallConv == CallingConv::AMDGPU_PS ||
- CallConv == CallingConv::AMDGPU_GS);
-#endif
- // Clear this thread from the exec mask if the operand is negative.
- if (Op.isImm()) {
- // Constant operand: Set exec mask to 0 or do nothing
- if (Op.getImm() & 0x80000000) {
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
- .addImm(0);
+
+ switch (MI.getOpcode()) {
+ case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: {
+ unsigned Opcode = 0;
+
+ // The comparison operands are swapped because the inline immediate has to
+ // be the first operand, so e.g. "x < imm" is emitted as "imm > x".
+ switch (MI.getOperand(2).getImm()) {
+ case ISD::SETOEQ:
+ case ISD::SETEQ:
+ Opcode = AMDGPU::V_CMPX_EQ_F32_e32;
+ break;
+ case ISD::SETOGT:
+ case ISD::SETGT:
+ Opcode = AMDGPU::V_CMPX_LT_F32_e32;
+ break;
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ Opcode = AMDGPU::V_CMPX_LE_F32_e32;
+ break;
+ case ISD::SETOLT:
+ case ISD::SETLT:
+ Opcode = AMDGPU::V_CMPX_GT_F32_e32;
+ break;
+ case ISD::SETOLE:
+ case ISD::SETLE:
+ Opcode = AMDGPU::V_CMPX_GE_F32_e32;
+ break;
+ case ISD::SETONE:
+ case ISD::SETNE:
+ Opcode = AMDGPU::V_CMPX_LG_F32_e32;
+ break;
+ case ISD::SETO:
+ Opcode = AMDGPU::V_CMPX_O_F32_e32;
+ break;
+ case ISD::SETUO:
+ Opcode = AMDGPU::V_CMPX_U_F32_e32;
+ break;
+ case ISD::SETUEQ:
+ Opcode = AMDGPU::V_CMPX_NLG_F32_e32;
+ break;
+ case ISD::SETUGT:
+ Opcode = AMDGPU::V_CMPX_NGE_F32_e32;
+ break;
+ case ISD::SETUGE:
+ Opcode = AMDGPU::V_CMPX_NGT_F32_e32;
+ break;
+ case ISD::SETULT:
+ Opcode = AMDGPU::V_CMPX_NLE_F32_e32;
+ break;
+ case ISD::SETULE:
+ Opcode = AMDGPU::V_CMPX_NLT_F32_e32;
+ break;
+ case ISD::SETUNE:
+ Opcode = AMDGPU::V_CMPX_NEQ_F32_e32;
+ break;
+ default:
+ llvm_unreachable("invalid ISD:SET cond code");
}
- } else {
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32))
- .addImm(0)
+
+ // TODO: Allow operands that are not VGPRs here:
+ if (!MI.getOperand(0).isReg() ||
+ !TRI->isVGPR(MBB.getParent()->getRegInfo(),
+ MI.getOperand(0).getReg()))
+ llvm_unreachable("SI_KILL operand should be a VGPR");
+
+ BuildMI(MBB, &MI, DL, TII->get(Opcode))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(0));
+ break;
+ }
+ case AMDGPU::SI_KILL_I1_TERMINATOR: {
+ const MachineOperand &Op = MI.getOperand(0);
+ int64_t KillVal = MI.getOperand(1).getImm();
+ assert(KillVal == 0 || KillVal == -1);
+
+ // Kill all threads if Op0 is an immediate and equal to the Kill value.
+ if (Op.isImm()) {
+ int64_t Imm = Op.getImm();
+ assert(Imm == 0 || Imm == -1);
+
+ if (Imm == KillVal)
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ .addImm(0);
+ break;
+ }
+
+ unsigned Opcode = KillVal ? AMDGPU::S_ANDN2_B64 : AMDGPU::S_AND_B64;
+ BuildMI(MBB, &MI, DL, TII->get(Opcode), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
.add(Op);
+ break;
+ }
+ default:
+ llvm_unreachable("invalid opcode, expected SI_KILL_*_TERMINATOR");
}
}
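To make the operand swap in the F32 case above concrete, here is a hedged, host-side illustration for a single lane and the ISD::SETOLT case only (the function name is illustrative, not part of the patch). The terminator encodes "x < imm"; because the inline constant has to be the first source of the VALU compare, the pass emits V_CMPX_GT_F32_e32 imm, x, the same predicate with its operands swapped:

  // imm > x  <=>  x < imm. V_CMPX_* additionally writes the per-lane result
  // into EXEC, so lanes for which the comparison is false are disabled.
  static bool cmpLtImmAsEmitted(float X, float Imm) {
    return Imm > X;
  }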
@@ -301,7 +387,8 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
}
break;
- case AMDGPU::SI_KILL_TERMINATOR:
+ case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
+ case AMDGPU::SI_KILL_I1_TERMINATOR:
MadeChange = true;
kill(MI);
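Similarly, a hedged model of the exec-mask update performed by the SI_KILL_I1_TERMINATOR case, assuming a 64-lane wave and treating the i1 operand as a lane mask (applyKillI1 is an illustrative name, not part of the patch):

  #include <cstdint>

  // KillIfTrue corresponds to KillVal == -1 above: lanes whose bit in Op
  // matches the kill value are removed from EXEC.
  static uint64_t applyKillI1(uint64_t Exec, uint64_t Op, bool KillIfTrue) {
    return KillIfTrue ? (Exec & ~Op)  // S_ANDN2_B64 exec, exec, op
                      : (Exec &  Op); // S_AND_B64   exec, exec, op
  }

The immediate-operand special case in the patch is the degenerate form of this: when Op is a constant equal to KillVal, every lane matches and EXEC is simply set to zero.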