diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 76 |
1 files changed, 56 insertions, 20 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 323aaaf70cd4..28cb2fc57ac7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" #define DEBUG_TYPE "amdgpu-isel" @@ -140,7 +141,7 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { = TRI.getConstrainedRegClassForOperand(Src, *MRI); Optional<ValueAndVReg> ConstVal = - getConstantVRegValWithLookThrough(SrcReg, *MRI, true, true); + getIConstantVRegValWithLookThrough(SrcReg, *MRI, true); if (ConstVal) { unsigned MovOpc = STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32; @@ -608,11 +609,10 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC( const DebugLoc &DL = MI.getDebugLoc(); MachineBasicBlock *BB = MI.getParent(); - auto ConstSrc1 = - getConstantVRegValWithLookThrough(Src1, *MRI, true, true, true); + auto ConstSrc1 = getAnyConstantVRegValWithLookThrough(Src1, *MRI, true, true); if (ConstSrc1) { auto ConstSrc0 = - getConstantVRegValWithLookThrough(Src0, *MRI, true, true, true); + getAnyConstantVRegValWithLookThrough(Src0, *MRI, true, true); if (ConstSrc0) { const int64_t K0 = ConstSrc0->Value.getSExtValue(); const int64_t K1 = ConstSrc1->Value.getSExtValue(); @@ -844,7 +844,7 @@ bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const { auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst); Optional<ValueAndVReg> ConstSelect = - getConstantVRegValWithLookThrough(LaneSelect, *MRI, true, true); + getIConstantVRegValWithLookThrough(LaneSelect, *MRI); if (ConstSelect) { // The selector has to be an inline immediate, so we can use whatever for // the other operands. @@ -853,7 +853,7 @@ bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const { maskTrailingOnes<uint64_t>(STI.getWavefrontSizeLog2())); } else { Optional<ValueAndVReg> ConstVal = - getConstantVRegValWithLookThrough(Val, *MRI, true, true); + getIConstantVRegValWithLookThrough(Val, *MRI); // If the value written is an inline immediate, we can get away without a // copy to m0. @@ -928,7 +928,7 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const { case Intrinsic::amdgcn_if_break: { MachineBasicBlock *BB = I.getParent(); - // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick + // FIXME: Manually selecting to avoid dealing with the SReg_1 trick // SelectionDAG uses for wave32 vs wave64. BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK)) .add(I.getOperand(0)) @@ -1130,7 +1130,7 @@ bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const { return false; Optional<ValueAndVReg> Arg = - getConstantVRegValWithLookThrough(I.getOperand(2).getReg(), *MRI, true); + getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), *MRI); if (Arg.hasValue()) { const int64_t Value = Arg.getValue().Value.getSExtValue(); @@ -1242,7 +1242,7 @@ bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const { } bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const { - // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick + // FIXME: Manually selecting to avoid dealing with the SReg_1 trick // SelectionDAG uses for wave32 vs wave64. MachineBasicBlock *BB = MI.getParent(); BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF)) @@ -1826,8 +1826,9 @@ bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const { .add(I.getOperand(2)) .add(I.getOperand(3)); - bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) | - constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI); + bool Ret = false; + Ret |= constrainSelectedInstRegOperands(*Select, TII, TRI, RBI); + Ret |= constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI); I.eraseFromParent(); return Ret; } @@ -2387,7 +2388,7 @@ void AMDGPUInstructionSelector::initM0(MachineInstr &I) const { STI.ldsRequiresM0Init()) { MachineBasicBlock *BB = I.getParent(); - // If DS instructions require M0 initializtion, insert it before selecting. + // If DS instructions require M0 initialization, insert it before selecting. BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0) .addImm(-1); } @@ -2465,6 +2466,27 @@ bool AMDGPUInstructionSelector::selectG_AMDGPU_ATOMIC_CMPXCHG( return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); } +static bool isVCmpResult(Register Reg, MachineRegisterInfo &MRI) { + if (Reg.isPhysical()) + return false; + + MachineInstr &MI = *MRI.getUniqueVRegDef(Reg); + const unsigned Opcode = MI.getOpcode(); + + if (Opcode == AMDGPU::COPY) + return isVCmpResult(MI.getOperand(1).getReg(), MRI); + + if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR || + Opcode == AMDGPU::G_XOR) + return isVCmpResult(MI.getOperand(1).getReg(), MRI) && + isVCmpResult(MI.getOperand(2).getReg(), MRI); + + if (Opcode == TargetOpcode::G_INTRINSIC) + return MI.getIntrinsicID() == Intrinsic::amdgcn_class; + + return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP; +} + bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); MachineOperand &CondOp = I.getOperand(0); @@ -2488,11 +2510,22 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const { BrOpcode = AMDGPU::S_CBRANCH_SCC1; ConstrainRC = &AMDGPU::SReg_32RegClass; } else { - // FIXME: Do we have to insert an and with exec here, like in SelectionDAG? - // We sort of know that a VCC producer based on the register bank, that ands - // inactive lanes with 0. What if there was a logical operation with vcc - // producers in different blocks/with different exec masks? // FIXME: Should scc->vcc copies and with exec? + + // Unless the value of CondReg is a result of a V_CMP* instruction then we + // need to insert an and with exec. + if (!isVCmpResult(CondReg, *MRI)) { + const bool Is64 = STI.isWave64(); + const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32; + const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO; + + Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC()); + BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg) + .addReg(CondReg) + .addReg(Exec); + CondReg = TmpReg; + } + CondPhysReg = TRI.getVCC(); BrOpcode = AMDGPU::S_CBRANCH_VCCNZ; ConstrainRC = TRI.getBoolRC(); @@ -3216,6 +3249,9 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) { case AMDGPU::G_SBFX: case AMDGPU::G_UBFX: return selectG_SBFX_UBFX(I); + case AMDGPU::G_SI_CALL: + I.setDesc(TII.get(AMDGPU::SI_CALL)); + return true; default: return selectImpl(I, *CoverageInfo); } @@ -3977,8 +4013,8 @@ AMDGPUInstructionSelector::getPtrBaseWithConstantOffset( return {Root, 0}; MachineOperand &RHS = RootI->getOperand(2); - Optional<ValueAndVReg> MaybeOffset - = getConstantVRegValWithLookThrough(RHS.getReg(), MRI, true); + Optional<ValueAndVReg> MaybeOffset = + getIConstantVRegValWithLookThrough(RHS.getReg(), MRI); if (!MaybeOffset) return {Root, 0}; return {RootI->getOperand(1).getReg(), MaybeOffset->Value.getSExtValue()}; @@ -4306,8 +4342,8 @@ AMDGPUInstructionSelector::selectMUBUFOffsetAtomic(MachineOperand &Root) const { /// Get an immediate that must be 32-bits, and treated as zero extended. static Optional<uint64_t> getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) { - // getConstantVRegVal sexts any values, so see if that matters. - Optional<int64_t> OffsetVal = getConstantVRegSExtVal(Reg, MRI); + // getIConstantVRegVal sexts any values, so see if that matters. + Optional<int64_t> OffsetVal = getIConstantVRegSExtVal(Reg, MRI); if (!OffsetVal || !isInt<32>(*OffsetVal)) return None; return Lo_32(*OffsetVal); |
