diff options
Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp')
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 151 |
1 files changed, 130 insertions, 21 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 5bf347e48650..318de7f2e3d2 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -67,10 +67,13 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel { // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can // make the right decision when generating code for different targets. const AMDGPUSubtarget *Subtarget; + AMDGPUAS AMDGPUASI; public: explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(TM, OptLevel) {} + : SelectionDAGISel(TM, OptLevel){ + AMDGPUASI = AMDGPU::getAMDGPUAS(TM); + } ~AMDGPUDAGToDAGISel() override = default; bool runOnMachineFunction(MachineFunction &MF) override; @@ -80,6 +83,7 @@ public: private: SDValue foldFrameIndex(SDValue N) const; + bool isNoNanSrc(SDValue N) const; bool isInlineImmediate(const SDNode *N) const; bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs, const R600InstrInfo *TII); @@ -143,6 +147,8 @@ private: bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const; bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const; bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const; + + bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const; bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, @@ -156,7 +162,15 @@ private: SDValue &Clamp, SDValue &Omod) const; + bool SelectVOP3OMods(SDValue In, SDValue &Src, + SDValue &Clamp, SDValue &Omod) const; + + bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; + bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods, + SDValue &Clamp) const; + void SelectADD_SUB_I64(SDNode *N); + void SelectUADDO_USUBO(SDNode *N); void SelectDIV_SCALE(SDNode *N); void SelectFMA_W_CHAIN(SDNode *N); void SelectFMUL_W_CHAIN(SDNode *N); @@ -187,6 +201,17 @@ bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { return SelectionDAGISel::runOnMachineFunction(MF); } +bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const { + if (TM.Options.NoNaNsFPMath) + return true; + + // TODO: Move into isKnownNeverNaN + if (const auto *BO = dyn_cast<BinaryWithFlagsSDNode>(N)) + return BO->Flags.hasNoNaNs(); + + return CurDAG->isKnownNeverNaN(N); +} + bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const { const SIInstrInfo *TII = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo(); @@ -250,7 +275,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || - cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) + cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS) return N; const SITargetLowering& Lowering = @@ -290,6 +315,20 @@ static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) { llvm_unreachable("invalid vector size"); } +static bool getConstantValue(SDValue N, uint32_t &Out) { + if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) { + Out = C->getAPIntValue().getZExtValue(); + return true; + } + + if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) { + Out = C->getValueAPF().bitcastToAPInt().getZExtValue(); + return true; + } + + return false; +} + void AMDGPUDAGToDAGISel::Select(SDNode *N) { unsigned int Opc = N->getOpcode(); if (N->isMachineOpcode()) { @@ -319,6 +358,11 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { SelectADD_SUB_I64(N); return; } + case ISD::UADDO: + case ISD::USUBO: { + SelectUADDO_USUBO(N); + return; + } case AMDGPUISD::FMUL_W_CHAIN: { SelectFMUL_W_CHAIN(N); return; @@ -336,7 +380,24 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumVectorElts = VT.getVectorNumElements(); EVT EltVT = VT.getVectorElementType(); + + if (VT == MVT::v2i16 || VT == MVT::v2f16) { + if (Opc == ISD::BUILD_VECTOR) { + uint32_t LHSVal, RHSVal; + if (getConstantValue(N->getOperand(0), LHSVal) && + getConstantValue(N->getOperand(1), RHSVal)) { + uint32_t K = LHSVal | (RHSVal << 16); + CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT, + CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32)); + return; + } + } + + break; + } + assert(EltVT.bitsEq(MVT::i32)); + if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { RegClassID = selectSGPRVectorRegClassID(NumVectorElts); } else { @@ -502,7 +563,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { case ISD::CopyToReg: { const SITargetLowering& Lowering = *static_cast<const SITargetLowering*>(getTargetLowering()); - Lowering.legalizeTargetIndependentNode(N, *CurDAG); + N = Lowering.legalizeTargetIndependentNode(N, *CurDAG); break; } case ISD::AND: @@ -531,9 +592,9 @@ bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { if (!N->readMem()) return false; if (CbId == -1) - return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS; + return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS; - return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId; + return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId; } bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const { @@ -689,6 +750,17 @@ void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { CurDAG->RemoveDeadNode(N); } +void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) { + // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned + // carry out despite the _i32 name. These were renamed in VI to _U32. + // FIXME: We should probably rename the opcodes here. + unsigned Opc = N->getOpcode() == ISD::UADDO ? + AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64; + + CurDAG->SelectNodeTo(N, Opc, N->getVTList(), + { N->getOperand(0), N->getOperand(1) }); +} + void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) { SDLoc SL(N); // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod @@ -1176,16 +1248,6 @@ bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr, return true; } -/// -/// \param EncodedOffset This is the immediate value that will be encoded -/// directly into the instruction. On SI/CI the \p EncodedOffset -/// will be in units of dwords and on VI+ it will be units of bytes. -static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST, - int64_t EncodedOffset) { - return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ? - isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset); -} - bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, bool &Imm) const { @@ -1197,10 +1259,9 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, SDLoc SL(ByteOffsetNode); AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration(); int64_t ByteOffset = C->getSExtValue(); - int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ? - ByteOffset >> 2 : ByteOffset; + int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset); - if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) { + if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) { Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); Imm = true; return true; @@ -1481,7 +1542,7 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) { void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) { MemSDNode *Mem = cast<MemSDNode>(N); unsigned AS = Mem->getAddressSpace(); - if (AS == AMDGPUAS::FLAT_ADDRESS) { + if (AS == AMDGPUASI.FLAT_ADDRESS) { SelectCode(N); return; } @@ -1545,7 +1606,6 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) { bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const { unsigned Mods = 0; - Src = In; if (Src.getOpcode() == ISD::FNEG) { @@ -1559,10 +1619,15 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, } SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); - return true; } +bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, + SDValue &SrcMods) const { + SelectVOP3Mods(In, Src, SrcMods); + return isNoNanSrc(Src); +} + bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const { bool Res = SelectVOP3Mods(In, Src, SrcMods); @@ -1607,6 +1672,50 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, return SelectVOP3Mods(In, Src, SrcMods); } +bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src, + SDValue &Clamp, SDValue &Omod) const { + Src = In; + + SDLoc DL(In); + // FIXME: Handle Clamp and Omod + Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32); + Omod = CurDAG->getTargetConstant(0, DL, MVT::i32); + + return true; +} + +bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src, + SDValue &SrcMods) const { + unsigned Mods = 0; + Src = In; + + // FIXME: Look for on separate components + if (Src.getOpcode() == ISD::FNEG) { + Mods |= (SISrcMods::NEG | SISrcMods::NEG_HI); + Src = Src.getOperand(0); + } + + // Packed instructions do not have abs modifiers. + + // FIXME: Handle abs/neg of individual components. + // FIXME: Handle swizzling with op_sel + Mods |= SISrcMods::OP_SEL_1; + + SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); + return true; +} + +bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src, + SDValue &SrcMods, + SDValue &Clamp) const { + SDLoc SL(In); + + // FIXME: Handle clamp and op_sel + Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32); + + return SelectVOP3PMods(In, Src, SrcMods); +} + void AMDGPUDAGToDAGISel::PostprocessISelDAG() { const AMDGPUTargetLowering& Lowering = *static_cast<const AMDGPUTargetLowering*>(getTargetLowering()); |