diff options
Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp')
| -rw-r--r-- | lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 686 |
1 files changed, 495 insertions, 191 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index f235313e48535..f4776adb069c9 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -13,10 +13,12 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUArgumentUsageInfo.h" #include "AMDGPUISelLowering.h" // For AMDGPUISD #include "AMDGPUInstrInfo.h" #include "AMDGPURegisterInfo.h" #include "AMDGPUSubtarget.h" +#include "AMDGPUTargetMachine.h" #include "SIDefines.h" #include "SIISelLowering.h" #include "SIInstrInfo.h" @@ -68,19 +70,30 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel { // make the right decision when generating code for different targets. const AMDGPUSubtarget *Subtarget; AMDGPUAS AMDGPUASI; + bool EnableLateStructurizeCFG; public: - explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(TM, OptLevel){ - AMDGPUASI = AMDGPU::getAMDGPUAS(TM); + explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr, + CodeGenOpt::Level OptLevel = CodeGenOpt::Default) + : SelectionDAGISel(*TM, OptLevel) { + AMDGPUASI = AMDGPU::getAMDGPUAS(*TM); + EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG; } ~AMDGPUDAGToDAGISel() override = default; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AMDGPUArgumentUsageInfo>(); + SelectionDAGISel::getAnalysisUsage(AU); + } + bool runOnMachineFunction(MachineFunction &MF) override; void Select(SDNode *N) override; StringRef getPassName() const override; void PostprocessISelDAG() override; +protected: + void SelectBuildVector(SDNode *N, unsigned RegClassID); + private: std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const; bool isNoNanSrc(SDValue N) const; @@ -99,8 +112,8 @@ private: bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg, SDValue& Offset); - bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); - bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); + virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); + virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); bool isDSOffsetLegal(const SDValue &Base, unsigned Offset, unsigned OffsetBits) const; bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const; @@ -116,10 +129,10 @@ private: bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &SLC) const; - bool SelectMUBUFScratchOffen(SDNode *Root, + bool SelectMUBUFScratchOffen(SDNode *Parent, SDValue Addr, SDValue &RSrc, SDValue &VAddr, SDValue &SOffset, SDValue &ImmOffset) const; - bool SelectMUBUFScratchOffset(SDNode *Root, + bool SelectMUBUFScratchOffset(SDNode *Parent, SDValue Addr, SDValue &SRsrc, SDValue &Soffset, SDValue &Offset) const; @@ -140,6 +153,10 @@ private: bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr, SDValue &Offset, SDValue &SLC) const; + bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr, + SDValue &Offset, SDValue &SLC) const; + + template <bool IsSigned> bool SelectFlatOffset(SDValue Addr, SDValue &VAddr, SDValue &Offset, SDValue &SLC) const; @@ -152,10 +169,10 @@ private: bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const; bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const; bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const; - bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const; bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const; bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const; + bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const; bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; bool SelectVOP3NoMods(SDValue In, SDValue &Src) const; bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, @@ -174,9 +191,22 @@ private: bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp) const; + bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const; + bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods, + SDValue &Clamp) const; + + bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; + bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods, + SDValue &Clamp) const; + bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const; + bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; + + bool SelectHi16Elt(SDValue In, SDValue &Src) const; + void SelectADD_SUB_I64(SDNode *N); void SelectUADDO_USUBO(SDNode *N); void SelectDIV_SCALE(SDNode *N); + void SelectMAD_64_32(SDNode *N); void SelectFMA_W_CHAIN(SDNode *N); void SelectFMUL_W_CHAIN(SDNode *N); @@ -186,21 +216,49 @@ private: void SelectS_BFE(SDNode *N); bool isCBranchSCC(const SDNode *N) const; void SelectBRCOND(SDNode *N); + void SelectFMAD(SDNode *N); void SelectATOMIC_CMP_SWAP(SDNode *N); +protected: // Include the pieces autogenerated from the target description. #include "AMDGPUGenDAGISel.inc" }; +class R600DAGToDAGISel : public AMDGPUDAGToDAGISel { +public: + explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) : + AMDGPUDAGToDAGISel(TM, OptLevel) {} + + void Select(SDNode *N) override; + + bool SelectADDRIndirect(SDValue Addr, SDValue &Base, + SDValue &Offset) override; + bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, + SDValue &Offset) override; +}; + } // end anonymous namespace +INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel", + "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) +INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo) +INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel", + "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) + /// \brief This pass converts a legalized DAG into a AMDGPU-specific // DAG, ready for instruction scheduling. -FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM, +FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel) { return new AMDGPUDAGToDAGISel(TM, OptLevel); } +/// \brief This pass converts a legalized DAG into a R600-specific +// DAG, ready for instruction scheduling. +FunctionPass *llvm::createR600ISelDag(TargetMachine *TM, + CodeGenOpt::Level OptLevel) { + return new R600DAGToDAGISel(TM, OptLevel); +} + bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { Subtarget = &MF.getSubtarget<AMDGPUSubtarget>(); return SelectionDAGISel::runOnMachineFunction(MF); @@ -279,8 +337,8 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, } SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { - if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || - cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS) + if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS || + !Subtarget->ldsRequiresM0Init()) return N; const SITargetLowering& Lowering = @@ -298,9 +356,7 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { Ops.push_back(N->getOperand(i)); } Ops.push_back(Glue); - CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); - - return N; + return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); } static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) { @@ -334,6 +390,58 @@ static bool getConstantValue(SDValue N, uint32_t &Out) { return false; } +void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { + EVT VT = N->getValueType(0); + unsigned NumVectorElts = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo(); + SDLoc DL(N); + SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); + + if (NumVectorElts == 1) { + CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0), + RegClass); + return; + } + + assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not " + "supported yet"); + // 16 = Max Num Vector Elements + // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) + // 1 = Vector Register Class + SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1); + + RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); + bool IsRegSeq = true; + unsigned NOps = N->getNumOperands(); + for (unsigned i = 0; i < NOps; i++) { + // XXX: Why is this here? + if (isa<RegisterSDNode>(N->getOperand(i))) { + IsRegSeq = false; + break; + } + RegSeqArgs[1 + (2 * i)] = N->getOperand(i); + RegSeqArgs[1 + (2 * i) + 1] = + CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, + MVT::i32); + } + if (NOps != NumVectorElts) { + // Fill in the missing undef elements if this was a scalar_to_vector. + assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts); + MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + DL, EltVT); + for (unsigned i = NOps; i < NumVectorElts; ++i) { + RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0); + RegSeqArgs[1 + (2 * i) + 1] = + CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32); + } + } + + if (!IsRegSeq) + SelectCode(N); + CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs); +} + void AMDGPUDAGToDAGISel::Select(SDNode *N) { unsigned int Opc = N->getOpcode(); if (N->isMachineOpcode()) { @@ -346,18 +454,16 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { N = glueCopyToM0(N); switch (Opc) { - default: break; + default: + break; // We are selecting i64 ADD here instead of custom lower it during // DAG legalization, so we can fold some i64 ADDs used for address // calculation into the LOAD and STORE instructions. - case ISD::ADD: case ISD::ADDC: case ISD::ADDE: - case ISD::SUB: case ISD::SUBC: case ISD::SUBE: { - if (N->getValueType(0) != MVT::i64 || - Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) + if (N->getValueType(0) != MVT::i64) break; SelectADD_SUB_I64(N); @@ -378,13 +484,9 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { } case ISD::SCALAR_TO_VECTOR: - case AMDGPUISD::BUILD_VERTICAL_VECTOR: case ISD::BUILD_VECTOR: { - unsigned RegClassID; - const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo(); EVT VT = N->getValueType(0); unsigned NumVectorElts = VT.getVectorNumElements(); - EVT EltVT = VT.getVectorElementType(); if (VT == MVT::v2i16 || VT == MVT::v2f16) { if (Opc == ISD::BUILD_VECTOR) { @@ -401,81 +503,13 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { break; } - assert(EltVT.bitsEq(MVT::i32)); - - if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { - RegClassID = selectSGPRVectorRegClassID(NumVectorElts); - } else { - // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG - // that adds a 128 bits reg copy when going through TwoAddressInstructions - // pass. We want to avoid 128 bits copies as much as possible because they - // can't be bundled by our scheduler. - switch(NumVectorElts) { - case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; - case 4: - if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR) - RegClassID = AMDGPU::R600_Reg128VerticalRegClassID; - else - RegClassID = AMDGPU::R600_Reg128RegClassID; - break; - default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); - } - } - - SDLoc DL(N); - SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); - - if (NumVectorElts == 1) { - CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0), - RegClass); - return; - } - - assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not " - "supported yet"); - // 16 = Max Num Vector Elements - // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) - // 1 = Vector Register Class - SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1); - - RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); - bool IsRegSeq = true; - unsigned NOps = N->getNumOperands(); - for (unsigned i = 0; i < NOps; i++) { - // XXX: Why is this here? - if (isa<RegisterSDNode>(N->getOperand(i))) { - IsRegSeq = false; - break; - } - RegSeqArgs[1 + (2 * i)] = N->getOperand(i); - RegSeqArgs[1 + (2 * i) + 1] = - CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, - MVT::i32); - } - - if (NOps != NumVectorElts) { - // Fill in the missing undef elements if this was a scalar_to_vector. - assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts); - - MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - DL, EltVT); - for (unsigned i = NOps; i < NumVectorElts; ++i) { - RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0); - RegSeqArgs[1 + (2 * i) + 1] = - CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32); - } - } - - if (!IsRegSeq) - break; - CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs); + assert(VT.getVectorElementType().bitsEq(MVT::i32)); + unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts); + SelectBuildVector(N, RegClassID); return; } case ISD::BUILD_PAIR: { SDValue RC, SubReg0, SubReg1; - if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { - break; - } SDLoc DL(N); if (N->getValueType(0) == MVT::i128) { RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32); @@ -497,8 +531,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { case ISD::Constant: case ISD::ConstantFP: { - if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || - N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N)) + if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N)) break; uint64_t Imm; @@ -533,9 +566,6 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { case AMDGPUISD::BFE_I32: case AMDGPUISD::BFE_U32: { - if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) - break; - // There is a scalar version available, but unlike the vector version which // has a separate operand for the offset and width, the scalar version packs // the width and offset into a single operand. Try to move to the scalar @@ -565,6 +595,11 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { SelectDIV_SCALE(N); return; } + case AMDGPUISD::MAD_I64_I32: + case AMDGPUISD::MAD_U64_U32: { + SelectMAD_64_32(N); + return; + } case ISD::CopyToReg: { const SITargetLowering& Lowering = *static_cast<const SITargetLowering*>(getTargetLowering()); @@ -575,8 +610,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { case ISD::SRL: case ISD::SRA: case ISD::SIGN_EXTEND_INREG: - if (N->getValueType(0) != MVT::i32 || - Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) + if (N->getValueType(0) != MVT::i32) break; SelectS_BFE(N); @@ -584,7 +618,9 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { case ISD::BRCOND: SelectBRCOND(N); return; - + case ISD::FMAD: + SelectFMAD(N); + return; case AMDGPUISD::ATOMIC_CMP_SWAP: SelectATOMIC_CMP_SWAP(N); return; @@ -638,32 +674,8 @@ bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, } bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, - SDValue &Offset) { - ConstantSDNode *IMMOffset; - - if (Addr.getOpcode() == ISD::ADD - && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) - && isInt<16>(IMMOffset->getZExtValue())) { - - Base = Addr.getOperand(0); - Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), - MVT::i32); - return true; - // If the pointer address is constant, we can move it to the offset field. - } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) - && isInt<16>(IMMOffset->getZExtValue())) { - Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), - SDLoc(CurDAG->getEntryNode()), - AMDGPU::ZERO, MVT::i32); - Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), - MVT::i32); - return true; - } - - // Default case, no offset - Base = Addr; - Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); - return true; + SDValue &Offset) { + return false; } bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, @@ -690,6 +702,7 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, return true; } +// FIXME: Should only handle addcarry/subcarry void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { SDLoc DL(N); SDValue LHS = N->getOperand(0); @@ -699,8 +712,7 @@ void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE); bool ProduceCarry = ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC; - bool IsAdd = - (Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE); + bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE; SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); @@ -782,7 +794,7 @@ void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) { void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) { SDLoc SL(N); - // src0_modifiers, src0, src1_modifiers, src1, clamp, omod + // src0_modifiers, src0, src1_modifiers, src1, clamp, omod SDValue Ops[8]; SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]); @@ -808,6 +820,19 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); } +// We need to handle this here because tablegen doesn't support matching +// instructions with multiple outputs. +void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) { + SDLoc SL(N); + bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32; + unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32; + + SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), + Clamp }; + CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); +} + bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset, unsigned OffsetBits) const { if ((OffsetBits == 16 && !isUInt<16>(Offset)) || @@ -850,8 +875,12 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base, Zero, Addr.getOperand(1)); if (isDSOffsetLegal(Sub, ByteOffset, 16)) { + // FIXME: Select to VOP3 version for with-carry. + unsigned SubOp = Subtarget->hasAddNoCarry() ? + AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32; + MachineSDNode *MachineSub - = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32, + = CurDAG->getMachineNode(SubOp, DL, MVT::i32, Zero, Addr.getOperand(1)); Base = SDValue(MachineSub, 0); @@ -920,8 +949,11 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base, Zero, Addr.getOperand(1)); if (isDSOffsetLegal(Sub, DWordOffset1, 8)) { + unsigned SubOp = Subtarget->hasAddNoCarry() ? + AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32; + MachineSDNode *MachineSub - = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32, + = CurDAG->getMachineNode(SubOp, DL, MVT::i32, Zero, Addr.getOperand(1)); Base = SDValue(MachineSub, 0); @@ -958,14 +990,6 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base, return true; } -static bool isLegalMUBUFImmOffset(unsigned Imm) { - return isUInt<12>(Imm); -} - -static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) { - return isLegalMUBUFImmOffset(Imm->getZExtValue()); -} - bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &Offen, @@ -1007,7 +1031,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, Ptr = N0; } - if (isLegalMUBUFImmOffset(C1)) { + if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) { Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); return true; } @@ -1104,7 +1128,7 @@ std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const MVT::i32)); } -bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Root, +bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent, SDValue Addr, SDValue &Rsrc, SDValue &VAddr, SDValue &SOffset, SDValue &ImmOffset) const { @@ -1117,8 +1141,6 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Root, if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { unsigned Imm = CAddr->getZExtValue(); - assert(!isLegalMUBUFImmOffset(Imm) && - "should have been selected by other pattern"); SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32); MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, @@ -1127,7 +1149,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Root, // In a call sequence, stores to the argument stack area are relative to the // stack pointer. - const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Root)->getPointerInfo(); + const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo(); unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ? Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg(); @@ -1142,9 +1164,25 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Root, SDValue N0 = Addr.getOperand(0); SDValue N1 = Addr.getOperand(1); - // Offsets in vaddr must be positive. + // Offsets in vaddr must be positive if range checking is enabled. + // + // The total computation of vaddr + soffset + offset must not overflow. If + // vaddr is negative, even if offset is 0 the sgpr offset add will end up + // overflowing. + // + // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would + // always perform a range check. If a negative vaddr base index was used, + // this would fail the range check. The overall address computation would + // compute a valid address, but this doesn't happen due to the range + // check. For out-of-bounds MUBUF loads, a 0 is returned. + // + // Therefore it should be safe to fold any VGPR offset on gfx9 into the + // MUBUF vaddr, but not on older subtargets which can only do this if the + // sign bit is known 0. ConstantSDNode *C1 = cast<ConstantSDNode>(N1); - if (isLegalMUBUFImmOffset(C1)) { + if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) && + (!Subtarget->privateMemoryResourceIsRangeChecked() || + CurDAG->SignBitIsZero(N0))) { std::tie(VAddr, SOffset) = foldFrameIndex(N0); ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); return true; @@ -1157,13 +1195,13 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Root, return true; } -bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Root, +bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent, SDValue Addr, SDValue &SRsrc, SDValue &SOffset, SDValue &Offset) const { ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr); - if (!CAddr || !isLegalMUBUFImmOffset(CAddr)) + if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) return false; SDLoc DL(Addr); @@ -1172,7 +1210,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Root, SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); - const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Root)->getPointerInfo(); + const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo(); unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ? Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg(); @@ -1231,24 +1269,30 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant, SDValue &SOffset, SDValue &ImmOffset) const { SDLoc DL(Constant); + const uint32_t Align = 4; + const uint32_t MaxImm = alignDown(4095, Align); uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue(); uint32_t Overflow = 0; - if (Imm >= 4096) { - if (Imm <= 4095 + 64) { - // Use an SOffset inline constant for 1..64 - Overflow = Imm - 4095; - Imm = 4095; + if (Imm > MaxImm) { + if (Imm <= MaxImm + 64) { + // Use an SOffset inline constant for 4..64 + Overflow = Imm - MaxImm; + Imm = MaxImm; } else { // Try to keep the same value in SOffset for adjacent loads, so that // the corresponding register contents can be re-used. // - // Load values with all low-bits set into SOffset, so that a larger - // range of values can be covered using s_movk_i32 - uint32_t High = (Imm + 1) & ~4095; - uint32_t Low = (Imm + 1) & 4095; + // Load values with all low-bits (except for alignment bits) set into + // SOffset, so that a larger range of values can be covered using + // s_movk_i32. + // + // Atomic operations fail to work correctly when individual address + // components are unaligned, even if their sum is aligned. + uint32_t High = (Imm + Align) & ~4095; + uint32_t Low = (Imm + Align) & 4095; Imm = Low; - Overflow = High - 1; + Overflow = High - Align; } } @@ -1316,6 +1360,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset, return true; } +template <bool IsSigned> bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr, SDValue &VAddr, SDValue &Offset, @@ -1326,8 +1371,10 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr, CurDAG->isBaseWithConstantOffset(Addr)) { SDValue N0 = Addr.getOperand(0); SDValue N1 = Addr.getOperand(1); - uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getZExtValue(); - if (isUInt<12>(COffsetVal)) { + int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue(); + + if ((IsSigned && isInt<13>(COffsetVal)) || + (!IsSigned && isUInt<12>(COffsetVal))) { Addr = N0; OffsetVal = COffsetVal; } @@ -1344,7 +1391,14 @@ bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr, SDValue &VAddr, SDValue &Offset, SDValue &SLC) const { - return SelectFlatOffset(Addr, VAddr, Offset, SLC); + return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC); +} + +bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr, + SDValue &VAddr, + SDValue &Offset, + SDValue &SLC) const { + return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC); } bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, @@ -1443,13 +1497,6 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr, return !Imm && isa<ConstantSDNode>(Offset); } -bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr, - SDValue &Offset) const { - bool Imm; - return SelectSMRDOffset(Addr, Offset, Imm) && !Imm && - !isa<ConstantSDNode>(Offset); -} - bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const { @@ -1622,18 +1669,55 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) { return; } - if (isCBranchSCC(N)) { - // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it. + bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N); + unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ; + unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC; + SDLoc SL(N); + + SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond); + CurDAG->SelectNodeTo(N, BrOp, MVT::Other, + N->getOperand(2), // Basic Block + VCC.getValue(0)); +} + +void AMDGPUDAGToDAGISel::SelectFMAD(SDNode *N) { + MVT VT = N->getSimpleValueType(0); + if (VT != MVT::f32 || !Subtarget->hasMadMixInsts()) { SelectCode(N); return; } - SDLoc SL(N); + SDValue Src0 = N->getOperand(0); + SDValue Src1 = N->getOperand(1); + SDValue Src2 = N->getOperand(2); + unsigned Src0Mods, Src1Mods, Src2Mods; - SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, Cond); - CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other, - N->getOperand(2), // Basic Block - VCC.getValue(0)); + // Avoid using v_mad_mix_f32 unless there is actually an operand using the + // conversion from f16. + bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods); + bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods); + bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods); + + assert(!Subtarget->hasFP32Denormals() && + "fmad selected with denormals enabled"); + // TODO: We can select this with f32 denormals enabled if all the sources are + // converted from f16 (in which case fmad isn't legal). + + if (Sel0 || Sel1 || Sel2) { + // For dummy operands. + SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32); + SDValue Ops[] = { + CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0, + CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1, + CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2, + CurDAG->getTargetConstant(0, SDLoc(), MVT::i1), + Zero, Zero + }; + + CurDAG->SelectNodeTo(N, AMDGPU::V_MAD_MIX_F32, MVT::f32, Ops); + } else { + SelectCode(N); + } } // This is here because there isn't a way to use the generated sub0_sub1 as the @@ -1652,11 +1736,11 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) { MachineSDNode *CmpSwap = nullptr; if (Subtarget->hasAddr64()) { - SDValue SRsrc, VAddr, SOffset, Offset, GLC, SLC; + SDValue SRsrc, VAddr, SOffset, Offset, SLC; if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) { - unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64 : - AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64; + unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN : + AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN; SDValue CmpVal = Mem->getOperand(2); // XXX - Do we care about glue operands? @@ -1672,8 +1756,8 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) { if (!CmpSwap) { SDValue SRsrc, SOffset, Offset, SLC; if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) { - unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET : - AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET; + unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN : + AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN; SDValue CmpVal = Mem->getOperand(2); SDValue Ops[] = { @@ -1702,9 +1786,9 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) { CurDAG->RemoveDeadNode(N); } -bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, - SDValue &SrcMods) const { - unsigned Mods = 0; +bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src, + unsigned &Mods) const { + Mods = 0; Src = In; if (Src.getOpcode() == ISD::FNEG) { @@ -1717,10 +1801,20 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, Src = Src.getOperand(0); } - SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); return true; } +bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, + SDValue &SrcMods) const { + unsigned Mods; + if (SelectVOP3ModsImpl(In, Src, Mods)) { + SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); + return true; + } + + return false; +} + bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const { SelectVOP3Mods(In, Src, SrcMods); @@ -1864,24 +1958,234 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src, return SelectVOP3PMods(In, Src, SrcMods); } +bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src, + SDValue &SrcMods) const { + Src = In; + // FIXME: Handle op_sel + SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); + return true; +} + +bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src, + SDValue &SrcMods, + SDValue &Clamp) const { + SDLoc SL(In); + + // FIXME: Handle clamp + Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32); + + return SelectVOP3OpSel(In, Src, SrcMods); +} + +bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src, + SDValue &SrcMods) const { + // FIXME: Handle op_sel + return SelectVOP3Mods(In, Src, SrcMods); +} + +bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src, + SDValue &SrcMods, + SDValue &Clamp) const { + SDLoc SL(In); + + // FIXME: Handle clamp + Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32); + + return SelectVOP3OpSelMods(In, Src, SrcMods); +} + +// The return value is not whether the match is possible (which it always is), +// but whether or not it a conversion is really used. +bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, + unsigned &Mods) const { + Mods = 0; + SelectVOP3ModsImpl(In, Src, Mods); + + if (Src.getOpcode() == ISD::FP_EXTEND) { + Src = Src.getOperand(0); + assert(Src.getValueType() == MVT::f16); + Src = stripBitcast(Src); + + // Be careful about folding modifiers if we already have an abs. fneg is + // applied last, so we don't want to apply an earlier fneg. + if ((Mods & SISrcMods::ABS) == 0) { + unsigned ModsTmp; + SelectVOP3ModsImpl(Src, Src, ModsTmp); + + if ((ModsTmp & SISrcMods::NEG) != 0) + Mods ^= SISrcMods::NEG; + + if ((ModsTmp & SISrcMods::ABS) != 0) + Mods |= SISrcMods::ABS; + } + + // op_sel/op_sel_hi decide the source type and source. + // If the source's op_sel_hi is set, it indicates to do a conversion from fp16. + // If the sources's op_sel is set, it picks the high half of the source + // register. + + Mods |= SISrcMods::OP_SEL_1; + if (isExtractHiElt(Src, Src)) { + Mods |= SISrcMods::OP_SEL_0; + + // TODO: Should we try to look for neg/abs here? + } + + return true; + } + + return false; +} + +bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src, + SDValue &SrcMods) const { + unsigned Mods = 0; + SelectVOP3PMadMixModsImpl(In, Src, Mods); + SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); + return true; +} + +// TODO: Can we identify things like v_mad_mixhi_f16? +bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const { + if (In.isUndef()) { + Src = In; + return true; + } + + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) { + SDLoc SL(In); + SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL, MVT::i32); + MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, + SL, MVT::i32, K); + Src = SDValue(MovK, 0); + return true; + } + + if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) { + SDLoc SL(In); + SDValue K = CurDAG->getTargetConstant( + C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32); + MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, + SL, MVT::i32, K); + Src = SDValue(MovK, 0); + return true; + } + + return isExtractHiElt(In, Src); +} + void AMDGPUDAGToDAGISel::PostprocessISelDAG() { const AMDGPUTargetLowering& Lowering = *static_cast<const AMDGPUTargetLowering*>(getTargetLowering()); bool IsModified = false; do { IsModified = false; + // Go over all selected nodes and try to fold them a bit more - for (SDNode &Node : CurDAG->allnodes()) { - MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node); + SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin(); + while (Position != CurDAG->allnodes_end()) { + SDNode *Node = &*Position++; + MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node); if (!MachineNode) continue; SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG); - if (ResNode != &Node) { - ReplaceUses(&Node, ResNode); + if (ResNode != Node) { + if (ResNode) + ReplaceUses(Node, ResNode); IsModified = true; } } CurDAG->RemoveDeadNodes(); } while (IsModified); } + +void R600DAGToDAGISel::Select(SDNode *N) { + unsigned int Opc = N->getOpcode(); + if (N->isMachineOpcode()) { + N->setNodeId(-1); + return; // Already selected. + } + + switch (Opc) { + default: break; + case AMDGPUISD::BUILD_VERTICAL_VECTOR: + case ISD::SCALAR_TO_VECTOR: + case ISD::BUILD_VECTOR: { + EVT VT = N->getValueType(0); + unsigned NumVectorElts = VT.getVectorNumElements(); + unsigned RegClassID; + // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG + // that adds a 128 bits reg copy when going through TwoAddressInstructions + // pass. We want to avoid 128 bits copies as much as possible because they + // can't be bundled by our scheduler. + switch(NumVectorElts) { + case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; + case 4: + if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR) + RegClassID = AMDGPU::R600_Reg128VerticalRegClassID; + else + RegClassID = AMDGPU::R600_Reg128RegClassID; + break; + default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); + } + SelectBuildVector(N, RegClassID); + return; + } + } + + SelectCode(N); +} + +bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, + SDValue &Offset) { + ConstantSDNode *C; + SDLoc DL(Addr); + + if ((C = dyn_cast<ConstantSDNode>(Addr))) { + Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); + Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); + } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) && + (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) { + Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); + Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); + } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && + (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { + Base = Addr.getOperand(0); + Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); + } else { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); + } + + return true; +} + +bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, + SDValue &Offset) { + ConstantSDNode *IMMOffset; + + if (Addr.getOpcode() == ISD::ADD + && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) + && isInt<16>(IMMOffset->getZExtValue())) { + + Base = Addr.getOperand(0); + Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), + MVT::i32); + return true; + // If the pointer address is constant, we can move it to the offset field. + } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) + && isInt<16>(IMMOffset->getZExtValue())) { + Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), + SDLoc(CurDAG->getEntryNode()), + AMDGPU::ZERO, MVT::i32); + Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), + MVT::i32); + return true; + } + + // Default case, no offset + Base = Addr; + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); + return true; +} |
