diff options
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU')
12 files changed, 119 insertions, 11 deletions
| diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h index bb7801c172f6..55668867cc8e 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -254,7 +254,7 @@ namespace AMDGPUAS {      FLAT_ADDRESS = 0,     ///< Address space for flat memory.      GLOBAL_ADDRESS = 1,   ///< Address space for global memory (RAT0, VTX0). -    REGION_ADDRESS = 2,   ///< Address space for region memory. +    REGION_ADDRESS = 2,   ///< Address space for region memory. (GDS)      CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2)      LOCAL_ADDRESS = 3,    ///< Address space for local memory. diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 6951c915b177..8d36511a2830 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4192,6 +4192,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {    NODE_NAME_CASE(TBUFFER_STORE_FORMAT_D16)    NODE_NAME_CASE(TBUFFER_LOAD_FORMAT)    NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16) +  NODE_NAME_CASE(DS_ORDERED_COUNT)    NODE_NAME_CASE(ATOMIC_CMP_SWAP)    NODE_NAME_CASE(ATOMIC_INC)    NODE_NAME_CASE(ATOMIC_DEC) diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 0d22cb2e3e20..d4a751d00a50 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -474,6 +474,7 @@ enum NodeType : unsigned {    TBUFFER_STORE_FORMAT_D16,    TBUFFER_LOAD_FORMAT,    TBUFFER_LOAD_FORMAT_D16, +  DS_ORDERED_COUNT,    ATOMIC_CMP_SWAP,    ATOMIC_INC,    ATOMIC_DEC, diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td index 9dbd7751b4d8..4d0962f65fdc 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -72,6 +72,8 @@ def : SourceOfDivergence<int_amdgcn_buffer_atomic_xor>;  def : SourceOfDivergence<int_amdgcn_buffer_atomic_cmpswap>;  def : SourceOfDivergence<int_amdgcn_ps_live>;  def : SourceOfDivergence<int_amdgcn_ds_swizzle>; +def : SourceOfDivergence<int_amdgcn_ds_ordered_add>; +def : SourceOfDivergence<int_amdgcn_ds_ordered_swap>;  foreach intr = AMDGPUImageDimAtomicIntrinsics in  def : SourceOfDivergence<intr>; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 11e4ba4b5010..62e7e44ddb80 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -308,6 +308,8 @@ bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,    switch (Inst->getIntrinsicID()) {    case Intrinsic::amdgcn_atomic_inc:    case Intrinsic::amdgcn_atomic_dec: +  case Intrinsic::amdgcn_ds_ordered_add: +  case Intrinsic::amdgcn_ds_ordered_swap:    case Intrinsic::amdgcn_ds_fadd:    case Intrinsic::amdgcn_ds_fmin:    case Intrinsic::amdgcn_ds_fmax: { diff --git a/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td b/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td index 31d2ebef481d..9c7097e9a520 100644 --- a/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -817,6 +817,11 @@ defm : DSAtomicRetPat_mc<DS_MAX_RTN_U64, i64, "atomic_load_umax_local">;  defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B64, i64, "atomic_cmp_swap_local">; +def : Pat < +  (SIds_ordered_count i32:$value, i16:$offset), +  (DS_ORDERED_COUNT $value, (as_i16imm $offset)) +>; +  //===----------------------------------------------------------------------===//  // Real instructions  //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index c6396de89c4f..69ddbfb53958 100644 --- a/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -88,14 +88,28 @@ static bool isSMovRel(unsigned Opcode) {    }  } -static bool isSendMsgTraceDataOrGDS(const MachineInstr &MI) { +static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII, +                                    const MachineInstr &MI) { +  if (TII.isAlwaysGDS(MI.getOpcode())) +    return true; +    switch (MI.getOpcode()) {    case AMDGPU::S_SENDMSG:    case AMDGPU::S_SENDMSGHALT:    case AMDGPU::S_TTRACEDATA:      return true; +  // These DS opcodes don't support GDS. +  case AMDGPU::DS_NOP: +  case AMDGPU::DS_PERMUTE_B32: +  case AMDGPU::DS_BPERMUTE_B32: +    return false;    default: -    // TODO: GDS +    if (TII.isDS(MI.getOpcode())) { +      int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(), +                                           AMDGPU::OpName::gds); +      if (MI.getOperand(GDS).getImm()) +        return true; +    }      return false;    }  } @@ -145,7 +159,7 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {        checkReadM0Hazards(MI) > 0)      return NoopHazard; -  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI) && +  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&        checkReadM0Hazards(MI) > 0)      return NoopHazard; @@ -199,7 +213,7 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {                                             isSMovRel(MI->getOpcode())))      return std::max(WaitStates, checkReadM0Hazards(MI)); -  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI)) +  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))      return std::max(WaitStates, checkReadM0Hazards(MI));    return WaitStates; diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 0ba921647097..12113fcc1fcb 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -910,6 +910,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,    switch (IntrID) {    case Intrinsic::amdgcn_atomic_inc:    case Intrinsic::amdgcn_atomic_dec: +  case Intrinsic::amdgcn_ds_ordered_add: +  case Intrinsic::amdgcn_ds_ordered_swap:    case Intrinsic::amdgcn_ds_fadd:    case Intrinsic::amdgcn_ds_fmin:    case Intrinsic::amdgcn_ds_fmax: { @@ -937,6 +939,8 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,    switch (II->getIntrinsicID()) {    case Intrinsic::amdgcn_atomic_inc:    case Intrinsic::amdgcn_atomic_dec: +  case Intrinsic::amdgcn_ds_ordered_add: +  case Intrinsic::amdgcn_ds_ordered_swap:    case Intrinsic::amdgcn_ds_fadd:    case Intrinsic::amdgcn_ds_fmin:    case Intrinsic::amdgcn_ds_fmax: { @@ -5438,6 +5442,63 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,    SDLoc DL(Op);    switch (IntrID) { +  case Intrinsic::amdgcn_ds_ordered_add: +  case Intrinsic::amdgcn_ds_ordered_swap: { +    MemSDNode *M = cast<MemSDNode>(Op); +    SDValue Chain = M->getOperand(0); +    SDValue M0 = M->getOperand(2); +    SDValue Value = M->getOperand(3); +    unsigned OrderedCountIndex = M->getConstantOperandVal(7); +    unsigned WaveRelease = M->getConstantOperandVal(8); +    unsigned WaveDone = M->getConstantOperandVal(9); +    unsigned ShaderType; +    unsigned Instruction; + +    switch (IntrID) { +    case Intrinsic::amdgcn_ds_ordered_add: +      Instruction = 0; +      break; +    case Intrinsic::amdgcn_ds_ordered_swap: +      Instruction = 1; +      break; +    } + +    if (WaveDone && !WaveRelease) +      report_fatal_error("ds_ordered_count: wave_done requires wave_release"); + +    switch (DAG.getMachineFunction().getFunction().getCallingConv()) { +    case CallingConv::AMDGPU_CS: +    case CallingConv::AMDGPU_KERNEL: +      ShaderType = 0; +      break; +    case CallingConv::AMDGPU_PS: +      ShaderType = 1; +      break; +    case CallingConv::AMDGPU_VS: +      ShaderType = 2; +      break; +    case CallingConv::AMDGPU_GS: +      ShaderType = 3; +      break; +    default: +      report_fatal_error("ds_ordered_count unsupported for this calling conv"); +    } + +    unsigned Offset0 = OrderedCountIndex << 2; +    unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) | +                       (Instruction << 4); +    unsigned Offset = Offset0 | (Offset1 << 8); + +    SDValue Ops[] = { +      Chain, +      Value, +      DAG.getTargetConstant(Offset, DL, MVT::i16), +      copyToM0(DAG, Chain, DL, M0).getValue(1), // Glue +    }; +    return DAG.getMemIntrinsicNode(AMDGPUISD::DS_ORDERED_COUNT, DL, +                                   M->getVTList(), Ops, M->getMemoryVT(), +                                   M->getMemOperand()); +  }    case Intrinsic::amdgcn_atomic_inc:    case Intrinsic::amdgcn_atomic_dec:    case Intrinsic::amdgcn_ds_fadd: diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index afc0b4467610..3c13bccd94fa 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -536,10 +536,13 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,              CurrScore);        }        if (Inst.mayStore()) { -        setExpScore( -            &Inst, TII, TRI, MRI, -            AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0), -            CurrScore); +        if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), +                                       AMDGPU::OpName::data0) != -1) { +          setExpScore( +              &Inst, TII, TRI, MRI, +              AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0), +              CurrScore); +        }          if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),                                         AMDGPU::OpName::data1) != -1) {            setExpScore(&Inst, TII, TRI, MRI, @@ -1093,7 +1096,8 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,    // bracket and the destination operand scores.    // TODO: Use the (TSFlags & SIInstrFlags::LGKM_CNT) property everywhere.    if (TII->isDS(Inst) && TII->usesLGKM_CNT(Inst)) { -    if (TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) { +    if (TII->isAlwaysGDS(Inst.getOpcode()) || +        TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) {        ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_ACCESS, Inst);        ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_GPR_LOCK, Inst);      } else { diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 2370d5fa7b27..7f7f1807987a 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2390,6 +2390,16 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,           changesVGPRIndexingMode(MI);  } +bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const { +  return Opcode == AMDGPU::DS_ORDERED_COUNT || +         Opcode == AMDGPU::DS_GWS_INIT || +         Opcode == AMDGPU::DS_GWS_SEMA_V || +         Opcode == AMDGPU::DS_GWS_SEMA_BR || +         Opcode == AMDGPU::DS_GWS_SEMA_P || +         Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL || +         Opcode == AMDGPU::DS_GWS_BARRIER; +} +  bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {    unsigned Opcode = MI.getOpcode(); @@ -2403,7 +2413,8 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const    //       EXEC = 0, but checking for that case here seems not worth it    //       given the typical code patterns.    if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT || -      Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE) +      Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE || +      Opcode == AMDGPU::DS_ORDERED_COUNT)      return true;    if (MI.isInlineAsm()) diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 5b1a05f3785e..8847fd6babb3 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -450,6 +450,8 @@ public:      return get(Opcode).TSFlags & SIInstrFlags::DS;    } +  bool isAlwaysGDS(uint16_t Opcode) const; +    static bool isMIMG(const MachineInstr &MI) {      return MI.getDesc().TSFlags & SIInstrFlags::MIMG;    } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 13afa4d4974b..180a7b0601d7 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -45,6 +45,11 @@ def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD",    [SDNPMayLoad, SDNPMemOperand]  >; +def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT", +  SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>, +  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue] +>; +  def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2,    [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]  >; | 
