Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU')
10 files changed, 235 insertions, 53 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index bb628b8c558f..fda6252f46e3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -695,18 +695,24 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
           IsSGPR = false;
           Width = 3;
         } else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
+          assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
+            "trap handler registers should not be used");
           IsSGPR = true;
           Width = 4;
         } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
           IsSGPR = false;
           Width = 4;
         } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
+          assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
+            "trap handler registers should not be used");
           IsSGPR = true;
           Width = 8;
         } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
           IsSGPR = false;
           Width = 8;
         } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
+          assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
+            "trap handler registers should not be used");
           IsSGPR = true;
           Width = 16;
         } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
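The asserts above lean on the register-class split made later in this patch (see the SIRegisterInfo.td hunk below): SReg_256 and SReg_512 become supersets that also contain the trap handler (TTMP) tuples, so a register that passes the SReg test must additionally be screened against the TTMP subclass before it is counted as ordinary SGPR usage. A minimal sketch of that containment relationship, using only names from the patch (not part of the diff itself):

    // Sketch: after the class split, an SReg_256 match alone no longer
    // implies an allocatable SGPR tuple.
    static bool isCountableSGPR256(unsigned Reg) {
      return AMDGPU::SReg_256RegClass.contains(Reg) &&   // superset test
             !AMDGPU::TTMP_256RegClass.contains(Reg);    // exclude trap regs
    }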
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 3f8a9b1964ca..5c31bddd9b1a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -202,6 +202,16 @@ public:
   const char* getTargetNodeName(unsigned Opcode) const override;
 
+  // FIXME: Turn off MergeConsecutiveStores() before Instruction Selection
+  // for AMDGPU.
+  // A commit ( git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319036
+  // 91177308-0d34-0410-b5e6-96231b3b80d8 ) turned on
+  // MergeConsecutiveStores() before Instruction Selection for all targets.
+  // Enough AMDGPU compiles go into an infinite loop ( MergeConsecutiveStores()
+  // merges two stores; LegalizeStoreOps() un-merges; MergeConsecutiveStores()
+  // re-merges, etc. ) to warrant turning it off for now.
+  bool mergeStoresAfterLegalization() const override { return false; }
+
   bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override {
     return true;
   }
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 6984f4e71613..2042dbf6d5e2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -571,10 +571,9 @@ public:
 } // end anonymous namespace
 
-TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
-  return TargetIRAnalysis([this](const Function &F) {
-    return TargetTransformInfo(AMDGPUTTIImpl(this, F));
-  });
+TargetTransformInfo
+AMDGPUTargetMachine::getTargetTransformInfo(const Function &F) {
+  return TargetTransformInfo(AMDGPUTTIImpl(this, F));
 }
 
 void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
@@ -898,4 +897,3 @@ void GCNPassConfig::addPreEmitPass() {
 TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
   return new GCNPassConfig(*this, PM);
 }
-
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 5043e31f6f5b..5f9b2a7fca20 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -55,7 +55,7 @@ public:
   const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
     return &IntrinsicInfo;
   }
-  TargetIRAnalysis getTargetIRAnalysis() override;
+  TargetTransformInfo getTargetTransformInfo(const Function &F) override;
 
   TargetLoweringObjectFile *getObjFileLowering() const override {
     return TLOF.get();
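For context on the getTargetTransformInfo change: in this LLVM vintage the base TargetMachine keeps the old TargetIRAnalysis entry point and forwards to the new per-function virtual hook, so analysis consumers are unaffected. Roughly (a sketch of the generic wrapper, which lives outside this diff):

    // Base-class wrapper (sketch): the lambda defers to the target's
    // overridden getTargetTransformInfo(F) at query time.
    TargetIRAnalysis TargetMachine::getTargetIRAnalysis() {
      return TargetIRAnalysis(
          [this](const Function &F) { return getTargetTransformInfo(F); });
    }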
diff --git a/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 2acd7f78faea..ebf656c549ec 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -536,6 +536,10 @@ public:
     return EndLoc;
   }
 
+  SMRange getLocRange() const {
+    return SMRange(StartLoc, EndLoc);
+  }
+
   Modifiers getModifiers() const {
     assert(isRegKind() || isImmTy(ImmTyNone));
     return isRegKind() ? Reg.Mods : Imm.Mods;
@@ -1491,6 +1495,8 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
       case 1: return AMDGPU::TTMP_32RegClassID;
       case 2: return AMDGPU::TTMP_64RegClassID;
       case 4: return AMDGPU::TTMP_128RegClassID;
+      case 8: return AMDGPU::TTMP_256RegClassID;
+      case 16: return AMDGPU::TTMP_512RegClassID;
     }
   } else if (Is == IS_SGPR) {
     switch (RegWidth) {
@@ -1498,8 +1504,8 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
       case 1: return AMDGPU::SGPR_32RegClassID;
       case 2: return AMDGPU::SGPR_64RegClassID;
       case 4: return AMDGPU::SGPR_128RegClassID;
-      case 8: return AMDGPU::SReg_256RegClassID;
-      case 16: return AMDGPU::SReg_512RegClassID;
+      case 8: return AMDGPU::SGPR_256RegClassID;
+      case 16: return AMDGPU::SGPR_512RegClassID;
     }
   }
   return -1;
@@ -1754,6 +1760,11 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
   // TODO: add syntactic sugar for 1/(2*PI)
   bool Minus = false;
   if (getLexer().getKind() == AsmToken::Minus) {
+    const AsmToken NextToken = getLexer().peekTok();
+    if (!NextToken.is(AsmToken::Integer) &&
+        !NextToken.is(AsmToken::Real)) {
+        return MatchOperand_NoMatch;
+    }
     Minus = true;
     Parser.Lex();
   }
@@ -1783,7 +1794,7 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
     return MatchOperand_Success;
   }
   default:
-    return Minus ? MatchOperand_ParseFail : MatchOperand_NoMatch;
+    return MatchOperand_NoMatch;
   }
 }
 
@@ -2244,6 +2255,9 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
   return true;
 }
 
+static std::string AMDGPUMnemonicSpellCheck(StringRef S, uint64_t FBS,
+                                            unsigned VariantID = 0);
+
 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                               OperandVector &Operands,
                                               MCStreamer &Out,
@@ -2286,8 +2300,13 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   case Match_MissingFeature:
     return Error(IDLoc, "instruction not supported on this GPU");
 
-  case Match_MnemonicFail:
-    return Error(IDLoc, "unrecognized instruction mnemonic");
+  case Match_MnemonicFail: {
+    uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+    std::string Suggestion = AMDGPUMnemonicSpellCheck(
+        ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
+    return Error(IDLoc, "invalid instruction" + Suggestion,
+                 ((AMDGPUOperand &)*Operands[0]).getLocRange());
+  }
 
   case Match_InvalidOperand: {
     SMLoc ErrorLoc = IDLoc;
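AMDGPUMnemonicSpellCheck itself is generated by TableGen (enabled by the GET_MNEMONIC_SPELL_CHECKER define added further down). Conceptually it ranks the mnemonics available under the current feature bits by edit distance from the bad token; a hedged sketch of that idea, with a hypothetical candidate list standing in for the generated table:

    #include "llvm/ADT/StringRef.h"
    #include <string>
    #include <vector>

    // Sketch only: gather mnemonics within edit distance 2 of the typo and
    // format them as the ", did you mean: ...?" suffix for the diagnostic.
    static std::string suggestMnemonic(llvm::StringRef Bad,
                                       const std::vector<llvm::StringRef> &Known) {
      std::string Out;
      for (llvm::StringRef M : Known)
        if (Bad.edit_distance(M, /*AllowReplacements=*/true,
                              /*MaxEditDistance=*/2) <= 2)
          Out += (Out.empty() ? ", did you mean: " : ", ") + M.str();
      return Out.empty() ? Out : Out + "?";
    }

With this wired into MatchAndEmitInstruction, a typo such as "s_mov_b23" would be reported roughly as "invalid instruction, did you mean: s_mov_b32, s_mov_b64?" (exact wording depends on the generated checker).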
@@ -3838,7 +3857,9 @@ AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
   } else {
-    return MatchOperand_NoMatch;
+    // Swizzle "offset" operand is optional.
+    // If it is omitted, try parsing other optional operands.
+    return parseOptionalOperand(Operands);
   }
 }
 
@@ -4786,6 +4807,7 @@ extern "C" void LLVMInitializeAMDGPUAsmParser() {
 #define GET_REGISTER_MATCHER
 #define GET_MATCHER_IMPLEMENTATION
+#define GET_MNEMONIC_SPELL_CHECKER
 #include "AMDGPUGenAsmMatcher.inc"
 
 // This fuction should be defined after auto-generated include so that we have
diff --git a/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 4a3f2c975179..47a2d3f2fdc5 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -348,10 +348,12 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
   case AMDGPU::TTMP_128RegClassID:
   // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
   // this bundle?
-  case AMDGPU::SReg_256RegClassID:
-  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
+  case AMDGPU::SGPR_256RegClassID:
+  case AMDGPU::TTMP_256RegClassID:
+    // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
   // this bundle?
-  case AMDGPU::SReg_512RegClassID:
+  case AMDGPU::SGPR_512RegClassID:
+  case AMDGPU::TTMP_512RegClassID:
     shift = 2;
     break;
   // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
@@ -441,11 +443,11 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const {
 }
 
 MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {
-  return createSRegOperand(AMDGPU::SReg_256RegClassID, Val);
+  return decodeDstOp(OPW256, Val);
 }
 
 MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
-  return createSRegOperand(AMDGPU::SReg_512RegClassID, Val);
+  return decodeDstOp(OPW512, Val);
 }
 
 MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
@@ -593,6 +595,8 @@ unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
     return SGPR_32RegClassID;
   case OPW64: return SGPR_64RegClassID;
   case OPW128: return SGPR_128RegClassID;
+  case OPW256: return SGPR_256RegClassID;
+  case OPW512: return SGPR_512RegClassID;
   }
 }
 
@@ -608,6 +612,8 @@ unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
     return TTMP_32RegClassID;
   case OPW64: return TTMP_64RegClassID;
   case OPW128: return TTMP_128RegClassID;
+  case OPW256: return TTMP_256RegClassID;
+  case OPW512: return TTMP_512RegClassID;
   }
 }
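With OPW256/OPW512 added, the existing helpers compose directly: the width selects a register-class ID, and createSRegOperand (whose shift logic is extended in the first hunk of this file) converts the raw encoding into a tuple index within that class. A usage sketch under those assumptions:

    // Inside the disassembler (sketch): raw encoding 4 with a 256-bit width
    // lands in SGPR_256; after the shift it is tuple index 1, i.e. the
    // register range s[4:11].
    MCOperand Op = createSRegOperand(getSgprClassId(OPW256), /*Val=*/4);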
@@ -659,6 +665,25 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c
   }
 }
 
+MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) const {
+  using namespace AMDGPU::EncValues;
+
+  assert(Val < 128);
+  assert(Width == OPW256 || Width == OPW512);
+
+  if (Val <= SGPR_MAX) {
+    assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning.
+    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
+  }
+
+  int TTmpIdx = getTTmpIdx(Val);
+  if (TTmpIdx >= 0) {
+    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
+  }
+
+  llvm_unreachable("unknown dst register");
+}
+
 MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
   using namespace AMDGPU;
diff --git a/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index ce396eb68c4c..75cfc5e11282 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -95,6 +95,8 @@ public:
     OPW32,
     OPW64,
     OPW128,
+    OPW256,
+    OPW512,
     OPW16,
     OPWV216,
     OPW_LAST_,
@@ -110,6 +112,7 @@ public:
   MCOperand decodeLiteralConstant() const;
 
   MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val) const;
+  MCOperand decodeDstOp(const OpWidthTy Width, unsigned Val) const;
   MCOperand decodeSpecialReg32(unsigned Val) const;
   MCOperand decodeSpecialReg64(unsigned Val) const;
diff --git a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index 67663d39967c..bf57f88bef91 100644
--- a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -335,13 +335,13 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
   } else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(RegNo)) {
     O << 'v';
     NumRegs = 8;
-  } else if (MRI.getRegClass(AMDGPU::SReg_256RegClassID).contains(RegNo)) {
+  } else if (MRI.getRegClass(AMDGPU::SGPR_256RegClassID).contains(RegNo)) {
     O << 's';
     NumRegs = 8;
   } else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(RegNo)) {
     O << 'v';
     NumRegs = 16;
-  } else if (MRI.getRegClass(AMDGPU::SReg_512RegClassID).contains(RegNo)) {
+  } else if (MRI.getRegClass(AMDGPU::SGPR_512RegClassID).contains(RegNo)) {
     O << 's';
     NumRegs = 16;
   } else {
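On the printer side, once the class determines NumRegs, printRegOperand finishes by emitting the single-letter file prefix and an inclusive range, so an 8-wide SGPR tuple starting at s8 renders as s[8:15]. Schematically (simplified from the tail of the function, which this hunk does not show):

    // RegIdx is the tuple's first 32-bit register, NumRegs its width.
    O << '[' << RegIdx << ':' << (RegIdx + NumRegs - 1) << ']';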
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 6b7c3ffb7bb8..dd0efef7f91b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -8,6 +8,26 @@
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
+//  Helpers
+//===----------------------------------------------------------------------===//
+
+class getSubRegs<int size> {
+  list<SubRegIndex> ret2 = [sub0, sub1];
+  list<SubRegIndex> ret3 = [sub0, sub1, sub2];
+  list<SubRegIndex> ret4 = [sub0, sub1, sub2, sub3];
+  list<SubRegIndex> ret8 = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];
+  list<SubRegIndex> ret16 = [sub0, sub1, sub2, sub3,
+                             sub4, sub5, sub6, sub7,
+                             sub8, sub9, sub10, sub11,
+                             sub12, sub13, sub14, sub15];
+
+  list<SubRegIndex> ret = !if(!eq(size, 2), ret2,
+                              !if(!eq(size, 3), ret3,
+                                  !if(!eq(size, 4), ret4,
+                                      !if(!eq(size, 8), ret8, ret16))));
+}
+
+//===----------------------------------------------------------------------===//
 //  Declarations that describe the SI registers
 //===----------------------------------------------------------------------===//
 
 class SIReg <string n, bits<16> regIdx = 0> : Register<n>,
@@ -141,19 +161,19 @@ def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
 }
 
 // SGPR 64-bit registers
-def SGPR_64Regs : RegisterTuples<[sub0, sub1],
+def SGPR_64Regs : RegisterTuples<getSubRegs<2>.ret,
                              [(add (decimate SGPR_32, 2)),
                               (add (decimate (shl SGPR_32, 1), 2))]>;
 
 // SGPR 128-bit registers
-def SGPR_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
+def SGPR_128Regs : RegisterTuples<getSubRegs<4>.ret,
                               [(add (decimate SGPR_32, 4)),
                                (add (decimate (shl SGPR_32, 1), 4)),
                                (add (decimate (shl SGPR_32, 2), 4)),
                                (add (decimate (shl SGPR_32, 3), 4))]>;
 
 // SGPR 256-bit registers
-def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
+def SGPR_256Regs : RegisterTuples<getSubRegs<8>.ret,
                               [(add (decimate SGPR_32, 4)),
                                (add (decimate (shl SGPR_32, 1), 4)),
                                (add (decimate (shl SGPR_32, 2), 4)),
@@ -164,8 +184,7 @@ def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
                                (add (decimate (shl SGPR_32, 7), 4))]>;
 
 // SGPR 512-bit registers
-def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
-                               sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
+def SGPR_512Regs : RegisterTuples<getSubRegs<16>.ret,
                               [(add (decimate SGPR_32, 4)),
                                (add (decimate (shl SGPR_32, 1), 4)),
                                (add (decimate (shl SGPR_32, 2), 4)),
@@ -190,47 +209,125 @@ def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
 }
 
 // Trap handler TMP 64-bit registers
-def TTMP_64Regs : RegisterTuples<[sub0, sub1],
+def TTMP_64Regs : RegisterTuples<getSubRegs<2>.ret,
                              [(add (decimate TTMP_32, 2)),
                               (add (decimate (shl TTMP_32, 1), 2))]>;
 
 // Trap handler TMP 128-bit registers
-def TTMP_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
+def TTMP_128Regs : RegisterTuples<getSubRegs<4>.ret,
                               [(add (decimate TTMP_32, 4)),
                                (add (decimate (shl TTMP_32, 1), 4)),
                                (add (decimate (shl TTMP_32, 2), 4)),
                                (add (decimate (shl TTMP_32, 3), 4))]>;
 
-class TmpRegTuples <string tgt,
-                    bit Is64Bit,
-                    int Index0,
-                    int Index1 = !add(Index0, 1),
-                    int Index2 = !add(Index0, !if(Is64Bit, 1, 2)),
-                    int Index3 = !add(Index0, !if(Is64Bit, 1, 3)),
-                    string name = "ttmp["#Index0#":"#Index3#"]",
-                    Register r0 = !cast<Register>("TTMP"#Index0#tgt),
-                    Register r1 = !cast<Register>("TTMP"#Index1#tgt),
-                    Register r2 = !cast<Register>("TTMP"#Index2#tgt),
-                    Register r3 = !cast<Register>("TTMP"#Index3#tgt)> :
-  RegisterWithSubRegs<name, !if(Is64Bit, [r0, r1], [r0, r1, r2, r3])> {
-  let SubRegIndices = !if(Is64Bit, [sub0, sub1], [sub0, sub1, sub2, sub3]);
-  let HWEncoding = r0.HWEncoding;
-}
+def TTMP_256Regs : RegisterTuples<getSubRegs<8>.ret,
+                              [(add (decimate TTMP_32, 4)),
+                               (add (decimate (shl TTMP_32, 1), 4)),
+                               (add (decimate (shl TTMP_32, 2), 4)),
+                               (add (decimate (shl TTMP_32, 3), 4)),
+                               (add (decimate (shl TTMP_32, 4), 4)),
+                               (add (decimate (shl TTMP_32, 5), 4)),
+                               (add (decimate (shl TTMP_32, 6), 4)),
+                               (add (decimate (shl TTMP_32, 7), 4))]>;
+
+def TTMP_512Regs : RegisterTuples<getSubRegs<16>.ret,
+                              [(add (decimate TTMP_32, 4)),
+                               (add (decimate (shl TTMP_32, 1), 4)),
+                               (add (decimate (shl TTMP_32, 2), 4)),
+                               (add (decimate (shl TTMP_32, 3), 4)),
+                               (add (decimate (shl TTMP_32, 4), 4)),
+                               (add (decimate (shl TTMP_32, 5), 4)),
+                               (add (decimate (shl TTMP_32, 6), 4)),
+                               (add (decimate (shl TTMP_32, 7), 4)),
+                               (add (decimate (shl TTMP_32, 8), 4)),
+                               (add (decimate (shl TTMP_32, 9), 4)),
+                               (add (decimate (shl TTMP_32, 10), 4)),
+                               (add (decimate (shl TTMP_32, 11), 4)),
+                               (add (decimate (shl TTMP_32, 12), 4)),
+                               (add (decimate (shl TTMP_32, 13), 4)),
+                               (add (decimate (shl TTMP_32, 14), 4)),
+                               (add (decimate (shl TTMP_32, 15), 4))]>;
+
+class TmpRegTuplesBase<int index, int size,
+                       list<Register> subRegs,
+                       list<SubRegIndex> indices = getSubRegs<size>.ret,
+                       int index1 = !add(index, !add(size, -1)),
+                       string name = "ttmp["#index#":"#index1#"]"> :
+  RegisterWithSubRegs<name, subRegs> {
+  let HWEncoding = subRegs[0].HWEncoding;
+  let SubRegIndices = indices;
+}
+
+class TmpRegTuples<string tgt,
+                   int size,
+                   int index0,
+                   int index1 = !add(index0, 1),
+                   int index2 = !add(index0, !if(!eq(size, 2), 1, 2)),
+                   int index3 = !add(index0, !if(!eq(size, 2), 1, 3)),
+                   int index4 = !add(index0, !if(!eq(size, 8), 4, 1)),
+                   int index5 = !add(index0, !if(!eq(size, 8), 5, 1)),
+                   int index6 = !add(index0, !if(!eq(size, 8), 6, 1)),
+                   int index7 = !add(index0, !if(!eq(size, 8), 7, 1)),
+                   Register r0 = !cast<Register>("TTMP"#index0#tgt),
+                   Register r1 = !cast<Register>("TTMP"#index1#tgt),
+                   Register r2 = !cast<Register>("TTMP"#index2#tgt),
+                   Register r3 = !cast<Register>("TTMP"#index3#tgt),
+                   Register r4 = !cast<Register>("TTMP"#index4#tgt),
+                   Register r5 = !cast<Register>("TTMP"#index5#tgt),
+                   Register r6 = !cast<Register>("TTMP"#index6#tgt),
+                   Register r7 = !cast<Register>("TTMP"#index7#tgt)> :
+  TmpRegTuplesBase<index0, size,
+                   !if(!eq(size, 2), [r0, r1],
+                       !if(!eq(size, 4), [r0, r1, r2, r3],
+                                         [r0, r1, r2, r3, r4, r5, r6, r7])),
+                   getSubRegs<size>.ret>;
 
 foreach Index = {0, 2, 4, 6, 8, 10, 12, 14} in {
-  def TTMP#Index#_TTMP#!add(Index,1)#_vi   : TmpRegTuples<"_vi",   1, Index>;
-  def TTMP#Index#_TTMP#!add(Index,1)#_gfx9 : TmpRegTuples<"_gfx9", 1, Index>;
+  def TTMP#Index#_TTMP#!add(Index,1)#_vi   : TmpRegTuples<"_vi",   2, Index>;
+  def TTMP#Index#_TTMP#!add(Index,1)#_gfx9 : TmpRegTuples<"_gfx9", 2, Index>;
 }
 
 foreach Index = {0, 4, 8, 12} in {
   def TTMP#Index#_TTMP#!add(Index,1)#
                  _TTMP#!add(Index,2)#
-                 _TTMP#!add(Index,3)#_vi   : TmpRegTuples<"_vi",   0, Index>;
+                 _TTMP#!add(Index,3)#_vi : TmpRegTuples<"_vi",   4, Index>;
   def TTMP#Index#_TTMP#!add(Index,1)#
                  _TTMP#!add(Index,2)#
-                 _TTMP#!add(Index,3)#_gfx9 : TmpRegTuples<"_gfx9", 0, Index>;
+                 _TTMP#!add(Index,3)#_gfx9 : TmpRegTuples<"_gfx9", 4, Index>;
 }
 
+foreach Index = {0, 4, 8} in {
+  def TTMP#Index#_TTMP#!add(Index,1)#
+                 _TTMP#!add(Index,2)#
+                 _TTMP#!add(Index,3)#
+                 _TTMP#!add(Index,4)#
+                 _TTMP#!add(Index,5)#
+                 _TTMP#!add(Index,6)#
+                 _TTMP#!add(Index,7)#_vi : TmpRegTuples<"_vi",   8, Index>;
+  def TTMP#Index#_TTMP#!add(Index,1)#
+                 _TTMP#!add(Index,2)#
+                 _TTMP#!add(Index,3)#
+                 _TTMP#!add(Index,4)#
+                 _TTMP#!add(Index,5)#
+                 _TTMP#!add(Index,6)#
+                 _TTMP#!add(Index,7)#_gfx9 : TmpRegTuples<"_gfx9", 8, Index>;
+}
+
+def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_vi :
+  TmpRegTuplesBase<0, 16,
+                   [TTMP0_vi, TTMP1_vi, TTMP2_vi, TTMP3_vi,
+                    TTMP4_vi, TTMP5_vi, TTMP6_vi, TTMP7_vi,
+                    TTMP8_vi, TTMP9_vi, TTMP10_vi, TTMP11_vi,
+                    TTMP12_vi, TTMP13_vi, TTMP14_vi, TTMP15_vi]>;
+
+def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9 :
+  TmpRegTuplesBase<0, 16,
+                   [TTMP0_gfx9, TTMP1_gfx9, TTMP2_gfx9, TTMP3_gfx9,
+                    TTMP4_gfx9, TTMP5_gfx9, TTMP6_gfx9, TTMP7_gfx9,
+                    TTMP8_gfx9, TTMP9_gfx9, TTMP10_gfx9, TTMP11_gfx9,
+                    TTMP12_gfx9, TTMP13_gfx9, TTMP14_gfx9, TTMP15_gfx9]>;
+
+
 // VGPR 32-bit registers
 // i16/f16 only on VI+
 def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
@@ -240,25 +337,25 @@ def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
 }
 
 // VGPR 64-bit registers
-def VGPR_64 : RegisterTuples<[sub0, sub1],
+def VGPR_64 : RegisterTuples<getSubRegs<2>.ret,
                              [(add (trunc VGPR_32, 255)),
                               (add (shl VGPR_32, 1))]>;
 
 // VGPR 96-bit registers
-def VGPR_96 : RegisterTuples<[sub0, sub1, sub2],
+def VGPR_96 : RegisterTuples<getSubRegs<3>.ret,
                              [(add (trunc VGPR_32, 254)),
                               (add (shl VGPR_32, 1)),
                               (add (shl VGPR_32, 2))]>;
 
 // VGPR 128-bit registers
-def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
+def VGPR_128 : RegisterTuples<getSubRegs<4>.ret,
                               [(add (trunc VGPR_32, 253)),
                                (add (shl VGPR_32, 1)),
                                (add (shl VGPR_32, 2)),
                                (add (shl VGPR_32, 3))]>;
 
 // VGPR 256-bit registers
-def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
+def VGPR_256 : RegisterTuples<getSubRegs<8>.ret,
                               [(add (trunc VGPR_32, 249)),
                                (add (shl VGPR_32, 1)),
                                (add (shl VGPR_32, 2)),
@@ -269,8 +366,7 @@ def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
                                (add (shl VGPR_32, 7))]>;
 
 // VGPR 512-bit registers
-def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
-                               sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
+def VGPR_512 : RegisterTuples<getSubRegs<16>.ret,
                               [(add (trunc VGPR_32, 241)),
                                (add (shl VGPR_32, 1)),
                                (add (shl VGPR_32, 2)),
@@ -368,13 +464,31 @@ def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32,
 
 } // End CopyCost = 2
 
-def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256)> {
+def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs)> {
+  let AllocationPriority = 11;
+}
+
+def TTMP_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add TTMP_256Regs)> {
+  let isAllocatable = 0;
+}
+
+def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32,
+  (add SGPR_256, TTMP_256)> {
   // Requires 4 s_mov_b64 to copy
   let CopyCost = 4;
   let AllocationPriority = 11;
 }
 
-def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add SGPR_512)> {
+def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add SGPR_512Regs)> {
+  let AllocationPriority = 12;
+}
+
+def TTMP_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add TTMP_512Regs)> {
+  let isAllocatable = 0;
+}
+
+def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
+  (add SGPR_512, TTMP_512)> {
   // Requires 8 s_mov_b64 to copy
   let CopyCost = 8;
   let AllocationPriority = 12;
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 819a7add0be4..125a3b22d0cf 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -667,6 +667,10 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
   CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
   CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
   CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
+  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
+  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
+  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
+  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
   }
 
 #define CASE_CI_VI(node) \
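These CASE_VI_GFX9 entries feed the reg-to-reg mapping macros in this file, which rewrite a generic TTMP tuple into the encoding-specific register for the current subtarget. A sketch of what one entry expands to under the existing macro definitions (assumed; the macro body is not shown in this hunk):

    // Expansion sketch for MAP_REG2REG-style use of CASE_VI_GFX9(node):
    case AMDGPU::TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7:
      return isGFX9(STI)
                 ? AMDGPU::TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_gfx9
                 : AMDGPU::TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_vi;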
