diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2023-12-17 20:41:09 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2023-12-17 20:41:09 +0000 |
| commit | 312c0ed19cc5276a17bacf2120097bec4515b0f1 (patch) | |
| tree | e6e4a4163840b73ba54bb0d3b70ee4899e4b7434 /llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | |
| parent | b1c73532ee8997fe5dfbeb7d223027bdf99758a0 (diff) | |
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 133 |
1 file changed, 104 insertions, 29 deletions
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 092845d391a3..3b69a37728ea 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -373,7 +373,7 @@ public: bool isOffen() const { return isImmTy(ImmTyOffen); } bool isIdxen() const { return isImmTy(ImmTyIdxen); } bool isAddr64() const { return isImmTy(ImmTyAddr64); } - bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } + bool isOffset() const { return isImmTy(ImmTyOffset); } bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); } @@ -893,6 +893,7 @@ public: bool isSDelayALU() const; bool isHwreg() const; bool isSendMsg() const; + bool isSplitBarrier() const; bool isSwizzle() const; bool isSMRDOffset8() const; bool isSMEMOffset() const; @@ -1665,6 +1666,7 @@ private: SMLoc getInstLoc(const OperandVector &Operands) const; bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); + bool validateOffset(const MCInst &Inst, const OperandVector &Operands); bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); bool validateSOPLiteral(const MCInst &Inst) const; @@ -1856,6 +1858,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: case AMDGPU::OPERAND_REG_IMM_V2INT32: case AMDGPU::OPERAND_KIMM32: + case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: return &APFloat::IEEEsingle(); case AMDGPU::OPERAND_REG_IMM_INT64: case AMDGPU::OPERAND_REG_IMM_FP64: @@ -2185,7 +2188,8 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo case 
AMDGPU::OPERAND_REG_INLINE_C_V2INT32: case AMDGPU::OPERAND_REG_IMM_V2INT32: case AMDGPU::OPERAND_KIMM32: - case AMDGPU::OPERAND_KIMM16: { + case AMDGPU::OPERAND_KIMM16: + case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: { bool lost; APFloat FPLiteral(APFloat::IEEEdouble(), Literal); // Convert literal to single precision @@ -2226,6 +2230,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: case AMDGPU::OPERAND_REG_IMM_V2INT32: case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: + case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: if (isSafeTruncation(Val, 32) && AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), AsmParser->hasInv2PiInlineImm())) { @@ -2570,7 +2575,7 @@ static bool isRegularReg(RegisterKind Kind) { static const RegInfo* getRegularRegInfo(StringRef Str) { for (const RegInfo &Reg : RegularRegisters) - if (Str.startswith(Reg.Name)) + if (Str.starts_with(Reg.Name)) return &Reg; return nullptr; } @@ -2630,7 +2635,7 @@ AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, if (RegKind == IS_SGPR || RegKind == IS_TTMP) { // SGPR and TTMP registers must be aligned. // Max required alignment is 4 dwords. 
- AlignSize = std::min(RegWidth / 32, 4u); + AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u); } if (RegNum % AlignSize != 0) { @@ -3411,12 +3416,16 @@ unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { case AMDGPU::V_LSHLREV_B64_e64: case AMDGPU::V_LSHLREV_B64_gfx10: case AMDGPU::V_LSHLREV_B64_e64_gfx11: + case AMDGPU::V_LSHLREV_B64_e32_gfx12: + case AMDGPU::V_LSHLREV_B64_e64_gfx12: case AMDGPU::V_LSHRREV_B64_e64: case AMDGPU::V_LSHRREV_B64_gfx10: case AMDGPU::V_LSHRREV_B64_e64_gfx11: + case AMDGPU::V_LSHRREV_B64_e64_gfx12: case AMDGPU::V_ASHRREV_I64_e64: case AMDGPU::V_ASHRREV_I64_gfx10: case AMDGPU::V_ASHRREV_I64_e64_gfx11: + case AMDGPU::V_ASHRREV_I64_e64_gfx12: case AMDGPU::V_LSHL_B64_e64: case AMDGPU::V_LSHR_B64_e64: case AMDGPU::V_ASHR_I64_e64: @@ -3571,8 +3580,12 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints( : MCRegister::NoRegister; }; + // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache. + bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12; + const auto &InstInfo = getVOPDInstInfo(Opcode, &MII); - auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(getVRegIdx); + auto InvalidCompOprIdx = + InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc); if (!InvalidCompOprIdx) return true; @@ -4131,6 +4144,40 @@ SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { return getLoc(); } +bool AMDGPUAsmParser::validateOffset(const MCInst &Inst, + const OperandVector &Operands) { + auto Opcode = Inst.getOpcode(); + auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); + if (OpNum == -1) + return true; + + uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; + if ((TSFlags & SIInstrFlags::FLAT)) + return validateFlatOffset(Inst, Operands); + + if ((TSFlags & SIInstrFlags::SMRD)) + return validateSMEMOffset(Inst, Operands); + + const auto &Op = Inst.getOperand(OpNum); + if (isGFX12Plus() && + (TSFlags & (SIInstrFlags::MUBUF | 
SIInstrFlags::MTBUF))) { + const unsigned OffsetSize = 24; + if (!isIntN(OffsetSize, Op.getImm())) { + Error(getFlatOffsetLoc(Operands), + Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); + return false; + } + } else { + const unsigned OffsetSize = 16; + if (!isUIntN(OffsetSize, Op.getImm())) { + Error(getFlatOffsetLoc(Operands), + Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); + return false; + } + } + return true; +} + bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, const OperandVector &Operands) { uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; @@ -4148,11 +4195,12 @@ bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, return false; } - // For FLAT segment the offset must be positive; + // For pre-GFX12 FLAT instructions the offset must be positive; // MSB is ignored and forced to zero. unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI()); bool AllowNegative = - TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch); + (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) || + isGFX12Plus(); if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) { Error(getFlatOffsetLoc(Operands), Twine("expected a ") + @@ -4479,7 +4527,7 @@ bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, SMLoc BLGPLoc = getBLGPLoc(Operands); if (!BLGPLoc.isValid()) return true; - bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); + bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:"); auto FB = getFeatureBits(); bool UsesNeg = false; if (FB[AMDGPU::FeatureGFX940Insts]) { @@ -4788,10 +4836,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, if (!validateMovrels(Inst, Operands)) { return false; } - if (!validateFlatOffset(Inst, Operands)) { - return false; - } - if (!validateSMEMOffset(Inst, Operands)) { + if (!validateOffset(Inst, Operands)) { return false; } if (!validateMAIAccWrite(Inst, Operands)) { @@ -5334,11 +5379,17 @@ bool 
AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, ValRange); } else if (ID == ".amdhsa_dx10_clamp") { + if (IVersion.Major >= 12) + return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, - COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); + COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val, + ValRange); } else if (ID == ".amdhsa_ieee_mode") { - PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, - Val, ValRange); + if (IVersion.Major >= 12) + return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val, + ValRange); } else if (ID == ".amdhsa_fp16_overflow") { if (IVersion.Major < 9) return Error(IDRange.Start, "directive requires gfx9+", IDRange); @@ -5401,6 +5452,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, Val, ValRange); + } else if (ID == ".amdhsa_round_robin_scheduling") { + if (IVersion.Major < 12) + return Error(IDRange.Start, "directive requires gfx12+", IDRange); + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, Val, + ValRange); } else { return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); } @@ -5554,6 +5611,18 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, } Lex(); + if (ID == "enable_dx10_clamp") { + if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) && + isGFX12Plus()) + return TokError("enable_dx10_clamp=1 is not allowed on GFX12+"); + } + + if (ID == "enable_ieee_mode") { + if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) && + isGFX12Plus()) + return TokError("enable_ieee_mode=1 is not allowed on GFX12+"); + } + if (ID == "enable_wavefront_size32") { if (Header.code_properties & 
AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { if (!isGFX10Plus()) @@ -5974,20 +6043,20 @@ StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { setForcedDPP(false); setForcedSDWA(false); - if (Name.endswith("_e64_dpp")) { + if (Name.ends_with("_e64_dpp")) { setForcedDPP(true); setForcedEncodingSize(64); return Name.substr(0, Name.size() - 8); - } else if (Name.endswith("_e64")) { + } else if (Name.ends_with("_e64")) { setForcedEncodingSize(64); return Name.substr(0, Name.size() - 4); - } else if (Name.endswith("_e32")) { + } else if (Name.ends_with("_e32")) { setForcedEncodingSize(32); return Name.substr(0, Name.size() - 4); - } else if (Name.endswith("_dpp")) { + } else if (Name.ends_with("_dpp")) { setForcedDPP(true); return Name.substr(0, Name.size() - 4); - } else if (Name.endswith("_sdwa")) { + } else if (Name.ends_with("_sdwa")) { setForcedSDWA(true); return Name.substr(0, Name.size() - 5); } @@ -6010,7 +6079,7 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); - bool IsMIMG = Name.startswith("image_"); + bool IsMIMG = Name.starts_with("image_"); while (!trySkipToken(AsmToken::EndOfStatement)) { OperandMode Mode = OperandMode_Default; @@ -6150,7 +6219,7 @@ unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const { Disabling = Id.consume_front("no"); - if (isGFX940() && !Mnemo.startswith("s_")) { + if (isGFX940() && !Mnemo.starts_with("s_")) { return StringSwitch<unsigned>(Id) .Case("nt", AMDGPU::CPol::NT) .Case("sc0", AMDGPU::CPol::SC0) @@ -6282,13 +6351,13 @@ ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) { else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" || Value == "TH_LOAD_NT_WB") { return Error(StringLoc, "invalid th value"); - } else if (Value.startswith("TH_ATOMIC_")) { + } else if (Value.starts_with("TH_ATOMIC_")) { Value = Value.drop_front(10); TH = AMDGPU::CPol::TH_TYPE_ATOMIC; - 
} else if (Value.startswith("TH_LOAD_")) { + } else if (Value.starts_with("TH_LOAD_")) { Value = Value.drop_front(8); TH = AMDGPU::CPol::TH_TYPE_LOAD; - } else if (Value.startswith("TH_STORE_")) { + } else if (Value.starts_with("TH_STORE_")) { Value = Value.drop_front(9); TH = AMDGPU::CPol::TH_TYPE_STORE; } else { @@ -6733,7 +6802,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); bool Failed = true; - bool Sat = CntName.endswith("_sat"); + bool Sat = CntName.ends_with("_sat"); if (CntName == "vmcnt" || CntName == "vmcnt_sat") { Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); @@ -7206,7 +7275,7 @@ ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { if (!parseId(Str)) return ParseStatus::NoMatch; - if (!Str.startswith("attr")) + if (!Str.starts_with("attr")) return Error(S, "invalid interpolation attribute"); StringRef Chan = Str.take_back(2); @@ -7297,7 +7366,7 @@ bool AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { if (isToken(AsmToken::Identifier)) { StringRef Tok = getTokenStr(); - if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { + if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) { lex(); return true; } @@ -8446,7 +8515,7 @@ bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { Token += Suffix; StringRef DimId = Token; - if (DimId.startswith("SQ_RSRC_IMG_")) + if (DimId.starts_with("SQ_RSRC_IMG_")) DimId = DimId.drop_front(12); const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); @@ -9129,3 +9198,9 @@ bool AMDGPUOperand::isWaitVDST() const { bool AMDGPUOperand::isWaitEXP() const { return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm()); } + +//===----------------------------------------------------------------------===// +// Split Barrier +//===----------------------------------------------------------------------===// + +bool AMDGPUOperand::isSplitBarrier() const { return 
isInlinableImm(MVT::i32); } |
