diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2023-12-17 20:41:09 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2023-12-17 20:41:09 +0000 |
| commit | 312c0ed19cc5276a17bacf2120097bec4515b0f1 (patch) | |
| tree | e6e4a4163840b73ba54bb0d3b70ee4899e4b7434 /llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | |
| parent | b1c73532ee8997fe5dfbeb7d223027bdf99758a0 (diff) | |
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 133 |
1 file changed, 104 insertions, 29 deletions
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 092845d391a3..3b69a37728ea 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -373,7 +373,7 @@ public: bool isOffen() const { return isImmTy(ImmTyOffen); } bool isIdxen() const { return isImmTy(ImmTyIdxen); } bool isAddr64() const { return isImmTy(ImmTyAddr64); } - bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } + bool isOffset() const { return isImmTy(ImmTyOffset); } bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); } @@ -893,6 +893,7 @@ public: bool isSDelayALU() const; bool isHwreg() const; bool isSendMsg() const; + bool isSplitBarrier() const; bool isSwizzle() const; bool isSMRDOffset8() const; bool isSMEMOffset() const; @@ -1665,6 +1666,7 @@ private: SMLoc getInstLoc(const OperandVector &Operands) const; bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); + bool validateOffset(const MCInst &Inst, const OperandVector &Operands); bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); bool validateSOPLiteral(const MCInst &Inst) const; @@ -1856,6 +1858,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: case AMDGPU::OPERAND_REG_IMM_V2INT32: case AMDGPU::OPERAND_KIMM32: + case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: return &APFloat::IEEEsingle(); case AMDGPU::OPERAND_REG_IMM_INT64: case AMDGPU::OPERAND_REG_IMM_FP64: @@ -2185,7 +2188,8 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo case 
AMDGPU::OPERAND_REG_INLINE_C_V2INT32: case AMDGPU::OPERAND_REG_IMM_V2INT32: case AMDGPU::OPERAND_KIMM32: - case AMDGPU::OPERAND_KIMM16: { + case AMDGPU::OPERAND_KIMM16: + case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: { bool lost; APFloat FPLiteral(APFloat::IEEEdouble(), Literal); // Convert literal to single precision @@ -2226,6 +2230,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: case AMDGPU::OPERAND_REG_IMM_V2INT32: case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: + case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: if (isSafeTruncation(Val, 32) && AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), AsmParser->hasInv2PiInlineImm())) { @@ -2570,7 +2575,7 @@ static bool isRegularReg(RegisterKind Kind) { static const RegInfo* getRegularRegInfo(StringRef Str) { for (const RegInfo &Reg : RegularRegisters) - if (Str.startswith(Reg.Name)) + if (Str.starts_with(Reg.Name)) return &Reg; return nullptr; } @@ -2630,7 +2635,7 @@ AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, if (RegKind == IS_SGPR || RegKind == IS_TTMP) { // SGPR and TTMP registers must be aligned. // Max required alignment is 4 dwords. 
- AlignSize = std::min(RegWidth / 32, 4u); + AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u); } if (RegNum % AlignSize != 0) { @@ -3411,12 +3416,16 @@ unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { case AMDGPU::V_LSHLREV_B64_e64: case AMDGPU::V_LSHLREV_B64_gfx10: case AMDGPU::V_LSHLREV_B64_e64_gfx11: + case AMDGPU::V_LSHLREV_B64_e32_gfx12: + case AMDGPU::V_LSHLREV_B64_e64_gfx12: case AMDGPU::V_LSHRREV_B64_e64: case AMDGPU::V_LSHRREV_B64_gfx10: case AMDGPU::V_LSHRREV_B64_e64_gfx11: + case AMDGPU::V_LSHRREV_B64_e64_gfx12: case AMDGPU::V_ASHRREV_I64_e64: case AMDGPU::V_ASHRREV_I64_gfx10: case AMDGPU::V_ASHRREV_I64_e64_gfx11: + case AMDGPU::V_ASHRREV_I64_e64_gfx12: case AMDGPU::V_LSHL_B64_e64: case AMDGPU::V_LSHR_B64_e64: case AMDGPU::V_ASHR_I64_e64: @@ -3571,8 +3580,12 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints( : MCRegister::NoRegister; }; + // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache. + bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12; + const auto &InstInfo = getVOPDInstInfo(Opcode, &MII); - auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(getVRegIdx); + auto InvalidCompOprIdx = + InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc); if (!InvalidCompOprIdx) return true; @@ -4131,6 +4144,40 @@ SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { return getLoc(); } +bool AMDGPUAsmParser::validateOffset(const MCInst &Inst, + const OperandVector &Operands) { + auto Opcode = Inst.getOpcode(); + auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); + if (OpNum == -1) + return true; + + uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; + if ((TSFlags & SIInstrFlags::FLAT)) + return validateFlatOffset(Inst, Operands); + + if ((TSFlags & SIInstrFlags::SMRD)) + return validateSMEMOffset(Inst, Operands); + + const auto &Op = Inst.getOperand(OpNum); + if (isGFX12Plus() && + (TSFlags & (SIInstrFlags::MUBUF | 
SIInstrFlags::MTBUF))) { + const unsigned OffsetSize = 24; + if (!isIntN(OffsetSize, Op.getImm())) { + Error(getFlatOffsetLoc(Operands), + Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); + return false; + } + } else { + const unsigned OffsetSize = 16; + if (!isUIntN(OffsetSize, Op.getImm())) { + Error(getFlatOffsetLoc(Operands), + Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); + return false; + } + } + return true; +} + bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, const OperandVector &Operands) { uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; @@ -4148,11 +4195,12 @@ bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, return false; } - // For FLAT segment the offset must be positive; + // For pre-GFX12 FLAT instructions the offset must be positive; // MSB is ignored and forced to zero. unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI()); bool AllowNegative = - TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch); + (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) || + isGFX12Plus(); if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) { Error(getFlatOffsetLoc(Operands), Twine("expected a ") + @@ -4479,7 +4527,7 @@ bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, SMLoc BLGPLoc = getBLGPLoc(Operands); if (!BLGPLoc.isValid()) return true; - bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); + bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:"); auto FB = getFeatureBits(); bool UsesNeg = false; if (FB[AMDGPU::FeatureGFX940Insts]) { @@ -4788,10 +4836,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, if (!validateMovrels(Inst, Operands)) { return false; } - if (!validateFlatOffset(Inst, Operands)) { - return false; - } - if (!validateSMEMOffset(Inst, Operands)) { + if (!validateOffset(Inst, Operands)) { return false; } if (!validateMAIAccWrite(Inst, Operands)) { @@ -5334,11 +5379,17 @@ bool 
AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, ValRange); } else if (ID == ".amdhsa_dx10_clamp") { + if (IVersion.Major >= 12) + return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, - COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); + COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val, + ValRange); } else if (ID == ".amdhsa_ieee_mode") { - PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, - Val, ValRange); + if (IVersion.Major >= 12) + return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val, + ValRange); } else if (ID == ".amdhsa_fp16_overflow") { if (IVersion.Major < 9) return Error(IDRange.Start, "directive requires gfx9+", IDRange); @@ -5401,6 +5452,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, Val, ValRange); + } else if (ID == ".amdhsa_round_robin_scheduling") { + if (IVersion.Major < 12) + return Error(IDRange.Start, "directive requires gfx12+", IDRange); + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, Val, + ValRange); } else { return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); } @@ -5554,6 +5611,18 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, } Lex(); + if (ID == "enable_dx10_clamp") { + if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) && + isGFX12Plus()) + return TokError("enable_dx10_clamp=1 is not allowed on GFX12+"); + } + + if (ID == "enable_ieee_mode") { + if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) && + isGFX12Plus()) + return TokError("enable_ieee_mode=1 is not allowed on GFX12+"); + } + if (ID == "enable_wavefront_size32") { if (Header.code_properties & 
AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { if (!isGFX10Plus()) @@ -5974,20 +6043,20 @@ StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { setForcedDPP(false); setForcedSDWA(false); - if (Name.endswith("_e64_dpp")) { + if (Name.ends_with("_e64_dpp")) { setForcedDPP(true); setForcedEncodingSize(64); return Name.substr(0, Name.size() - 8); - } else if (Name.endswith("_e64")) { + } else if (Name.ends_with("_e64")) { setForcedEncodingSize(64); return Name.substr(0, Name.size() - 4); - } else if (Name.endswith("_e32")) { + } else if (Name.ends_with("_e32")) { setForcedEncodingSize(32); return Name.substr(0, Name.size() - 4); - } else if (Name.endswith("_dpp")) { + } else if (Name.ends_with("_dpp")) { setForcedDPP(true); return Name.substr(0, Name.size() - 4); - } else if (Name.endswith("_sdwa")) { + } else if (Name.ends_with("_sdwa")) { setForcedSDWA(true); return Name.substr(0, Name.size() - 5); } @@ -6010,7 +6079,7 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); - bool IsMIMG = Name.startswith("image_"); + bool IsMIMG = Name.starts_with("image_"); while (!trySkipToken(AsmToken::EndOfStatement)) { OperandMode Mode = OperandMode_Default; @@ -6150,7 +6219,7 @@ unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const { Disabling = Id.consume_front("no"); - if (isGFX940() && !Mnemo.startswith("s_")) { + if (isGFX940() && !Mnemo.starts_with("s_")) { return StringSwitch<unsigned>(Id) .Case("nt", AMDGPU::CPol::NT) .Case("sc0", AMDGPU::CPol::SC0) @@ -6282,13 +6351,13 @@ ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) { else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" || Value == "TH_LOAD_NT_WB") { return Error(StringLoc, "invalid th value"); - } else if (Value.startswith("TH_ATOMIC_")) { + } else if (Value.starts_with("TH_ATOMIC_")) { Value = Value.drop_front(10); TH = AMDGPU::CPol::TH_TYPE_ATOMIC; - 
} else if (Value.startswith("TH_LOAD_")) { + } else if (Value.starts_with("TH_LOAD_")) { Value = Value.drop_front(8); TH = AMDGPU::CPol::TH_TYPE_LOAD; - } else if (Value.startswith("TH_STORE_")) { + } else if (Value.starts_with("TH_STORE_")) { Value = Value.drop_front(9); TH = AMDGPU::CPol::TH_TYPE_STORE; } else { @@ -6733,7 +6802,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); bool Failed = true; - bool Sat = CntName.endswith("_sat"); + bool Sat = CntName.ends_with("_sat"); if (CntName == "vmcnt" || CntName == "vmcnt_sat") { Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); @@ -7206,7 +7275,7 @@ ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { if (!parseId(Str)) return ParseStatus::NoMatch; - if (!Str.startswith("attr")) + if (!Str.starts_with("attr")) return Error(S, "invalid interpolation attribute"); StringRef Chan = Str.take_back(2); @@ -7297,7 +7366,7 @@ bool AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { if (isToken(AsmToken::Identifier)) { StringRef Tok = getTokenStr(); - if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { + if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) { lex(); return true; } @@ -8446,7 +8515,7 @@ bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { Token += Suffix; StringRef DimId = Token; - if (DimId.startswith("SQ_RSRC_IMG_")) + if (DimId.starts_with("SQ_RSRC_IMG_")) DimId = DimId.drop_front(12); const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); @@ -9129,3 +9198,9 @@ bool AMDGPUOperand::isWaitVDST() const { bool AMDGPUOperand::isWaitEXP() const { return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm()); } + +//===----------------------------------------------------------------------===// +// Split Barrier +//===----------------------------------------------------------------------===// + +bool AMDGPUOperand::isSplitBarrier() const { return 
isInlinableImm(MVT::i32); } |
