summaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp')
-rw-r--r--lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp1524
1 files changed, 1166 insertions, 358 deletions
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index efcf1b23adaa..a6c31629e7c4 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -15,38 +15,62 @@
#include "Utils/AMDKernelCodeTUtils.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
-#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
using namespace llvm;
+using namespace llvm::AMDGPU;
namespace {
-struct OptionalOperand;
+class AMDGPUAsmParser;
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
+//===----------------------------------------------------------------------===//
+// Operand
+//===----------------------------------------------------------------------===//
+
class AMDGPUOperand : public MCParsedAsmOperand {
enum KindTy {
Token,
@@ -56,16 +80,18 @@ class AMDGPUOperand : public MCParsedAsmOperand {
} Kind;
SMLoc StartLoc, EndLoc;
+ const AMDGPUAsmParser *AsmParser;
public:
- AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+ AMDGPUOperand(enum KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
+ : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
typedef std::unique_ptr<AMDGPUOperand> Ptr;
struct Modifiers {
- bool Abs;
- bool Neg;
- bool Sext;
+ bool Abs = false;
+ bool Neg = false;
+ bool Sext = false;
bool hasFPModifiers() const { return Abs || Neg; }
bool hasIntModifiers() const { return Sext; }
@@ -126,8 +152,15 @@ public:
ImmTyDA,
ImmTyR128,
ImmTyLWE,
+ ImmTyExpTgt,
+ ImmTyExpCompr,
+ ImmTyExpVM,
ImmTyHwreg,
+ ImmTyOff,
ImmTySendMsg,
+ ImmTyInterpSlot,
+ ImmTyInterpAttr,
+ ImmTyAttrChan
};
struct TokOp {
@@ -136,18 +169,16 @@ public:
};
struct ImmOp {
- bool IsFPImm;
- ImmTy Type;
int64_t Val;
+ ImmTy Type;
+ bool IsFPImm;
Modifiers Mods;
};
struct RegOp {
unsigned RegNo;
- Modifiers Mods;
- const MCRegisterInfo *TRI;
- const MCSubtargetInfo *STI;
bool IsForcedVOP3;
+ Modifiers Mods;
};
union {
@@ -175,20 +206,8 @@ public:
return Kind == Immediate;
}
- bool isInlinableImm() const {
- if (!isImmTy(ImmTyNone)) {
- // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
- return false;
- }
- // TODO: We should avoid using host float here. It would be better to
- // check the float bit values which is what a few other places do.
- // We've had bot failures before due to weird NaN support on mips hosts.
- const float F = BitsToFloat(Imm.Val);
- // TODO: Add 1/(2*pi) for VI
- return (Imm.Val <= 64 && Imm.Val >= -16) ||
- (F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 ||
- F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0);
- }
+ bool isInlinableImm(MVT type) const;
+ bool isLiteralImm(MVT type) const;
bool isRegKind() const {
return Kind == Register;
@@ -198,18 +217,46 @@ public:
return isRegKind() && !Reg.Mods.hasModifiers();
}
- bool isRegOrImmWithInputMods() const {
- return isRegKind() || isInlinableImm();
+ bool isRegOrImmWithInputMods(MVT type) const {
+ return isRegKind() || isInlinableImm(type);
+ }
+
+ bool isRegOrImmWithInt16InputMods() const {
+ return isRegOrImmWithInputMods(MVT::i16);
+ }
+
+ bool isRegOrImmWithInt32InputMods() const {
+ return isRegOrImmWithInputMods(MVT::i32);
+ }
+
+ bool isRegOrImmWithInt64InputMods() const {
+ return isRegOrImmWithInputMods(MVT::i64);
+ }
+
+ bool isRegOrImmWithFP16InputMods() const {
+ return isRegOrImmWithInputMods(MVT::f16);
+ }
+
+ bool isRegOrImmWithFP32InputMods() const {
+ return isRegOrImmWithInputMods(MVT::f32);
+ }
+
+ bool isRegOrImmWithFP64InputMods() const {
+ return isRegOrImmWithInputMods(MVT::f64);
+ }
+
+ bool isVReg32OrOff() const {
+ return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
}
bool isImmTy(ImmTy ImmT) const {
return isImm() && Imm.Type == ImmT;
}
-
+
bool isImmModifier() const {
return isImm() && Imm.Type != ImmTyNone;
}
-
+
bool isClampSI() const { return isImmTy(ImmTyClampSI); }
bool isOModSI() const { return isImmTy(ImmTyOModSI); }
bool isDMask() const { return isImmTy(ImmTyDMask); }
@@ -217,6 +264,10 @@ public:
bool isDA() const { return isImmTy(ImmTyDA); }
bool isR128() const { return isImmTy(ImmTyUNorm); }
bool isLWE() const { return isImmTy(ImmTyLWE); }
+ bool isOff() const { return isImmTy(ImmTyOff); }
+ bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
+ bool isExpVM() const { return isImmTy(ImmTyExpVM); }
+ bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
bool isOffen() const { return isImmTy(ImmTyOffen); }
bool isIdxen() const { return isImmTy(ImmTyIdxen); }
bool isAddr64() const { return isImmTy(ImmTyAddr64); }
@@ -234,7 +285,10 @@ public:
bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
-
+ bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
+ bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
+ bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
+
bool isMod() const {
return isClampSI() || isOModSI();
}
@@ -243,47 +297,112 @@ public:
return isReg() || isImm();
}
- bool isRegClass(unsigned RCID) const {
- return isReg() && Reg.TRI->getRegClass(RCID).contains(getReg());
+ bool isRegClass(unsigned RCID) const;
+
+ bool isSCSrcB16() const {
+ return isRegClass(AMDGPU::SReg_32RegClassID) || isInlinableImm(MVT::i16);
+ }
+
+ bool isSCSrcB32() const {
+ return isRegClass(AMDGPU::SReg_32RegClassID) || isInlinableImm(MVT::i32);
+ }
+
+ bool isSCSrcB64() const {
+ return isRegClass(AMDGPU::SReg_64RegClassID) || isInlinableImm(MVT::i64);
+ }
+
+ bool isSCSrcF16() const {
+ return isRegClass(AMDGPU::SReg_32RegClassID) || isInlinableImm(MVT::f16);
}
- bool isSCSrc32() const {
- return isInlinableImm() || isRegClass(AMDGPU::SReg_32RegClassID);
+ bool isSCSrcF32() const {
+ return isRegClass(AMDGPU::SReg_32RegClassID) || isInlinableImm(MVT::f32);
}
- bool isSCSrc64() const {
- return isInlinableImm() || isRegClass(AMDGPU::SReg_64RegClassID);
+ bool isSCSrcF64() const {
+ return isRegClass(AMDGPU::SReg_64RegClassID) || isInlinableImm(MVT::f64);
}
- bool isSSrc32() const {
- return isImm() || isSCSrc32() || isExpr();
+ bool isSSrcB32() const {
+ return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
}
- bool isSSrc64() const {
+ bool isSSrcB16() const {
+ return isSCSrcB16() || isLiteralImm(MVT::i16);
+ }
+
+ bool isSSrcB64() const {
// TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
// See isVSrc64().
- return isImm() || isSCSrc64();
+ return isSCSrcB64() || isLiteralImm(MVT::i64);
+ }
+
+ bool isSSrcF32() const {
+ return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
+ }
+
+ bool isSSrcF64() const {
+ return isSCSrcB64() || isLiteralImm(MVT::f64);
+ }
+
+ bool isSSrcF16() const {
+ return isSCSrcB16() || isLiteralImm(MVT::f16);
}
- bool isVCSrc32() const {
- return isInlinableImm() || isRegClass(AMDGPU::VS_32RegClassID);
+ bool isVCSrcB32() const {
+ return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(MVT::i32);
}
- bool isVCSrc64() const {
- return isInlinableImm() || isRegClass(AMDGPU::VS_64RegClassID);
+ bool isVCSrcB64() const {
+ return isRegClass(AMDGPU::VS_64RegClassID) || isInlinableImm(MVT::i64);
}
- bool isVSrc32() const {
- return isImm() || isVCSrc32();
+ bool isVCSrcB16() const {
+ return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(MVT::i16);
}
- bool isVSrc64() const {
- // TODO: Check if the 64-bit value (coming from assembly source) can be
- // narrowed to 32 bits (in the instruction stream). That require knowledge
- // of instruction type (unsigned/signed, floating or "untyped"/B64),
- // see [AMD GCN3 ISA 6.3.1].
- // TODO: How 64-bit values are formed from 32-bit literals in _B64 insns?
- return isImm() || isVCSrc64();
+ bool isVCSrcF32() const {
+ return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(MVT::f32);
+ }
+
+ bool isVCSrcF64() const {
+ return isRegClass(AMDGPU::VS_64RegClassID) || isInlinableImm(MVT::f64);
+ }
+
+ bool isVCSrcF16() const {
+ return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(MVT::f16);
+ }
+
+ bool isVSrcB32() const {
+ return isVCSrcF32() || isLiteralImm(MVT::i32);
+ }
+
+ bool isVSrcB64() const {
+ return isVCSrcF64() || isLiteralImm(MVT::i64);
+ }
+
+ bool isVSrcB16() const {
+ return isVCSrcF16() || isLiteralImm(MVT::i16);
+ }
+
+ bool isVSrcF32() const {
+ return isVCSrcF32() || isLiteralImm(MVT::f32);
+ }
+
+ bool isVSrcF64() const {
+ return isVCSrcF64() || isLiteralImm(MVT::f64);
+ }
+
+ bool isVSrcF16() const {
+ return isVCSrcF16() || isLiteralImm(MVT::f16);
+ }
+
+ bool isKImmFP32() const {
+ return isLiteralImm(MVT::f32);
+ }
+
+ bool isKImmFP16() const {
+ return isLiteralImm(MVT::f16);
}
bool isMem() const override {
@@ -301,9 +420,11 @@ public:
bool isSWaitCnt() const;
bool isHwreg() const;
bool isSendMsg() const;
- bool isSMRDOffset() const;
+ bool isSMRDOffset8() const;
+ bool isSMRDOffset20() const;
bool isSMRDLiteralOffset() const;
bool isDPPCtrl() const;
+ bool isGPRIdxMode() const;
StringRef getExpressionAsToken() const {
assert(isExpr());
@@ -311,7 +432,6 @@ public:
return S->getSymbol().getName();
}
-
StringRef getToken() const {
assert(isToken());
@@ -359,7 +479,7 @@ public:
bool hasModifiers() const {
return getModifiers().hasModifiers();
}
-
+
bool hasFPModifiers() const {
return getModifiers().hasFPModifiers();
}
@@ -368,30 +488,23 @@ public:
return getModifiers().hasIntModifiers();
}
- void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const {
- if (isImmTy(ImmTyNone) && ApplyModifiers && Imm.Mods.hasFPModifiers()) {
- // Apply modifiers to immediate value
- int64_t Val = Imm.Val;
- bool Negate = Imm.Mods.Neg; // Only negate can get here
- if (Imm.IsFPImm) {
- APFloat F(BitsToFloat(Val));
- if (Negate) {
- F.changeSign();
- }
- Val = F.bitcastToAPInt().getZExtValue();
- } else {
- Val = Negate ? -Val : Val;
- }
- Inst.addOperand(MCOperand::createImm(Val));
- } else {
- Inst.addOperand(MCOperand::createImm(getImm()));
- }
+ void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
+
+ void addLiteralImmOperand(MCInst &Inst, int64_t Val) const;
+
+ template <unsigned Bitwidth>
+ void addKImmFPOperands(MCInst &Inst, unsigned N) const;
+
+ void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
+ addKImmFPOperands<16>(Inst, N);
}
- void addRegOperands(MCInst &Inst, unsigned N) const {
- Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), *Reg.STI)));
+ void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
+ addKImmFPOperands<32>(Inst, N);
}
+ void addRegOperands(MCInst &Inst, unsigned N) const;
+
void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
if (isRegKind())
addRegOperands(Inst, N);
@@ -430,7 +543,7 @@ public:
}
}
- void printImmTy(raw_ostream& OS, ImmTy Type) const {
+ static void printImmTy(raw_ostream& OS, ImmTy Type) {
switch (Type) {
case ImmTyNone: OS << "None"; break;
case ImmTyGDS: OS << "GDS"; break;
@@ -458,8 +571,15 @@ public:
case ImmTyDA: OS << "DA"; break;
case ImmTyR128: OS << "R128"; break;
case ImmTyLWE: OS << "LWE"; break;
+ case ImmTyOff: OS << "Off"; break;
+ case ImmTyExpTgt: OS << "ExpTgt"; break;
+ case ImmTyExpCompr: OS << "ExpCompr"; break;
+ case ImmTyExpVM: OS << "ExpVM"; break;
case ImmTyHwreg: OS << "Hwreg"; break;
case ImmTySendMsg: OS << "SendMsg"; break;
+ case ImmTyInterpSlot: OS << "InterpSlot"; break;
+ case ImmTyInterpAttr: OS << "InterpAttr"; break;
+ case ImmTyAttrChan: OS << "AttrChan"; break;
}
}
@@ -484,22 +604,24 @@ public:
}
}
- static AMDGPUOperand::Ptr CreateImm(int64_t Val, SMLoc Loc,
+ static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
+ int64_t Val, SMLoc Loc,
enum ImmTy Type = ImmTyNone,
bool IsFPImm = false) {
- auto Op = llvm::make_unique<AMDGPUOperand>(Immediate);
+ auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
Op->Imm.Val = Val;
Op->Imm.IsFPImm = IsFPImm;
Op->Imm.Type = Type;
- Op->Imm.Mods = {false, false, false};
+ Op->Imm.Mods = Modifiers();
Op->StartLoc = Loc;
Op->EndLoc = Loc;
return Op;
}
- static AMDGPUOperand::Ptr CreateToken(StringRef Str, SMLoc Loc,
+ static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
+ StringRef Str, SMLoc Loc,
bool HasExplicitEncodingSize = true) {
- auto Res = llvm::make_unique<AMDGPUOperand>(Token);
+ auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
Res->Tok.Data = Str.data();
Res->Tok.Length = Str.size();
Res->StartLoc = Loc;
@@ -507,24 +629,22 @@ public:
return Res;
}
- static AMDGPUOperand::Ptr CreateReg(unsigned RegNo, SMLoc S,
+ static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
+ unsigned RegNo, SMLoc S,
SMLoc E,
- const MCRegisterInfo *TRI,
- const MCSubtargetInfo *STI,
bool ForceVOP3) {
- auto Op = llvm::make_unique<AMDGPUOperand>(Register);
+ auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
Op->Reg.RegNo = RegNo;
- Op->Reg.TRI = TRI;
- Op->Reg.STI = STI;
- Op->Reg.Mods = {false, false, false};
+ Op->Reg.Mods = Modifiers();
Op->Reg.IsForcedVOP3 = ForceVOP3;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static AMDGPUOperand::Ptr CreateExpr(const class MCExpr *Expr, SMLoc S) {
- auto Op = llvm::make_unique<AMDGPUOperand>(Expression);
+ static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
+ const class MCExpr *Expr, SMLoc S) {
+ auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
Op->Expr = Expr;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -537,6 +657,53 @@ raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
return OS;
}
+//===----------------------------------------------------------------------===//
+// AsmParser
+//===----------------------------------------------------------------------===//
+
+// Holds info related to the current kernel, e.g. count of SGPRs used.
+// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
+// .amdgpu_hsa_kernel or at EOF.
+class KernelScopeInfo {
+ int SgprIndexUnusedMin;
+ int VgprIndexUnusedMin;
+ MCContext *Ctx;
+
+ void usesSgprAt(int i) {
+ if (i >= SgprIndexUnusedMin) {
+ SgprIndexUnusedMin = ++i;
+ if (Ctx) {
+ MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
+ Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
+ }
+ }
+ }
+ void usesVgprAt(int i) {
+ if (i >= VgprIndexUnusedMin) {
+ VgprIndexUnusedMin = ++i;
+ if (Ctx) {
+ MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
+ Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
+ }
+ }
+ }
+public:
+ KernelScopeInfo() : SgprIndexUnusedMin(-1), VgprIndexUnusedMin(-1), Ctx(nullptr)
+ {}
+ void initialize(MCContext &Context) {
+ Ctx = &Context;
+ usesSgprAt(SgprIndexUnusedMin = -1);
+ usesVgprAt(VgprIndexUnusedMin = -1);
+ }
+ void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
+ switch (RegKind) {
+ case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
+ case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
+ default: break;
+ }
+ }
+};
+
class AMDGPUAsmParser : public MCTargetAsmParser {
const MCInstrInfo &MII;
MCAsmParser &Parser;
@@ -544,22 +711,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
unsigned ForcedEncodingSize;
bool ForcedDPP;
bool ForcedSDWA;
-
- bool isSI() const {
- return AMDGPU::isSI(getSTI());
- }
-
- bool isCI() const {
- return AMDGPU::isCI(getSTI());
- }
-
- bool isVI() const {
- return AMDGPU::isVI(getSTI());
- }
-
- bool hasSGPR102_SGPR103() const {
- return !isVI();
- }
+ KernelScopeInfo KernelScope;
/// @name Auto-generated Match Functions
/// {
@@ -570,9 +722,11 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
/// }
private:
+ bool ParseAsAbsoluteExpression(uint32_t &Ret);
bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
bool ParseDirectiveHSACodeObjectVersion();
bool ParseDirectiveHSACodeObjectISA();
+ bool ParseDirectiveRuntimeMetadata();
bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
bool ParseDirectiveAMDKernelCodeT();
bool ParseSectionDirectiveHSAText();
@@ -584,7 +738,7 @@ private:
bool ParseSectionDirectiveHSADataGlobalProgram();
bool ParseSectionDirectiveHSARodataReadonlyAgent();
bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum);
- bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth);
+ bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned *DwordRegIndex);
void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, bool IsAtomic, bool IsAtomicReturn);
public:
@@ -622,6 +776,27 @@ public:
Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
Sym->setVariableValue(MCConstantExpr::create(Isa.Stepping, Ctx));
}
+ KernelScope.initialize(getContext());
+ }
+
+ bool isSI() const {
+ return AMDGPU::isSI(getSTI());
+ }
+
+ bool isCI() const {
+ return AMDGPU::isCI(getSTI());
+ }
+
+ bool isVI() const {
+ return AMDGPU::isVI(getSTI());
+ }
+
+ bool hasInv2PiInlineImm() const {
+ return getSTI().getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
+ }
+
+ bool hasSGPR102_SGPR103() const {
+ return !isVI();
}
AMDGPUTargetStreamer &getTargetStreamer() {
@@ -629,6 +804,16 @@ public:
return static_cast<AMDGPUTargetStreamer &>(TS);
}
+ const MCRegisterInfo *getMRI() const {
+ // We need this const_cast because for some reason getContext() is not const
+ // in MCAsmParser.
+ return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
+ }
+
+ const MCInstrInfo *getMII() const {
+ return &MII;
+ }
+
void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
@@ -652,23 +837,28 @@ public:
StringRef parseMnemonicSuffix(StringRef Name);
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
+ //bool ProcessInstruction(MCInst &Inst);
OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
- OperandMatchResultTy parseIntWithPrefix(const char *Prefix,
- OperandVector &Operands,
- enum AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
- bool (*ConvertResult)(int64_t&) = 0);
- OperandMatchResultTy parseNamedBit(const char *Name, OperandVector &Operands,
- enum AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
- OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, StringRef &Value);
+ OperandMatchResultTy
+ parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
+ enum AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
+ bool (*ConvertResult)(int64_t &) = nullptr);
+ OperandMatchResultTy
+ parseNamedBit(const char *Name, OperandVector &Operands,
+ enum AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
+ OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
+ StringRef &Value);
OperandMatchResultTy parseImm(OperandVector &Operands);
OperandMatchResultTy parseRegOrImm(OperandVector &Operands);
OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands);
OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands);
+ OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
void cvtDS(MCInst &Inst, const OperandVector &Operands);
+ void cvtExp(MCInst &Inst, const OperandVector &Operands);
bool parseCnt(int64_t &IntVal);
OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
@@ -683,10 +873,17 @@ private:
bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
+
+ void errorExpTgt();
+ OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
+
public:
OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
+ OperandMatchResultTy parseExpTgt(OperandVector &Operands);
OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
+ OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
+ OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
@@ -701,7 +898,8 @@ public:
AMDGPUOperand::Ptr defaultDA() const;
AMDGPUOperand::Ptr defaultR128() const;
AMDGPUOperand::Ptr defaultLWE() const;
- AMDGPUOperand::Ptr defaultSMRDOffset() const;
+ AMDGPUOperand::Ptr defaultSMRDOffset8() const;
+ AMDGPUOperand::Ptr defaultSMRDOffset20() const;
AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
OperandMatchResultTy parseOModOperand(OperandVector &Operands);
@@ -736,8 +934,274 @@ struct OptionalOperand {
bool (*ConvertResult)(int64_t&);
};
+} // end anonymous namespace
+
+// May be called with integer type with equivalent bitwidth.
+static const fltSemantics *getFltSemantics(unsigned Size) {
+ switch (Size) {
+ case 4:
+ return &APFloat::IEEEsingle();
+ case 8:
+ return &APFloat::IEEEdouble();
+ case 2:
+ return &APFloat::IEEEhalf();
+ default:
+ llvm_unreachable("unsupported fp type");
+ }
+}
+
+static const fltSemantics *getFltSemantics(MVT VT) {
+ return getFltSemantics(VT.getSizeInBits() / 8);
+}
+
+//===----------------------------------------------------------------------===//
+// Operand
+//===----------------------------------------------------------------------===//
+
+static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
+ bool Lost;
+
+ // Convert literal to single precision
+ APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
+ APFloat::rmNearestTiesToEven,
+ &Lost);
+  // We allow precision loss but not overflow or underflow
+ if (Status != APFloat::opOK &&
+ Lost &&
+ ((Status & APFloat::opOverflow) != 0 ||
+ (Status & APFloat::opUnderflow) != 0)) {
+ return false;
+ }
+
+ return true;
+}
+
+bool AMDGPUOperand::isInlinableImm(MVT type) const {
+ if (!isImmTy(ImmTyNone)) {
+ // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
+ return false;
+ }
+ // TODO: We should avoid using host float here. It would be better to
+ // check the float bit values which is what a few other places do.
+ // We've had bot failures before due to weird NaN support on mips hosts.
+
+ APInt Literal(64, Imm.Val);
+
+ if (Imm.IsFPImm) { // We got fp literal token
+ if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
+ return AMDGPU::isInlinableLiteral64(Imm.Val,
+ AsmParser->hasInv2PiInlineImm());
+ }
+
+ APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
+ if (!canLosslesslyConvertToFPType(FPLiteral, type))
+ return false;
+
+ // Check if single precision literal is inlinable
+ return AMDGPU::isInlinableLiteral32(
+ static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
+ AsmParser->hasInv2PiInlineImm());
+ }
+
+
+ // We got int literal token.
+ if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
+ return AMDGPU::isInlinableLiteral64(Imm.Val,
+ AsmParser->hasInv2PiInlineImm());
+ }
+
+ if (type.getScalarSizeInBits() == 16) {
+ return AMDGPU::isInlinableLiteral16(
+ static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
+ AsmParser->hasInv2PiInlineImm());
+ }
+
+ return AMDGPU::isInlinableLiteral32(
+ static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
+ AsmParser->hasInv2PiInlineImm());
+}
+
+bool AMDGPUOperand::isLiteralImm(MVT type) const {
+  // Check that this immediate can be added as a literal
+ if (!isImmTy(ImmTyNone)) {
+ return false;
+ }
+
+ if (!Imm.IsFPImm) {
+ // We got int literal token.
+
+ unsigned Size = type.getSizeInBits();
+ if (Size == 64)
+ Size = 32;
+
+ // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
+ // types.
+ return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
+ }
+
+ // We got fp literal token
+ if (type == MVT::f64) { // Expected 64-bit fp operand
+ // We would set low 64-bits of literal to zeroes but we accept this literals
+ return true;
+ }
+
+ if (type == MVT::i64) { // Expected 64-bit int operand
+ // We don't allow fp literals in 64-bit integer instructions. It is
+ // unclear how we should encode them.
+ return false;
+ }
+
+ APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
+ return canLosslesslyConvertToFPType(FPLiteral, type);
+}
+
+bool AMDGPUOperand::isRegClass(unsigned RCID) const {
+ return isReg() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
+}
+
+void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
+ int64_t Val = Imm.Val;
+ if (isImmTy(ImmTyNone) && ApplyModifiers && Imm.Mods.hasFPModifiers() && Imm.Mods.Neg) {
+ // Apply modifiers to immediate value. Only negate can get here
+ if (Imm.IsFPImm) {
+ APFloat F(BitsToDouble(Val));
+ F.changeSign();
+ Val = F.bitcastToAPInt().getZExtValue();
+ } else {
+ Val = -Val;
+ }
+ }
+
+ if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
+ Inst.getNumOperands())) {
+ addLiteralImmOperand(Inst, Val);
+ } else {
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+}
+
+void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
+ const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
+ auto OpNum = Inst.getNumOperands();
+ // Check that this operand accepts literals
+ assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
+
+ auto OpSize = AMDGPU::getOperandSize(InstDesc, OpNum); // expected operand size
+
+ if (Imm.IsFPImm) { // We got fp literal token
+ APInt Literal(64, Val);
+
+ switch (OpSize) {
+ case 8: {
+ if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
+ AsmParser->hasInv2PiInlineImm())) {
+ Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
+ return;
+ }
+
+ // Non-inlineable
+ if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
+ // For fp operands we check if low 32 bits are zeros
+ if (Literal.getLoBits(32) != 0) {
+ const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
+ "Can't encode literal as exact 64-bit floating-point operand. "
+ "Low 32-bits will be set to zero");
+ }
+
+ Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
+ return;
+ }
+
+ // We don't allow fp literals in 64-bit integer instructions. It is
+ // unclear how we should encode them. This case should be checked earlier
+ // in predicate methods (isLiteralImm())
+ llvm_unreachable("fp literal in 64-bit integer instruction.");
+ }
+ case 4:
+ case 2: {
+ bool lost;
+ APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
+ // Convert literal to single precision
+ FPLiteral.convert(*getFltSemantics(OpSize),
+ APFloat::rmNearestTiesToEven, &lost);
+      // We allow precision loss but not overflow or underflow. This should be
+ // checked earlier in isLiteralImm()
+ Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
+ return;
+ }
+ default:
+ llvm_unreachable("invalid operand size");
+ }
+
+ return;
+ }
+
+ // We got int literal token.
+ // Only sign extend inline immediates.
+ // FIXME: No errors on truncation
+ switch (OpSize) {
+ case 4: {
+ if (isInt<32>(Val) &&
+ AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
+ AsmParser->hasInv2PiInlineImm())) {
+ Inst.addOperand(MCOperand::createImm(Val));
+ return;
+ }
+
+ Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
+ return;
+ }
+ case 8: {
+ if (AMDGPU::isInlinableLiteral64(Val,
+ AsmParser->hasInv2PiInlineImm())) {
+ Inst.addOperand(MCOperand::createImm(Val));
+ return;
+ }
+
+ Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
+ return;
+ }
+ case 2: {
+ if (isInt<16>(Val) &&
+ AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
+ AsmParser->hasInv2PiInlineImm())) {
+ Inst.addOperand(MCOperand::createImm(Val));
+ return;
+ }
+
+ Inst.addOperand(MCOperand::createImm(Val & 0xffff));
+ return;
+ }
+ default:
+ llvm_unreachable("invalid operand size");
+ }
}
+template <unsigned Bitwidth>
+void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
+ APInt Literal(64, Imm.Val);
+
+ if (!Imm.IsFPImm) {
+ // We got int literal token.
+ Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
+ return;
+ }
+
+ bool Lost;
+ APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
+ FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
+ APFloat::rmNearestTiesToEven, &Lost);
+ Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
+}
+
+void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
+}
+
+//===----------------------------------------------------------------------===//
+// AsmParser
+//===----------------------------------------------------------------------===//
+
static int getRegClass(RegisterKind Is, unsigned RegWidth) {
if (Is == IS_VGPR) {
switch (RegWidth) {
@@ -818,12 +1282,13 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, R
RegWidth++;
return true;
default:
- assert(false); return false;
+ llvm_unreachable("unexpected register kind");
}
}
-bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth)
+bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned *DwordRegIndex)
{
+ if (DwordRegIndex) { *DwordRegIndex = 0; }
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
if (getLexer().is(AsmToken::Identifier)) {
StringRef RegName = Parser.getTok().getString();
@@ -883,7 +1348,7 @@ bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
} else if (getLexer().is(AsmToken::LBrac)) {
// List of consecutive registers: [s0,s1,s2,s3]
Parser.Lex();
- if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
+ if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
return false;
if (RegWidth != 1)
return false;
@@ -895,7 +1360,7 @@ bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
} else if (getLexer().is(AsmToken::RBrac)) {
Parser.Lex();
break;
- } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1)) {
+ } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
if (RegWidth1 != 1) {
return false;
}
@@ -923,11 +1388,12 @@ bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
{
unsigned Size = 1;
if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
- // SGPR and TTMP registers must be are aligned. Max required alignment is 4 dwords.
+ // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
Size = std::min(RegWidth, 4u);
}
if (RegNum % Size != 0)
return false;
+ if (DwordRegIndex) { *DwordRegIndex = RegNum; }
RegNum = RegNum / Size;
int RCID = getRegClass(RegKind, RegWidth);
if (RCID == -1)
@@ -940,7 +1406,7 @@ bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
}
default:
- assert(false); return false;
+ llvm_unreachable("unexpected register kind");
}
if (!subtargetHasRegister(*TRI, Reg))
@@ -952,20 +1418,19 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
const auto &Tok = Parser.getTok();
SMLoc StartLoc = Tok.getLoc();
SMLoc EndLoc = Tok.getEndLoc();
- const MCRegisterInfo *TRI = getContext().getRegisterInfo();
-
RegisterKind RegKind;
- unsigned Reg, RegNum, RegWidth;
+ unsigned Reg, RegNum, RegWidth, DwordRegIndex;
- if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
+ if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
return nullptr;
}
- return AMDGPUOperand::CreateReg(Reg, StartLoc, EndLoc,
- TRI, &getSTI(), false);
+ KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
+ return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
}
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands) {
+ // TODO: add syntactic sugar for 1/(2*PI)
bool Minus = false;
if (getLexer().getKind() == AsmToken::Minus) {
Minus = true;
@@ -978,28 +1443,21 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands) {
int64_t IntVal;
if (getParser().parseAbsoluteExpression(IntVal))
return MatchOperand_ParseFail;
- if (!isInt<32>(IntVal) && !isUInt<32>(IntVal)) {
- Error(S, "invalid immediate: only 32-bit values are legal");
- return MatchOperand_ParseFail;
- }
-
if (Minus)
IntVal *= -1;
- Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S));
+ Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
return MatchOperand_Success;
}
case AsmToken::Real: {
- // FIXME: We should emit an error if a double precisions floating-point
- // value is used. I'm not sure the best way to detect this.
int64_t IntVal;
if (getParser().parseAbsoluteExpression(IntVal))
return MatchOperand_ParseFail;
- APFloat F((float)BitsToDouble(IntVal));
+ APFloat F(BitsToDouble(IntVal));
if (Minus)
F.changeSign();
Operands.push_back(
- AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S,
+ AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
AMDGPUOperand::ImmTyNone, true));
return MatchOperand_Success;
}
@@ -1008,7 +1466,7 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands) {
}
}
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands) {
auto res = parseImm(Operands);
if (res != MatchOperand_NoMatch) {
@@ -1024,7 +1482,7 @@ AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands) {
// XXX: During parsing we can't determine if minus sign means
// negate-modifier or negative immediate value.
@@ -1060,7 +1518,7 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands) {
return Res;
}
- AMDGPUOperand::Modifiers Mods = {false, false, false};
+ AMDGPUOperand::Modifiers Mods;
if (Negate) {
Mods.Neg = true;
}
@@ -1088,7 +1546,7 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands) {
return MatchOperand_Success;
}
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands) {
bool Sext = false;
@@ -1107,7 +1565,7 @@ AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands) {
return Res;
}
- AMDGPUOperand::Modifiers Mods = {false, false, false};
+ AMDGPUOperand::Modifiers Mods;
if (Sext) {
if (getLexer().isNot(AsmToken::RParen)) {
Error(Parser.getTok().getLoc(), "expected closing parentheses");
@@ -1116,14 +1574,33 @@ AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands) {
Parser.Lex();
Mods.Sext = true;
}
-
+
if (Mods.hasIntModifiers()) {
AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
Op.setModifiers(Mods);
}
+
return MatchOperand_Success;
}
+// Parse an operand that is either a register or the literal keyword "off".
+// "off" is represented as an ImmTyOff immediate with value 0 so later
+// conversion code (e.g. cvtExp) can recognize a disabled source slot.
+OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
+ std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
+ if (Reg) {
+ Operands.push_back(std::move(Reg));
+ return MatchOperand_Success;
+ }
+
+ // Not a register: accept the "off" keyword, otherwise leave the token
+ // untouched and report no match so other parsers can try it.
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.getString() == "off") {
+ Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
+ AMDGPUOperand::ImmTyOff, false));
+ Parser.Lex();
+ return MatchOperand_Success;
+ }
+
+ return MatchOperand_NoMatch;
+}
+
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
@@ -1139,6 +1616,17 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
getForcedEncodingSize() != 64)
return Match_PreferE32;
+ if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
+ Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
+ // v_mac_f32/16 allow only dst_sel == DWORD;
+ auto OpNum =
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
+ const auto &Op = Inst.getOperand(OpNum);
+ if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
+ return Match_InvalidOperand;
+ }
+ }
+
return Match_Success;
}
@@ -1147,57 +1635,105 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
+ // What asm variants we should check
+ std::vector<unsigned> MatchedVariants;
+ if (getForcedEncodingSize() == 32) {
+ MatchedVariants = {AMDGPUAsmVariants::DEFAULT};
+ } else if (isForcedVOP3()) {
+ MatchedVariants = {AMDGPUAsmVariants::VOP3};
+ } else if (isForcedSDWA()) {
+ MatchedVariants = {AMDGPUAsmVariants::SDWA};
+ } else if (isForcedDPP()) {
+ MatchedVariants = {AMDGPUAsmVariants::DPP};
+ } else {
+ MatchedVariants = {AMDGPUAsmVariants::DEFAULT,
+ AMDGPUAsmVariants::VOP3,
+ AMDGPUAsmVariants::SDWA,
+ AMDGPUAsmVariants::DPP};
+ }
+
MCInst Inst;
+ unsigned Result = Match_Success;
+ for (auto Variant : MatchedVariants) {
+ uint64_t EI;
+ auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
+ Variant);
+ // We order match statuses from least to most specific. We use most specific
+ // status as resulting
+ // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
+ if ((R == Match_Success) ||
+ (R == Match_PreferE32) ||
+ (R == Match_MissingFeature && Result != Match_PreferE32) ||
+ (R == Match_InvalidOperand && Result != Match_MissingFeature
+ && Result != Match_PreferE32) ||
+ (R == Match_MnemonicFail && Result != Match_InvalidOperand
+ && Result != Match_MissingFeature
+ && Result != Match_PreferE32)) {
+ Result = R;
+ ErrorInfo = EI;
+ }
+ if (R == Match_Success)
+ break;
+ }
- switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
- default: break;
- case Match_Success:
- Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, getSTI());
- return false;
- case Match_MissingFeature:
- return Error(IDLoc, "instruction not supported on this GPU");
+ switch (Result) {
+ default: break;
+ case Match_Success:
+ Inst.setLoc(IDLoc);
+ Out.EmitInstruction(Inst, getSTI());
+ return false;
- case Match_MnemonicFail:
- return Error(IDLoc, "unrecognized instruction mnemonic");
+ case Match_MissingFeature:
+ return Error(IDLoc, "instruction not supported on this GPU");
- case Match_InvalidOperand: {
- SMLoc ErrorLoc = IDLoc;
- if (ErrorInfo != ~0ULL) {
- if (ErrorInfo >= Operands.size()) {
- return Error(IDLoc, "too few operands for instruction");
- }
- ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
- if (ErrorLoc == SMLoc())
- ErrorLoc = IDLoc;
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction mnemonic");
+
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0ULL) {
+ if (ErrorInfo >= Operands.size()) {
+ return Error(IDLoc, "too few operands for instruction");
}
- return Error(ErrorLoc, "invalid operand for instruction");
+ ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = IDLoc;
}
- case Match_PreferE32:
- return Error(IDLoc, "internal error: instruction without _e64 suffix "
- "should be encoded as e32");
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+
+ case Match_PreferE32:
+ return Error(IDLoc, "internal error: instruction without _e64 suffix "
+ "should be encoded as e32");
}
llvm_unreachable("Implement any new match types added!");
}
+// Parse an absolute integer expression into Ret (truncated to 32 bits).
+// Returns true on failure, matching the directive-parsing error convention.
+// Only attempts the parse when the next token can begin an expression
+// (an integer literal or an identifier/symbol).
+bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
+ int64_t Tmp = -1;
+ if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
+ return true;
+ }
+ if (getParser().parseAbsoluteExpression(Tmp)) {
+ return true;
+ }
+ Ret = static_cast<uint32_t>(Tmp);
+ return false;
+}
+
+
+// Parse the "<major>, <minor>" prefix shared by the HSA code-object
+// version/ISA directives. Returns true (with a TokError) on malformed input.
bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
                                               uint32_t &Minor) {
-  if (getLexer().isNot(AsmToken::Integer))
+  if (ParseAsAbsoluteExpression(Major))
    return TokError("invalid major version");
-  Major = getLexer().getTok().getIntVal();
-  Lex();
-
  if (getLexer().isNot(AsmToken::Comma))
    return TokError("minor version number required, comma expected");
  Lex();
-  if (getLexer().isNot(AsmToken::Integer))
+  if (ParseAsAbsoluteExpression(Minor))
    return TokError("invalid minor version");
-  Minor = getLexer().getTok().getIntVal();
-  Lex();
-
  return false;
}
@@ -1214,7 +1750,6 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
}
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
-
uint32_t Major;
uint32_t Minor;
uint32_t Stepping;
@@ -1231,7 +1766,6 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
return false;
}
-
if (ParseDirectiveMajorMinor(Major, Minor))
return true;
@@ -1239,12 +1773,9 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
return TokError("stepping version number required, comma expected");
Lex();
- if (getLexer().isNot(AsmToken::Integer))
+ if (ParseAsAbsoluteExpression(Stepping))
return TokError("invalid stepping version");
- Stepping = getLexer().getTok().getIntVal();
- Lex();
-
if (getLexer().isNot(AsmToken::Comma))
return TokError("vendor name required, comma expected");
Lex();
@@ -1270,6 +1801,46 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
return false;
}
+// Handle the .amdgpu_runtime_metadata directive: capture everything up to
+// the matching .end_amdgpu_runtime_metadata verbatim and forward the raw
+// text to the target streamer. Returns true only on a missing end marker.
+bool AMDGPUAsmParser::ParseDirectiveRuntimeMetadata() {
+ std::string Metadata;
+ raw_string_ostream MS(Metadata);
+
+ // Make the lexer emit Space tokens so the captured metadata keeps its
+ // original whitespace; restored to the default before returning.
+ getLexer().setSkipSpace(false);
+
+ bool FoundEnd = false;
+ while (!getLexer().is(AsmToken::Eof)) {
+ while (getLexer().is(AsmToken::Space)) {
+ MS << ' ';
+ Lex();
+ }
+
+ if (getLexer().is(AsmToken::Identifier)) {
+ StringRef ID = getLexer().getTok().getIdentifier();
+ if (ID == ".end_amdgpu_runtime_metadata") {
+ Lex();
+ FoundEnd = true;
+ break;
+ }
+ }
+
+ // Copy the rest of the statement, joining statements with the target's
+ // separator string so multi-line metadata round-trips.
+ MS << Parser.parseStringToEndOfStatement()
+ << getContext().getAsmInfo()->getSeparatorString();
+
+ Parser.eatToEndOfStatement();
+ }
+
+ getLexer().setSkipSpace(true);
+
+ if (getLexer().is(AsmToken::Eof) && !FoundEnd)
+ return TokError("expected directive .end_amdgpu_runtime_metadata not found");
+
+ MS.flush();
+
+ getTargetStreamer().EmitRuntimeMetadata(Metadata);
+
+ return false;
+}
+
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
amd_kernel_code_t &Header) {
SmallString<40> ErrStr;
@@ -1282,12 +1853,10 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
}
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
-
amd_kernel_code_t Header;
AMDGPU::initDefaultAMDKernelCodeT(Header, getSTI().getFeatureBits());
while (true) {
-
// Lex EndOfStatement. This is in a while loop, because lexing a comment
// will set the current token to EndOfStatement.
while(getLexer().is(AsmToken::EndOfStatement))
@@ -1326,6 +1895,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
ELF::STT_AMDGPU_HSA_KERNEL);
Lex();
+ KernelScope.initialize(getContext());
return false;
}
@@ -1378,6 +1948,9 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".hsa_code_object_isa")
return ParseDirectiveHSACodeObjectISA();
+ if (IDVal == ".amdgpu_runtime_metadata")
+ return ParseDirectiveRuntimeMetadata();
+
if (IDVal == ".amd_kernel_code_t")
return ParseDirectiveAMDKernelCodeT();
@@ -1433,7 +2006,7 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
return true;
}
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
// Try to parse with a custom parser
@@ -1464,11 +2037,11 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
SMLoc S = Tok.getLoc();
const MCExpr *Expr = nullptr;
if (!Parser.parseExpression(Expr)) {
- Operands.push_back(AMDGPUOperand::CreateExpr(Expr, S));
+ Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
return MatchOperand_Success;
}
- Operands.push_back(AMDGPUOperand::CreateToken(Tok.getString(), Tok.getLoc()));
+ Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), Tok.getLoc()));
Parser.Lex();
return MatchOperand_Success;
}
@@ -1502,10 +2075,10 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
SMLoc NameLoc, OperandVector &Operands) {
// Add the instruction mnemonic
Name = parseMnemonicSuffix(Name);
- Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc));
+ Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
while (!getLexer().is(AsmToken::EndOfStatement)) {
- AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
+ OperandMatchResultTy Res = parseOperand(Operands, Name);
// Eat the comma or space if there is one.
if (getLexer().is(AsmToken::Comma))
@@ -1535,7 +2108,7 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
// Utility functions
//===----------------------------------------------------------------------===//
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
switch(getLexer().getKind()) {
default: return MatchOperand_NoMatch;
@@ -1561,15 +2134,14 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
return MatchOperand_Success;
}
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
enum AMDGPUOperand::ImmTy ImmTy,
bool (*ConvertResult)(int64_t&)) {
-
SMLoc S = Parser.getTok().getLoc();
int64_t Value = 0;
- AMDGPUAsmParser::OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
+ OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
if (Res != MatchOperand_Success)
return Res;
@@ -1577,11 +2149,11 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
return MatchOperand_ParseFail;
}
- Operands.push_back(AMDGPUOperand::CreateImm(Value, S, ImmTy));
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
return MatchOperand_Success;
}
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
enum AMDGPUOperand::ImmTy ImmTy) {
int64_t Bit = 0;
@@ -1609,7 +2181,7 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
}
}
- Operands.push_back(AMDGPUOperand::CreateImm(Bit, S, ImmTy));
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
return MatchOperand_Success;
}
@@ -1627,7 +2199,7 @@ void addOptionalImmOperand(MCInst& Inst, const OperandVector& Operands,
}
}
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
if (getLexer().isNot(AsmToken::Identifier)) {
return MatchOperand_NoMatch;
@@ -1657,7 +2229,6 @@ AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
const OperandVector &Operands) {
-
OptionalImmIndexMap OptionalIdx;
for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
@@ -1681,7 +2252,6 @@ void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
}
void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) {
-
std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
bool GDSOnly = false;
@@ -1712,6 +2282,46 @@ void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) {
Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}
+// Convert parsed 'exp' (export) operands into an MCInst. Source slots are
+// counted left to right to build the 'en' enable bitmask appended last:
+// a register source sets its bit, an 'off' source consumes a slot without
+// setting a bit.
+void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
+ OptionalImmIndexMap OptionalIdx;
+
+ unsigned EnMask = 0;
+ int SrcIdx = 0;
+
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ // Add the register arguments
+ if (Op.isReg()) {
+ EnMask |= (1 << SrcIdx);
+ Op.addRegOperands(Inst, 1);
+ ++SrcIdx;
+ continue;
+ }
+
+ // 'off' occupies a source slot but contributes no register and leaves
+ // its enable bit clear.
+ if (Op.isOff()) {
+ ++SrcIdx;
+ Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
+ continue;
+ }
+
+ if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
+ Op.addImmOperands(Inst, 1);
+ continue;
+ }
+
+ // The 'done' keyword is matched as part of the mnemonic/operand list
+ // elsewhere; it adds no MCOperand here.
+ if (Op.isToken() && Op.getToken() == "done")
+ continue;
+
+ // Handle optional arguments
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
+
+ Inst.addOperand(MCOperand::createImm(EnMask));
+}
//===----------------------------------------------------------------------===//
// s_waitcnt
@@ -1739,52 +2349,41 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma))
Parser.Lex();
- int CntShift;
- int CntMask;
-
- if (CntName == "vmcnt") {
- CntMask = 0xf;
- CntShift = 0;
- } else if (CntName == "expcnt") {
- CntMask = 0x7;
- CntShift = 4;
- } else if (CntName == "lgkmcnt") {
- CntMask = 0xf;
- CntShift = 8;
- } else {
+ IsaVersion IV = getIsaVersion(getSTI().getFeatureBits());
+ if (CntName == "vmcnt")
+ IntVal = encodeVmcnt(IV, IntVal, CntVal);
+ else if (CntName == "expcnt")
+ IntVal = encodeExpcnt(IV, IntVal, CntVal);
+ else if (CntName == "lgkmcnt")
+ IntVal = encodeLgkmcnt(IV, IntVal, CntVal);
+ else
return true;
- }
- IntVal &= ~(CntMask << CntShift);
- IntVal |= (CntVal << CntShift);
return false;
}
+// Parse the s_waitcnt operand: either a raw integer, or one or more named
+// counters (handled by parseCnt). The initial value is the all-ones bitmask
+// for this ISA version, i.e. "wait for nothing", which named counters then
+// carve fields out of.
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
-  // Disable all counters by default.
-  // vmcnt   [3:0]
-  // expcnt  [6:4]
-  // lgkmcnt [11:8]
-  int64_t CntVal = 0xf7f;
+  IsaVersion IV = getIsaVersion(getSTI().getFeatureBits());
+  int64_t Waitcnt = getWaitcntBitMask(IV);
  SMLoc S = Parser.getTok().getLoc();
  switch(getLexer().getKind()) {
    default: return MatchOperand_ParseFail;
    case AsmToken::Integer:
      // The operand can be an integer value.
-      if (getParser().parseAbsoluteExpression(CntVal))
+      if (getParser().parseAbsoluteExpression(Waitcnt))
        return MatchOperand_ParseFail;
      break;
    case AsmToken::Identifier:
      do {
-        if (parseCnt(CntVal))
+        if (parseCnt(Waitcnt))
          return MatchOperand_ParseFail;
      } while(getLexer().isNot(AsmToken::EndOfStatement));
      break;
  }
-  Operands.push_back(AMDGPUOperand::CreateImm(CntVal, S));
+  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
  return MatchOperand_Success;
}
@@ -1849,7 +2448,7 @@ bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
return false;
}
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
using namespace llvm::AMDGPU::Hwreg;
@@ -1889,7 +2488,7 @@ AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
}
break;
}
- Operands.push_back(AMDGPUOperand::CreateImm(Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
return MatchOperand_Success;
}
@@ -1997,7 +2596,147 @@ bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &O
return false;
}
-AMDGPUAsmParser::OperandMatchResultTy
+// Parse an interpolation slot keyword for v_interp_* instructions and emit
+// it as an ImmTyInterpSlot immediate: p10 -> 0, p20 -> 1, p0 -> 2.
+OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
+ if (getLexer().getKind() != AsmToken::Identifier)
+ return MatchOperand_NoMatch;
+
+ StringRef Str = Parser.getTok().getString();
+ int Slot = StringSwitch<int>(Str)
+ .Case("p10", 0)
+ .Case("p20", 1)
+ .Case("p0", 2)
+ .Default(-1);
+
+ SMLoc S = Parser.getTok().getLoc();
+ if (Slot == -1)
+ return MatchOperand_ParseFail;
+
+ Parser.Lex();
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
+ AMDGPUOperand::ImmTyInterpSlot));
+ return MatchOperand_Success;
+}
+
+// Parse an interpolation attribute of the form "attr<N>.<chan>" and push
+// TWO operands: the attribute number (ImmTyInterpAttr, must be <= 63) and
+// the channel (ImmTyAttrChan: .x/.y/.z/.w -> 0..3).
+OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
+ if (getLexer().getKind() != AsmToken::Identifier)
+ return MatchOperand_NoMatch;
+
+ StringRef Str = Parser.getTok().getString();
+ if (!Str.startswith("attr"))
+ return MatchOperand_NoMatch;
+
+ // The last two characters are the channel suffix, e.g. ".y".
+ StringRef Chan = Str.take_back(2);
+ int AttrChan = StringSwitch<int>(Chan)
+ .Case(".x", 0)
+ .Case(".y", 1)
+ .Case(".z", 2)
+ .Case(".w", 3)
+ .Default(-1);
+ if (AttrChan == -1)
+ return MatchOperand_ParseFail;
+
+ // Strip the channel suffix and the "attr" prefix, leaving the digits.
+ Str = Str.drop_back(2).drop_front(4);
+
+ uint8_t Attr;
+ if (Str.getAsInteger(10, Attr))
+ return MatchOperand_ParseFail;
+
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex();
+ // Out-of-range attr is diagnosed but still reported as Success so
+ // parsing continues past the consumed token.
+ if (Attr > 63) {
+ Error(S, "out of bounds attr");
+ return MatchOperand_Success;
+ }
+
+ SMLoc SChan = SMLoc::getFromPointer(Chan.data());
+
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
+ AMDGPUOperand::ImmTyInterpAttr));
+ Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
+ AMDGPUOperand::ImmTyAttrChan));
+ return MatchOperand_Success;
+}
+
+// Emit the shared "invalid exp target" diagnostic at the current token.
+void AMDGPUAsmParser::errorExpTgt() {
+ Error(Parser.getTok().getLoc(), "invalid exp target");
+}
+
+// Decode an exp target name into its hardware encoding in Val:
+//   mrt0..mrt7 -> 0..7, mrtz -> 8, null -> 9, pos0..pos3 -> 12..15,
+//   param0..param31 -> 32..63. Out-of-range numbers are diagnosed via
+// errorExpTgt() but still return Success (the token was recognized);
+// an unrecognized name returns NoMatch.
+OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
+ uint8_t &Val) {
+ if (Str == "null") {
+ Val = 9;
+ return MatchOperand_Success;
+ }
+
+ if (Str.startswith("mrt")) {
+ Str = Str.drop_front(3);
+ if (Str == "z") { // == mrtz
+ Val = 8;
+ return MatchOperand_Success;
+ }
+
+ if (Str.getAsInteger(10, Val))
+ return MatchOperand_ParseFail;
+
+ if (Val > 7)
+ errorExpTgt();
+
+ return MatchOperand_Success;
+ }
+
+ if (Str.startswith("pos")) {
+ Str = Str.drop_front(3);
+ if (Str.getAsInteger(10, Val))
+ return MatchOperand_ParseFail;
+
+ if (Val > 3)
+ errorExpTgt();
+
+ // Positions start at encoding 12.
+ Val += 12;
+ return MatchOperand_Success;
+ }
+
+ if (Str.startswith("param")) {
+ Str = Str.drop_front(5);
+ if (Str.getAsInteger(10, Val))
+ return MatchOperand_ParseFail;
+
+ if (Val >= 32)
+ errorExpTgt();
+
+ // Parameters start at encoding 32.
+ Val += 32;
+ return MatchOperand_Success;
+ }
+
+ // "invalid_target_<N>" spells an explicitly-invalid encoding; accept the
+ // syntax but always diagnose it.
+ if (Str.startswith("invalid_target_")) {
+ Str = Str.drop_front(15);
+ if (Str.getAsInteger(10, Val))
+ return MatchOperand_ParseFail;
+
+ errorExpTgt();
+ return MatchOperand_Success;
+ }
+
+ return MatchOperand_NoMatch;
+}
+
+// Parse an exp target operand: decode the current token with
+// parseExpTgtImpl and, on success, consume it and push the encoded value
+// as an ImmTyExpTgt immediate.
+OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
+ uint8_t Val;
+ StringRef Str = Parser.getTok().getString();
+
+ auto Res = parseExpTgtImpl(Str, Val);
+ if (Res != MatchOperand_Success)
+ return Res;
+
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex();
+
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
+ AMDGPUOperand::ImmTyExpTgt));
+ return MatchOperand_Success;
+}
+
+OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
using namespace llvm::AMDGPU::SendMsg;
@@ -2068,11 +2807,11 @@ AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
}
Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
}
- } while (0);
+ } while (false);
}
break;
}
- Operands.push_back(AMDGPUOperand::CreateImm(Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
return MatchOperand_Success;
}
@@ -2084,7 +2823,7 @@ bool AMDGPUOperand::isSendMsg() const {
// sopp branch targets
//===----------------------------------------------------------------------===//
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
@@ -2094,12 +2833,12 @@ AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
int64_t Imm;
if (getParser().parseAbsoluteExpression(Imm))
return MatchOperand_ParseFail;
- Operands.push_back(AMDGPUOperand::CreateImm(Imm, S));
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
return MatchOperand_Success;
}
case AsmToken::Identifier:
- Operands.push_back(AMDGPUOperand::CreateExpr(
+ Operands.push_back(AMDGPUOperand::CreateExpr(this,
MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
Parser.getTok().getString()), getContext()), S));
Parser.Lex();
@@ -2112,15 +2851,15 @@ AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
//===----------------------------------------------------------------------===//
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
- return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyGLC);
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
- return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTySLC);
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultTFE() const {
- return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyTFE);
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyTFE);
}
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
@@ -2192,7 +2931,7 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands) {
} else if (Op.isImmModifier()) {
OptionalIdx[Op.getImmTy()] = I;
} else {
- assert(false);
+ llvm_unreachable("unexpected operand type");
}
}
@@ -2228,7 +2967,7 @@ void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands)
} else if (Op.isImmModifier()) {
OptionalIdx[Op.getImmTy()] = I;
} else {
- assert(false);
+ llvm_unreachable("unexpected operand type");
}
}
@@ -2243,48 +2982,53 @@ void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands)
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDMask() const {
- return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyDMask);
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDMask);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultUNorm() const {
- return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyUNorm);
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyUNorm);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDA() const {
- return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyDA);
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDA);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultR128() const {
- return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyR128);
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyR128);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultLWE() const {
- return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyLWE);
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyLWE);
}
//===----------------------------------------------------------------------===//
// smrd
//===----------------------------------------------------------------------===//
+// Width predicates for SMRD immediate offsets. The old subtarget-agnostic
+// isSMRDOffset() is split into 8-bit and 20-bit variants so the matcher can
+// pick the right form per subtarget.
-bool AMDGPUOperand::isSMRDOffset() const {
-
-  // FIXME: Support 20-bit offsets on VI. We need to to pass subtarget
-  // information here.
+bool AMDGPUOperand::isSMRDOffset8() const {
  return isImm() && isUInt<8>(getImm());
}
+bool AMDGPUOperand::isSMRDOffset20() const {
+ return isImm() && isUInt<20>(getImm());
+}
+
bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset is > 8-bits.
  return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}
+// Factories for the implicit zero offset used when an optional SMRD offset
+// operand is absent from the assembly text.
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyOffset);
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
+}
+
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
+}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyOffset);
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
+}
//===----------------------------------------------------------------------===//
@@ -2317,10 +3061,13 @@ static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
if (BoundCtrl == 0) {
BoundCtrl = 1;
return true;
- } else if (BoundCtrl == -1) {
+ }
+
+ if (BoundCtrl == -1) {
BoundCtrl = 0;
return true;
}
+
return false;
}
@@ -2350,9 +3097,10 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
{"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
{"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
+ {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
};
-AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
+OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
OperandMatchResultTy res;
for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
// try to parse any optional operand here
@@ -2376,16 +3124,19 @@ AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(Oper
return MatchOperand_NoMatch;
}
+// Parse the output-modifier (omod) operand: a "mul:<n>" or "div:<n>"
+// prefix, converted to its ImmTyOModSI encoding by the ConvertOmod*
+// callbacks passed to parseIntWithPrefix.
-AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands)
-{
+OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
  StringRef Name = Parser.getTok().getString();
  if (Name == "mul") {
-    return parseIntWithPrefix("mul", Operands, AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
-  } else if (Name == "div") {
-    return parseIntWithPrefix("div", Operands, AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
-  } else {
-    return MatchOperand_NoMatch;
+    return parseIntWithPrefix("mul", Operands,
+                              AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
+  }
+
+  if (Name == "div") {
+    return parseIntWithPrefix("div", Operands,
+                              AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
  }
+
+  return MatchOperand_NoMatch;
}
void AMDGPUAsmParser::cvtId(MCInst &Inst, const OperandVector &Operands) {
@@ -2407,6 +3158,17 @@ void AMDGPUAsmParser::cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands)
}
}
+// True when operand OpNum of Desc is an input-modifiers operand whose
+// following operand is an untied register — i.e. the pair should be emitted
+// together by addRegOrImmWithFPInputModsOperands.
+static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
+ // 1. This operand is input modifiers
+ return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
+ // 2. This is not last operand
+ && Desc.NumOperands > (OpNum + 1)
+ // 3. Next operand is register class
+ && Desc.OpInfo[OpNum + 1].RegClass != -1
+ // 4. Next register is not tied to any other operand
+ && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
+}
+
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
OptionalImmIndexMap OptionalIdx;
unsigned I = 1;
@@ -2417,18 +3179,36 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
for (unsigned E = Operands.size(); I != E; ++I) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
- if (Op.isRegOrImmWithInputMods()) {
- // only fp modifiers allowed in VOP3
+ if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
} else if (Op.isImm()) {
OptionalIdx[Op.getImmTy()] = I;
} else {
- assert(false);
+ llvm_unreachable("unhandled operand type");
}
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
+
+ // special case v_mac_{f16, f32}:
+ // it has src2 register operand that is tied to dst operand
+ // we don't allow modifiers for this operand in assembler so src2_modifiers
+ // should be 0
+ if (Inst.getOpcode() == AMDGPU::V_MAC_F32_e64_si ||
+ Inst.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
+ Inst.getOpcode() == AMDGPU::V_MAC_F16_e64_vi) {
+ auto it = Inst.begin();
+ std::advance(
+ it,
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ?
+ AMDGPU::V_MAC_F16_e64 :
+ AMDGPU::V_MAC_F32_e64,
+ AMDGPU::OpName::src2_modifiers));
+ it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
+ ++it;
+ Inst.insert(it, Inst.getOperand(0)); // src2 = dst
+ }
}
//===----------------------------------------------------------------------===//
@@ -2455,7 +3235,11 @@ bool AMDGPUOperand::isDPPCtrl() const {
return false;
}
-AMDGPUAsmParser::OperandMatchResultTy
+bool AMDGPUOperand::isGPRIdxMode() const {
+ return isImm() && isUInt<4>(getImm());
+}
+
+OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
StringRef Prefix;
@@ -2469,8 +3253,10 @@ AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
if (Prefix == "row_mirror") {
Int = 0x140;
+ Parser.Lex();
} else if (Prefix == "row_half_mirror") {
Int = 0x141;
+ Parser.Lex();
} else {
// Check to prevent parseDPPCtrlOps from eating invalid tokens
if (Prefix != "quad_perm"
@@ -2494,60 +3280,46 @@ AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
Parser.Lex();
if (getLexer().isNot(AsmToken::LBrac))
return MatchOperand_ParseFail;
-
Parser.Lex();
- if (getLexer().isNot(AsmToken::Integer))
- return MatchOperand_ParseFail;
- Int = getLexer().getTok().getIntVal();
- Parser.Lex();
- if (getLexer().isNot(AsmToken::Comma))
- return MatchOperand_ParseFail;
- Parser.Lex();
- if (getLexer().isNot(AsmToken::Integer))
+ if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
return MatchOperand_ParseFail;
- Int += (getLexer().getTok().getIntVal() << 2);
- Parser.Lex();
- if (getLexer().isNot(AsmToken::Comma))
- return MatchOperand_ParseFail;
- Parser.Lex();
- if (getLexer().isNot(AsmToken::Integer))
- return MatchOperand_ParseFail;
- Int += (getLexer().getTok().getIntVal() << 4);
+ for (int i = 0; i < 3; ++i) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return MatchOperand_ParseFail;
+ Parser.Lex();
- Parser.Lex();
- if (getLexer().isNot(AsmToken::Comma))
- return MatchOperand_ParseFail;
- Parser.Lex();
- if (getLexer().isNot(AsmToken::Integer))
- return MatchOperand_ParseFail;
- Int += (getLexer().getTok().getIntVal() << 6);
+ int64_t Temp;
+ if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
+ return MatchOperand_ParseFail;
+ const int shift = i*2 + 2;
+ Int += (Temp << shift);
+ }
- Parser.Lex();
if (getLexer().isNot(AsmToken::RBrac))
return MatchOperand_ParseFail;
+ Parser.Lex();
} else {
// sel:%d
Parser.Lex();
- if (getLexer().isNot(AsmToken::Integer))
+ if (getParser().parseAbsoluteExpression(Int))
return MatchOperand_ParseFail;
- Int = getLexer().getTok().getIntVal();
- if (Prefix == "row_shl") {
+ if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
Int |= 0x100;
- } else if (Prefix == "row_shr") {
+ } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
Int |= 0x110;
- } else if (Prefix == "row_ror") {
+ } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
Int |= 0x120;
- } else if (Prefix == "wave_shl") {
+ } else if (Prefix == "wave_shl" && 1 == Int) {
Int = 0x130;
- } else if (Prefix == "wave_rol") {
+ } else if (Prefix == "wave_rol" && 1 == Int) {
Int = 0x134;
- } else if (Prefix == "wave_shr") {
+ } else if (Prefix == "wave_shr" && 1 == Int) {
Int = 0x138;
- } else if (Prefix == "wave_ror") {
+ } else if (Prefix == "wave_ror" && 1 == Int) {
Int = 0x13C;
} else if (Prefix == "row_bcast") {
if (Int == 15) {
@@ -2562,23 +3334,21 @@ AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
}
}
}
- Parser.Lex(); // eat last token
- Operands.push_back(AMDGPUOperand::CreateImm(Int, S,
- AMDGPUOperand::ImmTyDppCtrl));
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
return MatchOperand_Success;
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
- return AMDGPUOperand::CreateImm(0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
+ return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
- return AMDGPUOperand::CreateImm(0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
+ return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
- return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
@@ -2593,8 +3363,11 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
for (unsigned E = Operands.size(); I != E; ++I) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// Add the register arguments
- if (Op.isRegOrImmWithInputMods()) {
- // Only float modifiers supported in DPP
+ if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
+ // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token.
+ // Skip it.
+ continue;
+ } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
} else if (Op.isDPPCtrl()) {
Op.addImmOperands(Inst, 1);
@@ -2609,18 +3382,30 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
+
+ // special case v_mac_{f16, f32}:
+ // it has src2 register operand that is tied to dst operand
+ if (Inst.getOpcode() == AMDGPU::V_MAC_F32_dpp ||
+ Inst.getOpcode() == AMDGPU::V_MAC_F16_dpp) {
+ auto it = Inst.begin();
+ std::advance(
+ it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
+ Inst.insert(it, Inst.getOperand(0)); // src2 = dst
+ }
}
//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
AMDGPUOperand::ImmTy Type) {
+ using namespace llvm::AMDGPU::SDWA;
+
SMLoc S = Parser.getTok().getLoc();
StringRef Value;
- AMDGPUAsmParser::OperandMatchResultTy res;
+ OperandMatchResultTy res;
res = parseStringWithPrefix(Prefix, Value);
if (res != MatchOperand_Success) {
@@ -2629,13 +3414,13 @@ AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
int64_t Int;
Int = StringSwitch<int64_t>(Value)
- .Case("BYTE_0", 0)
- .Case("BYTE_1", 1)
- .Case("BYTE_2", 2)
- .Case("BYTE_3", 3)
- .Case("WORD_0", 4)
- .Case("WORD_1", 5)
- .Case("DWORD", 6)
+ .Case("BYTE_0", SdwaSel::BYTE_0)
+ .Case("BYTE_1", SdwaSel::BYTE_1)
+ .Case("BYTE_2", SdwaSel::BYTE_2)
+ .Case("BYTE_3", SdwaSel::BYTE_3)
+ .Case("WORD_0", SdwaSel::WORD_0)
+ .Case("WORD_1", SdwaSel::WORD_1)
+ .Case("DWORD", SdwaSel::DWORD)
.Default(0xffffffff);
Parser.Lex(); // eat last token
@@ -2643,15 +3428,17 @@ AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
return MatchOperand_ParseFail;
}
- Operands.push_back(AMDGPUOperand::CreateImm(Int, S, Type));
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
return MatchOperand_Success;
}
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
+ using namespace llvm::AMDGPU::SDWA;
+
SMLoc S = Parser.getTok().getLoc();
StringRef Value;
- AMDGPUAsmParser::OperandMatchResultTy res;
+ OperandMatchResultTy res;
res = parseStringWithPrefix("dst_unused", Value);
if (res != MatchOperand_Success) {
@@ -2660,9 +3447,9 @@ AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
int64_t Int;
Int = StringSwitch<int64_t>(Value)
- .Case("UNUSED_PAD", 0)
- .Case("UNUSED_SEXT", 1)
- .Case("UNUSED_PRESERVE", 2)
+ .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
+ .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
+ .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
.Default(0xffffffff);
Parser.Lex(); // eat last token
@@ -2670,8 +3457,7 @@ AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- Operands.push_back(AMDGPUOperand::CreateImm(Int, S,
- AMDGPUOperand::ImmTySdwaDstUnused));
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
return MatchOperand_Success;
}
@@ -2700,13 +3486,15 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
for (unsigned E = Operands.size(); I != E; ++I) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// Add the register arguments
- if (BasicInstType == SIInstrFlags::VOPC &&
+ if ((BasicInstType == SIInstrFlags::VOPC ||
+ BasicInstType == SIInstrFlags::VOP2)&&
Op.isReg() &&
Op.Reg.RegNo == AMDGPU::VCC) {
- // VOPC sdwa use "vcc" token as dst. Skip it.
+ // VOPC and VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
+ // Skip it.
continue;
- } else if (Op.isRegOrImmWithInputMods()) {
- Op.addRegOrImmWithInputModsOperands(Inst, 2);
+ } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+ Op.addRegOrImmWithInputModsOperands(Inst, 2);
} else if (Op.isImm()) {
// Handle optional arguments
OptionalIdx[Op.getImmTy()] = I;
@@ -2716,46 +3504,55 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
-
- if (Inst.getOpcode() == AMDGPU::V_NOP_sdwa) {
- // V_NOP_sdwa has no optional sdwa arguments
- return;
- }
- switch (BasicInstType) {
- case SIInstrFlags::VOP1: {
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
- break;
- }
- case SIInstrFlags::VOP2: {
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6);
- break;
- }
- case SIInstrFlags::VOPC: {
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6);
- break;
+
+ if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
+ // V_NOP_sdwa_vi has no optional sdwa arguments
+ switch (BasicInstType) {
+ case SIInstrFlags::VOP1:
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
+ break;
+
+ case SIInstrFlags::VOP2:
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6);
+ break;
+
+ case SIInstrFlags::VOPC:
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6);
+ break;
+
+ default:
+ llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
+ }
}
- default:
- llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
+
+ // special case v_mac_{f16, f32}:
+ // it has src2 register operand that is tied to dst operand
+ if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
+ Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
+ auto it = Inst.begin();
+ std::advance(
+ it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
+ Inst.insert(it, Inst.getOperand(0)); // src2 = dst
}
+
}
/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
- RegisterMCAsmParser<AMDGPUAsmParser> A(TheAMDGPUTarget);
- RegisterMCAsmParser<AMDGPUAsmParser> B(TheGCNTarget);
+ RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
+ RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#include "AMDGPUGenAsmMatcher.inc"
-
// This function should be defined after auto-generated include so that we have
// MatchClassKind enum defined
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
@@ -2776,16 +3573,27 @@ unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
case MCK_offen:
return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
- case MCK_SSrc32:
+ case MCK_SSrcB32:
// When operands have expression values, they will return true for isToken,
// because it is not possible to distinguish between a token and an
// expression at parse time. MatchInstructionImpl() will always try to
// match an operand as a token, when isToken returns true, and when the
// name of the expression is not a valid token, the match will fail,
// so we need to handle it here.
- return Operand.isSSrc32() ? Match_Success : Match_InvalidOperand;
+ return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
+ case MCK_SSrcF32:
+ return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
case MCK_SoppBrTarget:
return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
- default: return Match_InvalidOperand;
+ case MCK_VReg32OrOff:
+ return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
+ case MCK_InterpSlot:
+ return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
+ case MCK_Attr:
+ return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
+ case MCK_AttrChan:
+ return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
+ default:
+ return Match_InvalidOperand;
}
}