author | Dimitry Andric <dim@FreeBSD.org> | 2016-07-23 20:41:05 +0000
---|---|---
committer | Dimitry Andric <dim@FreeBSD.org> | 2016-07-23 20:41:05 +0000
commit | 01095a5d43bbfde13731688ddcf6048ebb8b7721 (patch) |
tree | 4def12e759965de927d963ac65840d663ef9d1ea /lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp |
parent | f0f4822ed4b66e3579e92a89f368f8fb860e218e (diff) |
Diffstat (limited to 'lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp')
-rw-r--r-- | lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 2505 |
1 file changed, 1702 insertions, 803 deletions
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index d9f753f401330..efcf1b23adaa0 100644 --- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1,4 +1,4 @@ -//===-- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// +//===-- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ---------===// // // The LLVM Compiler Infrastructure // @@ -7,15 +7,17 @@ // //===----------------------------------------------------------------------===// +#include "AMDKernelCodeT.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "MCTargetDesc/AMDGPUTargetStreamer.h" -#include "Utils/AMDGPUBaseInfo.h" -#include "AMDKernelCodeT.h" #include "SIDefines.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "Utils/AMDKernelCodeTUtils.h" +#include "Utils/AMDGPUAsmUtils.h" #include "llvm/ADT/APFloat.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCContext.h" @@ -25,16 +27,17 @@ #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbolELF.h" -#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" using namespace llvm; @@ -42,6 +45,8 @@ namespace { struct OptionalOperand; +enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL }; + class AMDGPUOperand : public MCParsedAsmOperand { enum KindTy { Token, @@ -55,19 +60,74 @@ class AMDGPUOperand : public MCParsedAsmOperand { public: AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {} - MCContext *Ctx; + typedef std::unique_ptr<AMDGPUOperand> Ptr; + + struct Modifiers { + bool Abs; + bool Neg; + bool Sext; + + bool hasFPModifiers() const { return Abs || Neg; } + bool hasIntModifiers() const { return Sext; } + bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } + + int64_t getFPModifiersOperand() const { + int64_t Operand = 0; + Operand |= Abs ? SISrcMods::ABS : 0; + Operand |= Neg ? SISrcMods::NEG : 0; + return Operand; + } + + int64_t getIntModifiersOperand() const { + int64_t Operand = 0; + Operand |= Sext ? 
SISrcMods::SEXT : 0; + return Operand; + } + + int64_t getModifiersOperand() const { + assert(!(hasFPModifiers() && hasIntModifiers()) + && "fp and int modifiers should not be used simultaneously"); + if (hasFPModifiers()) { + return getFPModifiersOperand(); + } else if (hasIntModifiers()) { + return getIntModifiersOperand(); + } else { + return 0; + } + } + + friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); + }; enum ImmTy { ImmTyNone, - ImmTyDSOffset0, - ImmTyDSOffset1, ImmTyGDS, + ImmTyOffen, + ImmTyIdxen, + ImmTyAddr64, ImmTyOffset, + ImmTyOffset0, + ImmTyOffset1, ImmTyGLC, ImmTySLC, ImmTyTFE, - ImmTyClamp, - ImmTyOMod + ImmTyClampSI, + ImmTyOModSI, + ImmTyDppCtrl, + ImmTyDppRowMask, + ImmTyDppBankMask, + ImmTyDppBoundCtrl, + ImmTySdwaDstSel, + ImmTySdwaSrc0Sel, + ImmTySdwaSrc1Sel, + ImmTySdwaDstUnused, + ImmTyDMask, + ImmTyUNorm, + ImmTyDA, + ImmTyR128, + ImmTyLWE, + ImmTyHwreg, + ImmTySendMsg, }; struct TokOp { @@ -79,11 +139,12 @@ public: bool IsFPImm; ImmTy Type; int64_t Val; + Modifiers Mods; }; struct RegOp { unsigned RegNo; - int Modifiers; + Modifiers Mods; const MCRegisterInfo *TRI; const MCSubtargetInfo *STI; bool IsForcedVOP3; @@ -96,175 +157,323 @@ public: const MCExpr *Expr; }; - void addImmOperands(MCInst &Inst, unsigned N) const { - Inst.addOperand(MCOperand::createImm(getImm())); + bool isToken() const override { + if (Kind == Token) + return true; + + if (Kind != Expression || !Expr) + return false; + + // When parsing operands, we can't always tell if something was meant to be + // a token, like 'gds', or an expression that references a global variable. + // In this case, we assume the string is an expression, and if we need to + // interpret is a token, then we treat the symbol name as the token. + return isa<MCSymbolRefExpr>(Expr); } - StringRef getToken() const { - return StringRef(Tok.Data, Tok.Length); + bool isImm() const override { + return Kind == Immediate; } - void addRegOperands(MCInst &Inst, unsigned N) const { - Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), *Reg.STI))); + bool isInlinableImm() const { + if (!isImmTy(ImmTyNone)) { + // Only plain immediates are inlinable (e.g. "clamp" attribute is not) + return false; + } + // TODO: We should avoid using host float here. It would be better to + // check the float bit values which is what a few other places do. + // We've had bot failures before due to weird NaN support on mips hosts. + const float F = BitsToFloat(Imm.Val); + // TODO: Add 1/(2*pi) for VI + return (Imm.Val <= 64 && Imm.Val >= -16) || + (F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 || + F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0); } - void addRegOrImmOperands(MCInst &Inst, unsigned N) const { - if (isReg()) - addRegOperands(Inst, N); - else - addImmOperands(Inst, N); + bool isRegKind() const { + return Kind == Register; } - void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { - Inst.addOperand(MCOperand::createImm( - Reg.Modifiers == -1 ? 
0 : Reg.Modifiers)); - addRegOperands(Inst, N); + bool isReg() const override { + return isRegKind() && !Reg.Mods.hasModifiers(); + } + + bool isRegOrImmWithInputMods() const { + return isRegKind() || isInlinableImm(); + } + + bool isImmTy(ImmTy ImmT) const { + return isImm() && Imm.Type == ImmT; + } + + bool isImmModifier() const { + return isImm() && Imm.Type != ImmTyNone; + } + + bool isClampSI() const { return isImmTy(ImmTyClampSI); } + bool isOModSI() const { return isImmTy(ImmTyOModSI); } + bool isDMask() const { return isImmTy(ImmTyDMask); } + bool isUNorm() const { return isImmTy(ImmTyUNorm); } + bool isDA() const { return isImmTy(ImmTyDA); } + bool isR128() const { return isImmTy(ImmTyUNorm); } + bool isLWE() const { return isImmTy(ImmTyLWE); } + bool isOffen() const { return isImmTy(ImmTyOffen); } + bool isIdxen() const { return isImmTy(ImmTyIdxen); } + bool isAddr64() const { return isImmTy(ImmTyAddr64); } + bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } + bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); } + bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } + bool isGDS() const { return isImmTy(ImmTyGDS); } + bool isGLC() const { return isImmTy(ImmTyGLC); } + bool isSLC() const { return isImmTy(ImmTySLC); } + bool isTFE() const { return isImmTy(ImmTyTFE); } + bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } + bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } + bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } + bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } + bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } + bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } + bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } + + bool isMod() const { + return isClampSI() || isOModSI(); } - void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { - if (isImm()) - addImmOperands(Inst, N); - else { - assert(isExpr()); - Inst.addOperand(MCOperand::createExpr(Expr)); - } + bool isRegOrImm() const { + return isReg() || isImm(); } - bool defaultTokenHasSuffix() const { - StringRef Token(Tok.Data, Tok.Length); + bool isRegClass(unsigned RCID) const { + return isReg() && Reg.TRI->getRegClass(RCID).contains(getReg()); + } - return Token.endswith("_e32") || Token.endswith("_e64"); + bool isSCSrc32() const { + return isInlinableImm() || isRegClass(AMDGPU::SReg_32RegClassID); } - bool isToken() const override { - return Kind == Token; + bool isSCSrc64() const { + return isInlinableImm() || isRegClass(AMDGPU::SReg_64RegClassID); } - bool isImm() const override { - return Kind == Immediate; + bool isSSrc32() const { + return isImm() || isSCSrc32() || isExpr(); } - bool isInlineImm() const { - float F = BitsToFloat(Imm.Val); - // TODO: Add 0.5pi for VI - return isImm() && ((Imm.Val <= 64 && Imm.Val >= -16) || - (F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 || - F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0)); + bool isSSrc64() const { + // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. + // See isVSrc64(). 
+ return isImm() || isSCSrc64(); } - bool isDSOffset0() const { - assert(isImm()); - return Imm.Type == ImmTyDSOffset0; + bool isVCSrc32() const { + return isInlinableImm() || isRegClass(AMDGPU::VS_32RegClassID); } - bool isDSOffset1() const { - assert(isImm()); - return Imm.Type == ImmTyDSOffset1; + bool isVCSrc64() const { + return isInlinableImm() || isRegClass(AMDGPU::VS_64RegClassID); } - int64_t getImm() const { - return Imm.Val; + bool isVSrc32() const { + return isImm() || isVCSrc32(); } - enum ImmTy getImmTy() const { - assert(isImm()); - return Imm.Type; + bool isVSrc64() const { + // TODO: Check if the 64-bit value (coming from assembly source) can be + // narrowed to 32 bits (in the instruction stream). That require knowledge + // of instruction type (unsigned/signed, floating or "untyped"/B64), + // see [AMD GCN3 ISA 6.3.1]. + // TODO: How 64-bit values are formed from 32-bit literals in _B64 insns? + return isImm() || isVCSrc64(); } - bool isRegKind() const { - return Kind == Register; + bool isMem() const override { + return false; } - bool isReg() const override { - return Kind == Register && Reg.Modifiers == -1; + bool isExpr() const { + return Kind == Expression; } - bool isRegWithInputMods() const { - return Kind == Register && (Reg.IsForcedVOP3 || Reg.Modifiers != -1); + bool isSoppBrTarget() const { + return isExpr() || isImm(); } - void setModifiers(unsigned Mods) { - assert(isReg()); - Reg.Modifiers = Mods; + bool isSWaitCnt() const; + bool isHwreg() const; + bool isSendMsg() const; + bool isSMRDOffset() const; + bool isSMRDLiteralOffset() const; + bool isDPPCtrl() const; + + StringRef getExpressionAsToken() const { + assert(isExpr()); + const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); + return S->getSymbol().getName(); } - bool hasModifiers() const { - assert(isRegKind()); - return Reg.Modifiers != -1; + + StringRef getToken() const { + assert(isToken()); + + if (Kind == Expression) + return getExpressionAsToken(); + + return StringRef(Tok.Data, Tok.Length); + } + + int64_t getImm() const { + assert(isImm()); + return Imm.Val; + } + + enum ImmTy getImmTy() const { + assert(isImm()); + return Imm.Type; } unsigned getReg() const override { return Reg.RegNo; } - bool isRegOrImm() const { - return isReg() || isImm(); + SMLoc getStartLoc() const override { + return StartLoc; } - bool isRegClass(unsigned RCID) const { - return Reg.TRI->getRegClass(RCID).contains(getReg()); + SMLoc getEndLoc() const override { + return EndLoc; } - bool isSCSrc32() const { - return isInlineImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID)); + Modifiers getModifiers() const { + assert(isRegKind() || isImmTy(ImmTyNone)); + return isRegKind() ? 
Reg.Mods : Imm.Mods; } - bool isSSrc32() const { - return isImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID)); + void setModifiers(Modifiers Mods) { + assert(isRegKind() || isImmTy(ImmTyNone)); + if (isRegKind()) + Reg.Mods = Mods; + else + Imm.Mods = Mods; } - bool isSSrc64() const { - return isImm() || isInlineImm() || - (isReg() && isRegClass(AMDGPU::SReg_64RegClassID)); + bool hasModifiers() const { + return getModifiers().hasModifiers(); } - - bool isSCSrc64() const { - return (isReg() && isRegClass(AMDGPU::SReg_64RegClassID)) || isInlineImm(); + + bool hasFPModifiers() const { + return getModifiers().hasFPModifiers(); } - bool isVCSrc32() const { - return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID)); + bool hasIntModifiers() const { + return getModifiers().hasIntModifiers(); } - bool isVCSrc64() const { - return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID)); + void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const { + if (isImmTy(ImmTyNone) && ApplyModifiers && Imm.Mods.hasFPModifiers()) { + // Apply modifiers to immediate value + int64_t Val = Imm.Val; + bool Negate = Imm.Mods.Neg; // Only negate can get here + if (Imm.IsFPImm) { + APFloat F(BitsToFloat(Val)); + if (Negate) { + F.changeSign(); + } + Val = F.bitcastToAPInt().getZExtValue(); + } else { + Val = Negate ? -Val : Val; + } + Inst.addOperand(MCOperand::createImm(Val)); + } else { + Inst.addOperand(MCOperand::createImm(getImm())); + } } - bool isVSrc32() const { - return isImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID)); + void addRegOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), *Reg.STI))); } - bool isVSrc64() const { - return isImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID)); + void addRegOrImmOperands(MCInst &Inst, unsigned N) const { + if (isRegKind()) + addRegOperands(Inst, N); + else if (isExpr()) + Inst.addOperand(MCOperand::createExpr(Expr)); + else + addImmOperands(Inst, N); } - bool isMem() const override { - return false; + void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { + Modifiers Mods = getModifiers(); + Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); + if (isRegKind()) { + addRegOperands(Inst, N); + } else { + addImmOperands(Inst, N, false); + } } - bool isExpr() const { - return Kind == Expression; + void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { + assert(!hasIntModifiers()); + addRegOrImmWithInputModsOperands(Inst, N); } - bool isSoppBrTarget() const { - return isExpr() || isImm(); + void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { + assert(!hasFPModifiers()); + addRegOrImmWithInputModsOperands(Inst, N); } - SMLoc getStartLoc() const override { - return StartLoc; + void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { + if (isImm()) + addImmOperands(Inst, N); + else { + assert(isExpr()); + Inst.addOperand(MCOperand::createExpr(Expr)); + } } - SMLoc getEndLoc() const override { - return EndLoc; + void printImmTy(raw_ostream& OS, ImmTy Type) const { + switch (Type) { + case ImmTyNone: OS << "None"; break; + case ImmTyGDS: OS << "GDS"; break; + case ImmTyOffen: OS << "Offen"; break; + case ImmTyIdxen: OS << "Idxen"; break; + case ImmTyAddr64: OS << "Addr64"; break; + case ImmTyOffset: OS << "Offset"; break; + case ImmTyOffset0: OS << "Offset0"; break; + case ImmTyOffset1: OS << "Offset1"; break; + case ImmTyGLC: OS << "GLC"; break; + case ImmTySLC: OS 
<< "SLC"; break; + case ImmTyTFE: OS << "TFE"; break; + case ImmTyClampSI: OS << "ClampSI"; break; + case ImmTyOModSI: OS << "OModSI"; break; + case ImmTyDppCtrl: OS << "DppCtrl"; break; + case ImmTyDppRowMask: OS << "DppRowMask"; break; + case ImmTyDppBankMask: OS << "DppBankMask"; break; + case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; + case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; + case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; + case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; + case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; + case ImmTyDMask: OS << "DMask"; break; + case ImmTyUNorm: OS << "UNorm"; break; + case ImmTyDA: OS << "DA"; break; + case ImmTyR128: OS << "R128"; break; + case ImmTyLWE: OS << "LWE"; break; + case ImmTyHwreg: OS << "Hwreg"; break; + case ImmTySendMsg: OS << "SendMsg"; break; + } } void print(raw_ostream &OS) const override { switch (Kind) { case Register: - OS << "<register " << getReg() << " mods: " << Reg.Modifiers << '>'; + OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; break; case Immediate: - OS << getImm(); + OS << '<' << getImm(); + if (getImmTy() != ImmTyNone) { + OS << " type: "; printImmTy(OS, getImmTy()); + } + OS << " mods: " << Imm.Mods << '>'; break; case Token: OS << '\'' << getToken() << '\''; @@ -275,20 +484,21 @@ public: } } - static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc, - enum ImmTy Type = ImmTyNone, - bool IsFPImm = false) { + static AMDGPUOperand::Ptr CreateImm(int64_t Val, SMLoc Loc, + enum ImmTy Type = ImmTyNone, + bool IsFPImm = false) { auto Op = llvm::make_unique<AMDGPUOperand>(Immediate); Op->Imm.Val = Val; Op->Imm.IsFPImm = IsFPImm; Op->Imm.Type = Type; + Op->Imm.Mods = {false, false, false}; Op->StartLoc = Loc; Op->EndLoc = Loc; return Op; } - static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc, - bool HasExplicitEncodingSize = true) { + static AMDGPUOperand::Ptr CreateToken(StringRef Str, SMLoc Loc, + bool HasExplicitEncodingSize = true) { auto Res = llvm::make_unique<AMDGPUOperand>(Token); Res->Tok.Data = Str.data(); Res->Tok.Length = Str.size(); @@ -297,43 +507,43 @@ public: return Res; } - static std::unique_ptr<AMDGPUOperand> CreateReg(unsigned RegNo, SMLoc S, - SMLoc E, - const MCRegisterInfo *TRI, - const MCSubtargetInfo *STI, - bool ForceVOP3) { + static AMDGPUOperand::Ptr CreateReg(unsigned RegNo, SMLoc S, + SMLoc E, + const MCRegisterInfo *TRI, + const MCSubtargetInfo *STI, + bool ForceVOP3) { auto Op = llvm::make_unique<AMDGPUOperand>(Register); Op->Reg.RegNo = RegNo; Op->Reg.TRI = TRI; Op->Reg.STI = STI; - Op->Reg.Modifiers = -1; + Op->Reg.Mods = {false, false, false}; Op->Reg.IsForcedVOP3 = ForceVOP3; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static std::unique_ptr<AMDGPUOperand> CreateExpr(const class MCExpr *Expr, SMLoc S) { + static AMDGPUOperand::Ptr CreateExpr(const class MCExpr *Expr, SMLoc S) { auto Op = llvm::make_unique<AMDGPUOperand>(Expression); Op->Expr = Expr; Op->StartLoc = S; Op->EndLoc = S; return Op; } - - bool isDSOffset() const; - bool isDSOffset01() const; - bool isSWaitCnt() const; - bool isMubufOffset() const; - bool isSMRDOffset() const; - bool isSMRDLiteralOffset() const; }; +raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { + OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; + return OS; +} + class AMDGPUAsmParser : public MCTargetAsmParser { const MCInstrInfo &MII; MCAsmParser &Parser; unsigned ForcedEncodingSize; + bool ForcedDPP; + bool 
ForcedSDWA; bool isSI() const { return AMDGPU::isSI(getSTI()); @@ -373,9 +583,11 @@ private: bool ParseSectionDirectiveHSADataGlobalAgent(); bool ParseSectionDirectiveHSADataGlobalProgram(); bool ParseSectionDirectiveHSARodataReadonlyAgent(); + bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum); + bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth); + void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, bool IsAtomic, bool IsAtomicReturn); public: -public: enum AMDGPUMatchResultTy { Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY }; @@ -384,7 +596,9 @@ public: const MCInstrInfo &MII, const MCTargetOptions &Options) : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser), - ForcedEncodingSize(0) { + ForcedEncodingSize(0), + ForcedDPP(false), + ForcedSDWA(false) { MCAsmParserExtension::Initialize(Parser); if (getSTI().getFeatureBits().none()) { @@ -393,6 +607,21 @@ public: } setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); + + { + // TODO: make those pre-defined variables read-only. + // Currently there is none suitable machinery in the core llvm-mc for this. + // MCSymbol::isRedefinable is intended for another purpose, and + // AsmParser::parseDirectiveSet() cannot be specialized for specific target. + AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(getSTI().getFeatureBits()); + MCContext &Ctx = getContext(); + MCSymbol *Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); + Sym->setVariableValue(MCConstantExpr::create(Isa.Major, Ctx)); + Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); + Sym->setVariableValue(MCConstantExpr::create(Isa.Minor, Ctx)); + Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); + Sym->setVariableValue(MCConstantExpr::create(Isa.Stepping, Ctx)); + } } AMDGPUTargetStreamer &getTargetStreamer() { @@ -400,84 +629,117 @@ public: return static_cast<AMDGPUTargetStreamer &>(TS); } - unsigned getForcedEncodingSize() const { - return ForcedEncodingSize; - } - - void setForcedEncodingSize(unsigned Size) { - ForcedEncodingSize = Size; - } + void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } + void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } + void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } - bool isForcedVOP3() const { - return ForcedEncodingSize == 64; - } + unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } + bool isForcedVOP3() const { return ForcedEncodingSize == 64; } + bool isForcedDPP() const { return ForcedDPP; } + bool isForcedSDWA() const { return ForcedSDWA; } + std::unique_ptr<AMDGPUOperand> parseRegister(); bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; unsigned checkTargetMatchPredicate(MCInst &Inst) override; + unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, + unsigned Kind) override; bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override; bool ParseDirective(AsmToken DirectiveID) override; OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic); + StringRef parseMnemonicSuffix(StringRef Name); bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; - OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int, - int64_t Default = 0); + 
OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); OperandMatchResultTy parseIntWithPrefix(const char *Prefix, OperandVector &Operands, - enum AMDGPUOperand::ImmTy ImmTy = - AMDGPUOperand::ImmTyNone); + enum AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, + bool (*ConvertResult)(int64_t&) = 0); OperandMatchResultTy parseNamedBit(const char *Name, OperandVector &Operands, - enum AMDGPUOperand::ImmTy ImmTy = - AMDGPUOperand::ImmTyNone); - OperandMatchResultTy parseOptionalOps( - const ArrayRef<OptionalOperand> &OptionalOps, - OperandVector &Operands); + enum AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); + OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, StringRef &Value); + OperandMatchResultTy parseImm(OperandVector &Operands); + OperandMatchResultTy parseRegOrImm(OperandVector &Operands); + OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands); + OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands); void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); void cvtDS(MCInst &Inst, const OperandVector &Operands); - OperandMatchResultTy parseDSOptionalOps(OperandVector &Operands); - OperandMatchResultTy parseDSOff01OptionalOps(OperandVector &Operands); - OperandMatchResultTy parseDSOffsetOptional(OperandVector &Operands); bool parseCnt(int64_t &IntVal); OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); - OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); + OperandMatchResultTy parseHwreg(OperandVector &Operands); - OperandMatchResultTy parseFlatOptionalOps(OperandVector &Operands); - OperandMatchResultTy parseFlatAtomicOptionalOps(OperandVector &Operands); - void cvtFlat(MCInst &Inst, const OperandVector &Operands); +private: + struct OperandInfoTy { + int64_t Id; + bool IsSymbolic; + OperandInfoTy(int64_t Id_) : Id(Id_), IsSymbolic(false) { } + }; - void cvtMubuf(MCInst &Inst, const OperandVector &Operands); - OperandMatchResultTy parseOffset(OperandVector &Operands); - OperandMatchResultTy parseMubufOptionalOps(OperandVector &Operands); - OperandMatchResultTy parseGLC(OperandVector &Operands); - OperandMatchResultTy parseSLC(OperandVector &Operands); - OperandMatchResultTy parseTFE(OperandVector &Operands); + bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId); + bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); +public: + OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); - OperandMatchResultTy parseDMask(OperandVector &Operands); - OperandMatchResultTy parseUNorm(OperandVector &Operands); - OperandMatchResultTy parseR128(OperandVector &Operands); + OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); + OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); + void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } + void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } + void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } + AMDGPUOperand::Ptr defaultGLC() const; + AMDGPUOperand::Ptr defaultSLC() const; + AMDGPUOperand::Ptr defaultTFE() const; + + AMDGPUOperand::Ptr defaultDMask() const; + AMDGPUOperand::Ptr defaultUNorm() const; + AMDGPUOperand::Ptr defaultDA() const; + AMDGPUOperand::Ptr defaultR128() const; + AMDGPUOperand::Ptr defaultLWE() const; + AMDGPUOperand::Ptr 
defaultSMRDOffset() const; + AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; + + OperandMatchResultTy parseOModOperand(OperandVector &Operands); + + void cvtId(MCInst &Inst, const OperandVector &Operands); + void cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands); void cvtVOP3(MCInst &Inst, const OperandVector &Operands); - OperandMatchResultTy parseVOP3OptionalOps(OperandVector &Operands); + + void cvtMIMG(MCInst &Inst, const OperandVector &Operands); + void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); + + OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); + AMDGPUOperand::Ptr defaultRowMask() const; + AMDGPUOperand::Ptr defaultBankMask() const; + AMDGPUOperand::Ptr defaultBoundCtrl() const; + void cvtDPP(MCInst &Inst, const OperandVector &Operands); + + OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, + AMDGPUOperand::ImmTy Type); + OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); + void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); + void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); + void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); + void cvtSDWA(MCInst &Inst, const OperandVector &Operands, + uint64_t BasicInstType); }; struct OptionalOperand { const char *Name; AMDGPUOperand::ImmTy Type; bool IsBit; - int64_t Default; bool (*ConvertResult)(int64_t&); }; } -static int getRegClass(bool IsVgpr, unsigned RegWidth) { - if (IsVgpr) { +static int getRegClass(RegisterKind Is, unsigned RegWidth) { + if (Is == IS_VGPR) { switch (RegWidth) { default: return -1; case 1: return AMDGPU::VGPR_32RegClassID; @@ -487,109 +749,379 @@ static int getRegClass(bool IsVgpr, unsigned RegWidth) { case 8: return AMDGPU::VReg_256RegClassID; case 16: return AMDGPU::VReg_512RegClassID; } + } else if (Is == IS_TTMP) { + switch (RegWidth) { + default: return -1; + case 1: return AMDGPU::TTMP_32RegClassID; + case 2: return AMDGPU::TTMP_64RegClassID; + case 4: return AMDGPU::TTMP_128RegClassID; + } + } else if (Is == IS_SGPR) { + switch (RegWidth) { + default: return -1; + case 1: return AMDGPU::SGPR_32RegClassID; + case 2: return AMDGPU::SGPR_64RegClassID; + case 4: return AMDGPU::SGPR_128RegClassID; + case 8: return AMDGPU::SReg_256RegClassID; + case 16: return AMDGPU::SReg_512RegClassID; + } } - - switch (RegWidth) { - default: return -1; - case 1: return AMDGPU::SGPR_32RegClassID; - case 2: return AMDGPU::SGPR_64RegClassID; - case 4: return AMDGPU::SReg_128RegClassID; - case 8: return AMDGPU::SReg_256RegClassID; - case 16: return AMDGPU::SReg_512RegClassID; - } + return -1; } -static unsigned getRegForName(StringRef RegName) { - +static unsigned getSpecialRegForName(StringRef RegName) { return StringSwitch<unsigned>(RegName) .Case("exec", AMDGPU::EXEC) .Case("vcc", AMDGPU::VCC) .Case("flat_scratch", AMDGPU::FLAT_SCR) .Case("m0", AMDGPU::M0) .Case("scc", AMDGPU::SCC) + .Case("tba", AMDGPU::TBA) + .Case("tma", AMDGPU::TMA) .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) .Case("vcc_lo", AMDGPU::VCC_LO) .Case("vcc_hi", AMDGPU::VCC_HI) .Case("exec_lo", AMDGPU::EXEC_LO) .Case("exec_hi", AMDGPU::EXEC_HI) + .Case("tma_lo", AMDGPU::TMA_LO) + .Case("tma_hi", AMDGPU::TMA_HI) + .Case("tba_lo", AMDGPU::TBA_LO) + .Case("tba_hi", AMDGPU::TBA_HI) .Default(0); } bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { - const AsmToken Tok = Parser.getTok(); - StartLoc = Tok.getLoc(); - EndLoc = Tok.getEndLoc(); + auto R = parseRegister(); 
+ if (!R) return true; + assert(R->isReg()); + RegNo = R->getReg(); + StartLoc = R->getStartLoc(); + EndLoc = R->getEndLoc(); + return false; +} + +bool AMDGPUAsmParser::AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum) +{ + switch (RegKind) { + case IS_SPECIAL: + if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { Reg = AMDGPU::EXEC; RegWidth = 2; return true; } + if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { Reg = AMDGPU::FLAT_SCR; RegWidth = 2; return true; } + if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { Reg = AMDGPU::VCC; RegWidth = 2; return true; } + if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { Reg = AMDGPU::TBA; RegWidth = 2; return true; } + if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { Reg = AMDGPU::TMA; RegWidth = 2; return true; } + return false; + case IS_VGPR: + case IS_SGPR: + case IS_TTMP: + if (Reg1 != Reg + RegWidth) { return false; } + RegWidth++; + return true; + default: + assert(false); return false; + } +} + +bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth) +{ + const MCRegisterInfo *TRI = getContext().getRegisterInfo(); + if (getLexer().is(AsmToken::Identifier)) { + StringRef RegName = Parser.getTok().getString(); + if ((Reg = getSpecialRegForName(RegName))) { + Parser.Lex(); + RegKind = IS_SPECIAL; + } else { + unsigned RegNumIndex = 0; + if (RegName[0] == 'v') { + RegNumIndex = 1; + RegKind = IS_VGPR; + } else if (RegName[0] == 's') { + RegNumIndex = 1; + RegKind = IS_SGPR; + } else if (RegName.startswith("ttmp")) { + RegNumIndex = strlen("ttmp"); + RegKind = IS_TTMP; + } else { + return false; + } + if (RegName.size() > RegNumIndex) { + // Single 32-bit register: vXX. + if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum)) + return false; + Parser.Lex(); + RegWidth = 1; + } else { + // Range of registers: v[XX:YY]. ":YY" is optional. 
+ Parser.Lex(); + int64_t RegLo, RegHi; + if (getLexer().isNot(AsmToken::LBrac)) + return false; + Parser.Lex(); + + if (getParser().parseAbsoluteExpression(RegLo)) + return false; + + const bool isRBrace = getLexer().is(AsmToken::RBrac); + if (!isRBrace && getLexer().isNot(AsmToken::Colon)) + return false; + Parser.Lex(); + + if (isRBrace) { + RegHi = RegLo; + } else { + if (getParser().parseAbsoluteExpression(RegHi)) + return false; + + if (getLexer().isNot(AsmToken::RBrac)) + return false; + Parser.Lex(); + } + RegNum = (unsigned) RegLo; + RegWidth = (RegHi - RegLo) + 1; + } + } + } else if (getLexer().is(AsmToken::LBrac)) { + // List of consecutive registers: [s0,s1,s2,s3] + Parser.Lex(); + if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) + return false; + if (RegWidth != 1) + return false; + RegisterKind RegKind1; + unsigned Reg1, RegNum1, RegWidth1; + do { + if (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); + } else if (getLexer().is(AsmToken::RBrac)) { + Parser.Lex(); + break; + } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1)) { + if (RegWidth1 != 1) { + return false; + } + if (RegKind1 != RegKind) { + return false; + } + if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) { + return false; + } + } else { + return false; + } + } while (true); + } else { + return false; + } + switch (RegKind) { + case IS_SPECIAL: + RegNum = 0; + RegWidth = 1; + break; + case IS_VGPR: + case IS_SGPR: + case IS_TTMP: + { + unsigned Size = 1; + if (RegKind == IS_SGPR || RegKind == IS_TTMP) { + // SGPR and TTMP registers must be are aligned. Max required alignment is 4 dwords. + Size = std::min(RegWidth, 4u); + } + if (RegNum % Size != 0) + return false; + RegNum = RegNum / Size; + int RCID = getRegClass(RegKind, RegWidth); + if (RCID == -1) + return false; + const MCRegisterClass RC = TRI->getRegClass(RCID); + if (RegNum >= RC.getNumRegs()) + return false; + Reg = RC.getRegister(RegNum); + break; + } + + default: + assert(false); return false; + } + + if (!subtargetHasRegister(*TRI, Reg)) + return false; + return true; +} + +std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { + const auto &Tok = Parser.getTok(); + SMLoc StartLoc = Tok.getLoc(); + SMLoc EndLoc = Tok.getEndLoc(); const MCRegisterInfo *TRI = getContext().getRegisterInfo(); - StringRef RegName = Tok.getString(); - RegNo = getRegForName(RegName); + RegisterKind RegKind; + unsigned Reg, RegNum, RegWidth; - if (RegNo) { + if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { + return nullptr; + } + return AMDGPUOperand::CreateReg(Reg, StartLoc, EndLoc, + TRI, &getSTI(), false); +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseImm(OperandVector &Operands) { + bool Minus = false; + if (getLexer().getKind() == AsmToken::Minus) { + Minus = true; Parser.Lex(); - return !subtargetHasRegister(*TRI, RegNo); } - // Match vgprs and sgprs - if (RegName[0] != 's' && RegName[0] != 'v') - return true; + SMLoc S = Parser.getTok().getLoc(); + switch(getLexer().getKind()) { + case AsmToken::Integer: { + int64_t IntVal; + if (getParser().parseAbsoluteExpression(IntVal)) + return MatchOperand_ParseFail; + if (!isInt<32>(IntVal) && !isUInt<32>(IntVal)) { + Error(S, "invalid immediate: only 32-bit values are legal"); + return MatchOperand_ParseFail; + } - bool IsVgpr = RegName[0] == 'v'; - unsigned RegWidth; - unsigned RegIndexInClass; - if (RegName.size() > 1) { - // We have a 32-bit register - RegWidth = 1; - if (RegName.substr(1).getAsInteger(10, RegIndexInClass)) - return 
true; + if (Minus) + IntVal *= -1; + Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S)); + return MatchOperand_Success; + } + case AsmToken::Real: { + // FIXME: We should emit an error if a double precisions floating-point + // value is used. I'm not sure the best way to detect this. + int64_t IntVal; + if (getParser().parseAbsoluteExpression(IntVal)) + return MatchOperand_ParseFail; + + APFloat F((float)BitsToDouble(IntVal)); + if (Minus) + F.changeSign(); + Operands.push_back( + AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S, + AMDGPUOperand::ImmTyNone, true)); + return MatchOperand_Success; + } + default: + return Minus ? MatchOperand_ParseFail : MatchOperand_NoMatch; + } +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands) { + auto res = parseImm(Operands); + if (res != MatchOperand_NoMatch) { + return res; + } + + if (auto R = parseRegister()) { + assert(R->isReg()); + R->Reg.IsForcedVOP3 = isForcedVOP3(); + Operands.push_back(std::move(R)); + return MatchOperand_Success; + } + return MatchOperand_ParseFail; +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands) { + // XXX: During parsing we can't determine if minus sign means + // negate-modifier or negative immediate value. + // By default we suppose it is modifier. + bool Negate = false, Abs = false, Abs2 = false; + + if (getLexer().getKind()== AsmToken::Minus) { Parser.Lex(); - } else { - // We have a register greater than 32-bits. + Negate = true; + } - int64_t RegLo, RegHi; + if (getLexer().getKind() == AsmToken::Identifier && Parser.getTok().getString() == "abs") { Parser.Lex(); - if (getLexer().isNot(AsmToken::LBrac)) - return true; + Abs2 = true; + if (getLexer().isNot(AsmToken::LParen)) { + Error(Parser.getTok().getLoc(), "expected left paren after abs"); + return MatchOperand_ParseFail; + } + Parser.Lex(); + } + if (getLexer().getKind() == AsmToken::Pipe) { + if (Abs2) { + Error(Parser.getTok().getLoc(), "expected register or immediate"); + return MatchOperand_ParseFail; + } Parser.Lex(); - if (getParser().parseAbsoluteExpression(RegLo)) - return true; + Abs = true; + } - if (getLexer().isNot(AsmToken::Colon)) - return true; + auto Res = parseRegOrImm(Operands); + if (Res != MatchOperand_Success) { + return Res; + } + AMDGPUOperand::Modifiers Mods = {false, false, false}; + if (Negate) { + Mods.Neg = true; + } + if (Abs) { + if (getLexer().getKind() != AsmToken::Pipe) { + Error(Parser.getTok().getLoc(), "expected vertical bar"); + return MatchOperand_ParseFail; + } Parser.Lex(); - if (getParser().parseAbsoluteExpression(RegHi)) - return true; + Mods.Abs = true; + } + if (Abs2) { + if (getLexer().isNot(AsmToken::RParen)) { + Error(Parser.getTok().getLoc(), "expected closing parentheses"); + return MatchOperand_ParseFail; + } + Parser.Lex(); + Mods.Abs = true; + } - if (getLexer().isNot(AsmToken::RBrac)) - return true; + if (Mods.hasFPModifiers()) { + AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); + Op.setModifiers(Mods); + } + return MatchOperand_Success; +} - Parser.Lex(); - RegWidth = (RegHi - RegLo) + 1; - if (IsVgpr) { - // VGPR registers aren't aligned. - RegIndexInClass = RegLo; - } else { - // SGPR registers are aligned. Max alignment is 4 dwords. 
- unsigned Size = std::min(RegWidth, 4u); - if (RegLo % Size != 0) - return true; +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands) { + bool Sext = false; - RegIndexInClass = RegLo / Size; + if (getLexer().getKind() == AsmToken::Identifier && Parser.getTok().getString() == "sext") { + Parser.Lex(); + Sext = true; + if (getLexer().isNot(AsmToken::LParen)) { + Error(Parser.getTok().getLoc(), "expected left paren after sext"); + return MatchOperand_ParseFail; } + Parser.Lex(); } - int RCID = getRegClass(IsVgpr, RegWidth); - if (RCID == -1) - return true; - - const MCRegisterClass RC = TRI->getRegClass(RCID); - if (RegIndexInClass >= RC.getNumRegs()) - return true; + auto Res = parseRegOrImm(Operands); + if (Res != MatchOperand_Success) { + return Res; + } - RegNo = RC.getRegister(RegIndexInClass); - return !subtargetHasRegister(*TRI, RegNo); + AMDGPUOperand::Modifiers Mods = {false, false, false}; + if (Sext) { + if (getLexer().isNot(AsmToken::RParen)) { + Error(Parser.getTok().getLoc(), "expected closing parentheses"); + return MatchOperand_ParseFail; + } + Parser.Lex(); + Mods.Sext = true; + } + + if (Mods.hasIntModifiers()) { + AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); + Op.setModifiers(Mods); + } + return MatchOperand_Success; } unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { @@ -597,7 +1129,9 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || - (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3))) + (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || + (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || + (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) return Match_InvalidOperand; if ((TSFlags & SIInstrFlags::VOP3) && @@ -608,7 +1142,6 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { return Match_Success; } - bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, @@ -632,31 +1165,8 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SMLoc ErrorLoc = IDLoc; if (ErrorInfo != ~0ULL) { if (ErrorInfo >= Operands.size()) { - if (isForcedVOP3()) { - // If 64-bit encoding has been forced we can end up with no - // clamp or omod operands if none of the registers have modifiers, - // so we need to add these to the operand list. 
- AMDGPUOperand &LastOp = - ((AMDGPUOperand &)*Operands[Operands.size() - 1]); - if (LastOp.isRegKind() || - (LastOp.isImm() && - LastOp.getImmTy() != AMDGPUOperand::ImmTyNone)) { - SMLoc S = Parser.getTok().getLoc(); - Operands.push_back(AMDGPUOperand::CreateImm(0, S, - AMDGPUOperand::ImmTyClamp)); - Operands.push_back(AMDGPUOperand::CreateImm(0, S, - AMDGPUOperand::ImmTyOMod)); - bool Res = MatchAndEmitInstruction(IDLoc, Opcode, Operands, - Out, ErrorInfo, - MatchingInlineAsm); - if (!Res) - return Res; - } - - } return Error(IDLoc, "too few operands for instruction"); } - ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; @@ -762,164 +1272,12 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header) { - - if (getLexer().isNot(AsmToken::Equal)) - return TokError("expected '='"); - Lex(); - - if (getLexer().isNot(AsmToken::Integer)) - return TokError("amd_kernel_code_t values must be integers"); - - uint64_t Value = getLexer().getTok().getIntVal(); + SmallString<40> ErrStr; + raw_svector_ostream Err(ErrStr); + if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { + return TokError(Err.str()); + } Lex(); - - if (ID == "kernel_code_version_major") - Header.amd_kernel_code_version_major = Value; - else if (ID == "kernel_code_version_minor") - Header.amd_kernel_code_version_minor = Value; - else if (ID == "machine_kind") - Header.amd_machine_kind = Value; - else if (ID == "machine_version_major") - Header.amd_machine_version_major = Value; - else if (ID == "machine_version_minor") - Header.amd_machine_version_minor = Value; - else if (ID == "machine_version_stepping") - Header.amd_machine_version_stepping = Value; - else if (ID == "kernel_code_entry_byte_offset") - Header.kernel_code_entry_byte_offset = Value; - else if (ID == "kernel_code_prefetch_byte_size") - Header.kernel_code_prefetch_byte_size = Value; - else if (ID == "max_scratch_backing_memory_byte_size") - Header.max_scratch_backing_memory_byte_size = Value; - else if (ID == "compute_pgm_rsrc1_vgprs") - Header.compute_pgm_resource_registers |= S_00B848_VGPRS(Value); - else if (ID == "compute_pgm_rsrc1_sgprs") - Header.compute_pgm_resource_registers |= S_00B848_SGPRS(Value); - else if (ID == "compute_pgm_rsrc1_priority") - Header.compute_pgm_resource_registers |= S_00B848_PRIORITY(Value); - else if (ID == "compute_pgm_rsrc1_float_mode") - Header.compute_pgm_resource_registers |= S_00B848_FLOAT_MODE(Value); - else if (ID == "compute_pgm_rsrc1_priv") - Header.compute_pgm_resource_registers |= S_00B848_PRIV(Value); - else if (ID == "compute_pgm_rsrc1_dx10_clamp") - Header.compute_pgm_resource_registers |= S_00B848_DX10_CLAMP(Value); - else if (ID == "compute_pgm_rsrc1_debug_mode") - Header.compute_pgm_resource_registers |= S_00B848_DEBUG_MODE(Value); - else if (ID == "compute_pgm_rsrc1_ieee_mode") - Header.compute_pgm_resource_registers |= S_00B848_IEEE_MODE(Value); - else if (ID == "compute_pgm_rsrc2_scratch_en") - Header.compute_pgm_resource_registers |= (S_00B84C_SCRATCH_EN(Value) << 32); - else if (ID == "compute_pgm_rsrc2_user_sgpr") - Header.compute_pgm_resource_registers |= (S_00B84C_USER_SGPR(Value) << 32); - else if (ID == "compute_pgm_rsrc2_tgid_x_en") - Header.compute_pgm_resource_registers |= (S_00B84C_TGID_X_EN(Value) << 32); - else if (ID == "compute_pgm_rsrc2_tgid_y_en") - Header.compute_pgm_resource_registers |= (S_00B84C_TGID_Y_EN(Value) << 32); - else if (ID == 
"compute_pgm_rsrc2_tgid_z_en") - Header.compute_pgm_resource_registers |= (S_00B84C_TGID_Z_EN(Value) << 32); - else if (ID == "compute_pgm_rsrc2_tg_size_en") - Header.compute_pgm_resource_registers |= (S_00B84C_TG_SIZE_EN(Value) << 32); - else if (ID == "compute_pgm_rsrc2_tidig_comp_cnt") - Header.compute_pgm_resource_registers |= - (S_00B84C_TIDIG_COMP_CNT(Value) << 32); - else if (ID == "compute_pgm_rsrc2_excp_en_msb") - Header.compute_pgm_resource_registers |= - (S_00B84C_EXCP_EN_MSB(Value) << 32); - else if (ID == "compute_pgm_rsrc2_lds_size") - Header.compute_pgm_resource_registers |= (S_00B84C_LDS_SIZE(Value) << 32); - else if (ID == "compute_pgm_rsrc2_excp_en") - Header.compute_pgm_resource_registers |= (S_00B84C_EXCP_EN(Value) << 32); - else if (ID == "compute_pgm_resource_registers") - Header.compute_pgm_resource_registers = Value; - else if (ID == "enable_sgpr_private_segment_buffer") - Header.code_properties |= - (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT); - else if (ID == "enable_sgpr_dispatch_ptr") - Header.code_properties |= - (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT); - else if (ID == "enable_sgpr_queue_ptr") - Header.code_properties |= - (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT); - else if (ID == "enable_sgpr_kernarg_segment_ptr") - Header.code_properties |= - (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT); - else if (ID == "enable_sgpr_dispatch_id") - Header.code_properties |= - (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT); - else if (ID == "enable_sgpr_flat_scratch_init") - Header.code_properties |= - (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT); - else if (ID == "enable_sgpr_private_segment_size") - Header.code_properties |= - (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT); - else if (ID == "enable_sgpr_grid_workgroup_count_x") - Header.code_properties |= - (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT); - else if (ID == "enable_sgpr_grid_workgroup_count_y") - Header.code_properties |= - (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT); - else if (ID == "enable_sgpr_grid_workgroup_count_z") - Header.code_properties |= - (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT); - else if (ID == "enable_ordered_append_gds") - Header.code_properties |= - (Value << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT); - else if (ID == "private_element_size") - Header.code_properties |= - (Value << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT); - else if (ID == "is_ptr64") - Header.code_properties |= - (Value << AMD_CODE_PROPERTY_IS_PTR64_SHIFT); - else if (ID == "is_dynamic_callstack") - Header.code_properties |= - (Value << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT); - else if (ID == "is_debug_enabled") - Header.code_properties |= - (Value << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT); - else if (ID == "is_xnack_enabled") - Header.code_properties |= - (Value << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT); - else if (ID == "workitem_private_segment_byte_size") - Header.workitem_private_segment_byte_size = Value; - else if (ID == "workgroup_group_segment_byte_size") - Header.workgroup_group_segment_byte_size = Value; - else if (ID == "gds_segment_byte_size") - Header.gds_segment_byte_size = Value; - else if (ID == "kernarg_segment_byte_size") - Header.kernarg_segment_byte_size = Value; - else if (ID == "workgroup_fbarrier_count") - Header.workgroup_fbarrier_count = Value; - else if (ID 
== "wavefront_sgpr_count") - Header.wavefront_sgpr_count = Value; - else if (ID == "workitem_vgpr_count") - Header.workitem_vgpr_count = Value; - else if (ID == "reserved_vgpr_first") - Header.reserved_vgpr_first = Value; - else if (ID == "reserved_vgpr_count") - Header.reserved_vgpr_count = Value; - else if (ID == "reserved_sgpr_first") - Header.reserved_sgpr_first = Value; - else if (ID == "reserved_sgpr_count") - Header.reserved_sgpr_count = Value; - else if (ID == "debug_wavefront_private_segment_offset_sgpr") - Header.debug_wavefront_private_segment_offset_sgpr = Value; - else if (ID == "debug_private_segment_buffer_sgpr") - Header.debug_private_segment_buffer_sgpr = Value; - else if (ID == "kernarg_segment_alignment") - Header.kernarg_segment_alignment = Value; - else if (ID == "group_segment_alignment") - Header.group_segment_alignment = Value; - else if (ID == "private_segment_alignment") - Header.private_segment_alignment = Value; - else if (ID == "wavefront_size") - Header.wavefront_size = Value; - else if (ID == "call_convention") - Header.call_convention = Value; - else if (ID == "runtime_loader_kernel_symbol") - Header.runtime_loader_kernel_symbol = Value; - else - return TokError("amd_kernel_code_t value not recognized."); - return false; } @@ -930,9 +1288,6 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { while (true) { - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("amd_kernel_code_t values must begin on a new line"); - // Lex EndOfStatement. This is in a while loop, because lexing a comment // will set the current token to EndOfStatement. while(getLexer().is(AsmToken::EndOfStatement)) @@ -1026,7 +1381,7 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".amd_kernel_code_t") return ParseDirectiveAMDKernelCodeT(); - if (IDVal == ".hsatext" || IDVal == ".text") + if (IDVal == ".hsatext") return ParseSectionDirectiveHSAText(); if (IDVal == ".amdgpu_hsa_kernel") @@ -1078,19 +1433,6 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, return true; } -static bool operandsHaveModifiers(const OperandVector &Operands) { - - for (unsigned i = 0, e = Operands.size(); i != e; ++i) { - const AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]); - if (Op.isRegKind() && Op.hasModifiers()) - return true; - if (Op.isImm() && (Op.getImmTy() == AMDGPUOperand::ImmTyOMod || - Op.getImmTy() == AMDGPUOperand::ImmTyClamp)) - return true; - } - return false; -} - AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { @@ -1107,113 +1449,59 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { getLexer().is(AsmToken::EndOfStatement)) return ResTy; - bool Negate = false, Abs = false; - if (getLexer().getKind()== AsmToken::Minus) { - Parser.Lex(); - Negate = true; - } + ResTy = parseRegOrImm(Operands); - if (getLexer().getKind() == AsmToken::Pipe) { - Parser.Lex(); - Abs = true; - } - - switch(getLexer().getKind()) { - case AsmToken::Integer: { - SMLoc S = Parser.getTok().getLoc(); - int64_t IntVal; - if (getParser().parseAbsoluteExpression(IntVal)) - return MatchOperand_ParseFail; - if (!isInt<32>(IntVal) && !isUInt<32>(IntVal)) { - Error(S, "invalid immediate: only 32-bit values are legal"); - return MatchOperand_ParseFail; - } - - if (Negate) - IntVal *= -1; - Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S)); - return MatchOperand_Success; - } - case AsmToken::Real: { - // FIXME: We should emit an error if a double precisions 
floating-point - // value is used. I'm not sure the best way to detect this. - SMLoc S = Parser.getTok().getLoc(); - int64_t IntVal; - if (getParser().parseAbsoluteExpression(IntVal)) - return MatchOperand_ParseFail; + if (ResTy == MatchOperand_Success) + return ResTy; - APFloat F((float)BitsToDouble(IntVal)); - if (Negate) - F.changeSign(); - Operands.push_back( - AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S)); + if (getLexer().getKind() == AsmToken::Identifier) { + // If this identifier is a symbol, we want to create an expression for it. + // It is a little difficult to distinguish between a symbol name, and + // an instruction flag like 'gds'. In order to do this, we parse + // all tokens as expressions and then treate the symbol name as the token + // string when we want to interpret the operand as a token. + const auto &Tok = Parser.getTok(); + SMLoc S = Tok.getLoc(); + const MCExpr *Expr = nullptr; + if (!Parser.parseExpression(Expr)) { + Operands.push_back(AMDGPUOperand::CreateExpr(Expr, S)); return MatchOperand_Success; } - case AsmToken::Identifier: { - SMLoc S, E; - unsigned RegNo; - if (!ParseRegister(RegNo, S, E)) { - - bool HasModifiers = operandsHaveModifiers(Operands); - unsigned Modifiers = 0; - if (Negate) - Modifiers |= 0x1; - - if (Abs) { - if (getLexer().getKind() != AsmToken::Pipe) - return MatchOperand_ParseFail; - Parser.Lex(); - Modifiers |= 0x2; - } - - if (Modifiers && !HasModifiers) { - // We are adding a modifier to src1 or src2 and previous sources - // don't have modifiers, so we need to go back and empty modifers - // for each previous source. - for (unsigned PrevRegIdx = Operands.size() - 1; PrevRegIdx > 1; - --PrevRegIdx) { - - AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[PrevRegIdx]); - RegOp.setModifiers(0); - } - } - - - Operands.push_back(AMDGPUOperand::CreateReg( - RegNo, S, E, getContext().getRegisterInfo(), &getSTI(), - isForcedVOP3())); - - if (HasModifiers || Modifiers) { - AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[Operands.size() - 1]); - RegOp.setModifiers(Modifiers); - - } - } else { - Operands.push_back(AMDGPUOperand::CreateToken(Parser.getTok().getString(), - S)); - Parser.Lex(); - } - return MatchOperand_Success; - } - default: - return MatchOperand_NoMatch; + Operands.push_back(AMDGPUOperand::CreateToken(Tok.getString(), Tok.getLoc())); + Parser.Lex(); + return MatchOperand_Success; } + return MatchOperand_NoMatch; } -bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, - StringRef Name, - SMLoc NameLoc, OperandVector &Operands) { - +StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { // Clear any forced encodings from the previous instruction. 
setForcedEncodingSize(0); + setForcedDPP(false); + setForcedSDWA(false); - if (Name.endswith("_e64")) + if (Name.endswith("_e64")) { setForcedEncodingSize(64); - else if (Name.endswith("_e32")) + return Name.substr(0, Name.size() - 4); + } else if (Name.endswith("_e32")) { setForcedEncodingSize(32); + return Name.substr(0, Name.size() - 4); + } else if (Name.endswith("_dpp")) { + setForcedDPP(true); + return Name.substr(0, Name.size() - 4); + } else if (Name.endswith("_sdwa")) { + setForcedSDWA(true); + return Name.substr(0, Name.size() - 5); + } + return Name; +} +bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, + SMLoc NameLoc, OperandVector &Operands) { // Add the instruction mnemonic + Name = parseMnemonicSuffix(Name); Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc)); while (!getLexer().is(AsmToken::EndOfStatement)) { @@ -1225,20 +1513,21 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, switch (Res) { case MatchOperand_Success: break; - case MatchOperand_ParseFail: return Error(getLexer().getLoc(), - "failed parsing operand."); - case MatchOperand_NoMatch: return Error(getLexer().getLoc(), - "not a valid operand."); + case MatchOperand_ParseFail: + Error(getLexer().getLoc(), "failed parsing operand."); + while (!getLexer().is(AsmToken::EndOfStatement)) { + Parser.Lex(); + } + return true; + case MatchOperand_NoMatch: + Error(getLexer().getLoc(), "not a valid operand."); + while (!getLexer().is(AsmToken::EndOfStatement)) { + Parser.Lex(); + } + return true; } } - // Once we reach end of statement, continue parsing so we can add default - // values for optional arguments. - AMDGPUAsmParser::OperandMatchResultTy Res; - while ((Res = parseOperand(Operands, Name)) != MatchOperand_NoMatch) { - if (Res != MatchOperand_Success) - return Error(getLexer().getLoc(), "failed parsing operand."); - } return false; } @@ -1247,22 +1536,14 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, //===----------------------------------------------------------------------===// AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int, - int64_t Default) { - - // We are at the end of the statement, and this is a default argument, so - // use a default value. 
- if (getLexer().is(AsmToken::EndOfStatement)) { - Int = Default; - return MatchOperand_Success; - } - +AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) { switch(getLexer().getKind()) { default: return MatchOperand_NoMatch; case AsmToken::Identifier: { - StringRef OffsetName = Parser.getTok().getString(); - if (!OffsetName.equals(Prefix)) + StringRef Name = Parser.getTok().getString(); + if (!Name.equals(Prefix)) { return MatchOperand_NoMatch; + } Parser.Lex(); if (getLexer().isNot(AsmToken::Colon)) @@ -1282,16 +1563,21 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int, AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, - enum AMDGPUOperand::ImmTy ImmTy) { + enum AMDGPUOperand::ImmTy ImmTy, + bool (*ConvertResult)(int64_t&)) { SMLoc S = Parser.getTok().getLoc(); - int64_t Offset = 0; + int64_t Value = 0; - AMDGPUAsmParser::OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Offset); + AMDGPUAsmParser::OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); if (Res != MatchOperand_Success) return Res; - Operands.push_back(AMDGPUOperand::CreateImm(Offset, S, ImmTy)); + if (ConvertResult && !ConvertResult(Value)) { + return MatchOperand_ParseFail; + } + + Operands.push_back(AMDGPUOperand::CreateImm(Value, S, ImmTy)); return MatchOperand_Success; } @@ -1327,101 +1613,52 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, return MatchOperand_Success; } -static bool operandsHasOptionalOp(const OperandVector &Operands, - const OptionalOperand &OOp) { - for (unsigned i = 0; i < Operands.size(); i++) { - const AMDGPUOperand &ParsedOp = ((const AMDGPUOperand &)*Operands[i]); - if ((ParsedOp.isImm() && ParsedOp.getImmTy() == OOp.Type) || - (ParsedOp.isToken() && ParsedOp.getToken() == OOp.Name)) - return true; +typedef std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalImmIndexMap; +void addOptionalImmOperand(MCInst& Inst, const OperandVector& Operands, + OptionalImmIndexMap& OptionalIdx, + enum AMDGPUOperand::ImmTy ImmT, int64_t Default = 0) { + auto i = OptionalIdx.find(ImmT); + if (i != OptionalIdx.end()) { + unsigned Idx = i->second; + ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); + } else { + Inst.addOperand(MCOperand::createImm(Default)); } - return false; } AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseOptionalOps(const ArrayRef<OptionalOperand> &OptionalOps, - OperandVector &Operands) { - SMLoc S = Parser.getTok().getLoc(); - for (const OptionalOperand &Op : OptionalOps) { - if (operandsHasOptionalOp(Operands, Op)) - continue; - AMDGPUAsmParser::OperandMatchResultTy Res; - int64_t Value; - if (Op.IsBit) { - Res = parseNamedBit(Op.Name, Operands, Op.Type); - if (Res == MatchOperand_NoMatch) - continue; - return Res; - } - - Res = parseIntWithPrefix(Op.Name, Value, Op.Default); - - if (Res == MatchOperand_NoMatch) - continue; - - if (Res != MatchOperand_Success) - return Res; +AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { + if (getLexer().isNot(AsmToken::Identifier)) { + return MatchOperand_NoMatch; + } + StringRef Tok = Parser.getTok().getString(); + if (Tok != Prefix) { + return MatchOperand_NoMatch; + } - if (Op.ConvertResult && !Op.ConvertResult(Value)) { - return MatchOperand_ParseFail; - } + Parser.Lex(); + if (getLexer().isNot(AsmToken::Colon)) { + return MatchOperand_ParseFail; + } - Operands.push_back(AMDGPUOperand::CreateImm(Value, S, Op.Type)); - return MatchOperand_Success; + 
Parser.Lex(); + if (getLexer().isNot(AsmToken::Identifier)) { + return MatchOperand_ParseFail; } - return MatchOperand_NoMatch; + + Value = Parser.getTok().getString(); + return MatchOperand_Success; } //===----------------------------------------------------------------------===// // ds //===----------------------------------------------------------------------===// -static const OptionalOperand DSOptionalOps [] = { - {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr}, - {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr} -}; - -static const OptionalOperand DSOptionalOpsOff01 [] = { - {"offset0", AMDGPUOperand::ImmTyDSOffset0, false, 0, nullptr}, - {"offset1", AMDGPUOperand::ImmTyDSOffset1, false, 0, nullptr}, - {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr} -}; - -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseDSOptionalOps(OperandVector &Operands) { - return parseOptionalOps(DSOptionalOps, Operands); -} -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseDSOff01OptionalOps(OperandVector &Operands) { - return parseOptionalOps(DSOptionalOpsOff01, Operands); -} - -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseDSOffsetOptional(OperandVector &Operands) { - SMLoc S = Parser.getTok().getLoc(); - AMDGPUAsmParser::OperandMatchResultTy Res = - parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset); - if (Res == MatchOperand_NoMatch) { - Operands.push_back(AMDGPUOperand::CreateImm(0, S, - AMDGPUOperand::ImmTyOffset)); - Res = MatchOperand_Success; - } - return Res; -} - -bool AMDGPUOperand::isDSOffset() const { - return isImm() && isUInt<16>(getImm()); -} - -bool AMDGPUOperand::isDSOffset01() const { - return isImm() && isUInt<8>(getImm()); -} - void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, const OperandVector &Operands) { - std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx; + OptionalImmIndexMap OptionalIdx; for (unsigned i = 1, e = Operands.size(); i != e; ++i) { AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); @@ -1436,13 +1673,10 @@ void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, OptionalIdx[Op.getImmTy()] = i; } - unsigned Offset0Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset0]; - unsigned Offset1Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset1]; - unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS]; + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); - ((AMDGPUOperand &)*Operands[Offset0Idx]).addImmOperands(Inst, 1); // offset0 - ((AMDGPUOperand &)*Operands[Offset1Idx]).addImmOperands(Inst, 1); // offset1 - ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 } @@ -1469,12 +1703,11 @@ void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) { OptionalIdx[Op.getImmTy()] = i; } - unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset]; - ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1); // offset + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); if (!GDSOnly) { - unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS]; - ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); } 
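+  // DS instructions handled here carry a trailing m0 operand in MCInst form;
+  // the assembly syntax never spells it, so the parser supplies the physical
+  // M0 register unconditionally (same as in cvtDSOffset01 above).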
Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 } @@ -1516,7 +1749,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { CntMask = 0x7; CntShift = 4; } else if (CntName == "lgkmcnt") { - CntMask = 0x7; + CntMask = 0xf; CntShift = 8; } else { return true; @@ -1532,8 +1765,8 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { // Disable all counters by default. // vmcnt [3:0] // expcnt [6:4] - // lgkmcnt [10:8] - int64_t CntVal = 0x77f; + // lgkmcnt [11:8] + int64_t CntVal = 0xf7f; SMLoc S = Parser.getTok().getLoc(); switch(getLexer().getKind()) { @@ -1555,10 +1788,298 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { return MatchOperand_Success; } +bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width) { + using namespace llvm::AMDGPU::Hwreg; + + if (Parser.getTok().getString() != "hwreg") + return true; + Parser.Lex(); + + if (getLexer().isNot(AsmToken::LParen)) + return true; + Parser.Lex(); + + if (getLexer().is(AsmToken::Identifier)) { + HwReg.IsSymbolic = true; + HwReg.Id = ID_UNKNOWN_; + const StringRef tok = Parser.getTok().getString(); + for (int i = ID_SYMBOLIC_FIRST_; i < ID_SYMBOLIC_LAST_; ++i) { + if (tok == IdSymbolic[i]) { + HwReg.Id = i; + break; + } + } + Parser.Lex(); + } else { + HwReg.IsSymbolic = false; + if (getLexer().isNot(AsmToken::Integer)) + return true; + if (getParser().parseAbsoluteExpression(HwReg.Id)) + return true; + } + + if (getLexer().is(AsmToken::RParen)) { + Parser.Lex(); + return false; + } + + // optional params + if (getLexer().isNot(AsmToken::Comma)) + return true; + Parser.Lex(); + + if (getLexer().isNot(AsmToken::Integer)) + return true; + if (getParser().parseAbsoluteExpression(Offset)) + return true; + + if (getLexer().isNot(AsmToken::Comma)) + return true; + Parser.Lex(); + + if (getLexer().isNot(AsmToken::Integer)) + return true; + if (getParser().parseAbsoluteExpression(Width)) + return true; + + if (getLexer().isNot(AsmToken::RParen)) + return true; + Parser.Lex(); + + return false; +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { + using namespace llvm::AMDGPU::Hwreg; + + int64_t Imm16Val = 0; + SMLoc S = Parser.getTok().getLoc(); + + switch(getLexer().getKind()) { + default: return MatchOperand_NoMatch; + case AsmToken::Integer: + // The operand can be an integer value. + if (getParser().parseAbsoluteExpression(Imm16Val)) + return MatchOperand_NoMatch; + if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { + Error(S, "invalid immediate: only 16-bit values are legal"); + // Do not return error code, but create an imm operand anyway and proceed + // to the next operand, if any. That avoids unneccessary error messages. 
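+      // A raw integer is the already-packed SIMM16 value. Assuming the usual
+      // shifts (ID_SHIFT_ = 0, OFFSET_SHIFT_ = 6, WIDTH_M1_SHIFT_ = 11), its
+      // layout matches what the symbolic hwreg(...) form below builds:
+      //   [5:0] register id, [10:6] bit offset, [15:11] field width - 1
+      // e.g. hwreg(4, 8, 16) packs to (4 << 0) | (8 << 6) | (15 << 11) = 0x7A04.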
+ } + break; + + case AsmToken::Identifier: { + OperandInfoTy HwReg(ID_UNKNOWN_); + int64_t Offset = OFFSET_DEFAULT_; + int64_t Width = WIDTH_M1_DEFAULT_ + 1; + if (parseHwregConstruct(HwReg, Offset, Width)) + return MatchOperand_ParseFail; + if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) { + if (HwReg.IsSymbolic) + Error(S, "invalid symbolic name of hardware register"); + else + Error(S, "invalid code of hardware register: only 6-bit values are legal"); + } + if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset)) + Error(S, "invalid bit offset: only 5-bit values are legal"); + if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1)) + Error(S, "invalid bitfield width: only values from 1 to 32 are legal"); + Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_); + } + break; + } + Operands.push_back(AMDGPUOperand::CreateImm(Imm16Val, S, AMDGPUOperand::ImmTyHwreg)); + return MatchOperand_Success; +} + bool AMDGPUOperand::isSWaitCnt() const { return isImm(); } +bool AMDGPUOperand::isHwreg() const { + return isImmTy(ImmTyHwreg); +} + +bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) { + using namespace llvm::AMDGPU::SendMsg; + + if (Parser.getTok().getString() != "sendmsg") + return true; + Parser.Lex(); + + if (getLexer().isNot(AsmToken::LParen)) + return true; + Parser.Lex(); + + if (getLexer().is(AsmToken::Identifier)) { + Msg.IsSymbolic = true; + Msg.Id = ID_UNKNOWN_; + const std::string tok = Parser.getTok().getString(); + for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) { + switch(i) { + default: continue; // Omit gaps. + case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: case ID_SYSMSG: break; + } + if (tok == IdSymbolic[i]) { + Msg.Id = i; + break; + } + } + Parser.Lex(); + } else { + Msg.IsSymbolic = false; + if (getLexer().isNot(AsmToken::Integer)) + return true; + if (getParser().parseAbsoluteExpression(Msg.Id)) + return true; + if (getLexer().is(AsmToken::Integer)) + if (getParser().parseAbsoluteExpression(Msg.Id)) + Msg.Id = ID_UNKNOWN_; + } + if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest. + return false; + + if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) { + if (getLexer().isNot(AsmToken::RParen)) + return true; + Parser.Lex(); + return false; + } + + if (getLexer().isNot(AsmToken::Comma)) + return true; + Parser.Lex(); + + assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG); + Operation.Id = ID_UNKNOWN_; + if (getLexer().is(AsmToken::Identifier)) { + Operation.IsSymbolic = true; + const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic; + const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_; + const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_; + const StringRef Tok = Parser.getTok().getString(); + for (int i = F; i < L; ++i) { + if (Tok == S[i]) { + Operation.Id = i; + break; + } + } + Parser.Lex(); + } else { + Operation.IsSymbolic = false; + if (getLexer().isNot(AsmToken::Integer)) + return true; + if (getParser().parseAbsoluteExpression(Operation.Id)) + return true; + } + + if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { + // Stream id is optional. 
+ if (getLexer().is(AsmToken::RParen)) { + Parser.Lex(); + return false; + } + + if (getLexer().isNot(AsmToken::Comma)) + return true; + Parser.Lex(); + + if (getLexer().isNot(AsmToken::Integer)) + return true; + if (getParser().parseAbsoluteExpression(StreamId)) + return true; + } + + if (getLexer().isNot(AsmToken::RParen)) + return true; + Parser.Lex(); + return false; +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { + using namespace llvm::AMDGPU::SendMsg; + + int64_t Imm16Val = 0; + SMLoc S = Parser.getTok().getLoc(); + + switch(getLexer().getKind()) { + default: + return MatchOperand_NoMatch; + case AsmToken::Integer: + // The operand can be an integer value. + if (getParser().parseAbsoluteExpression(Imm16Val)) + return MatchOperand_NoMatch; + if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { + Error(S, "invalid immediate: only 16-bit values are legal"); + // Do not return error code, but create an imm operand anyway and proceed + // to the next operand, if any. That avoids unneccessary error messages. + } + break; + case AsmToken::Identifier: { + OperandInfoTy Msg(ID_UNKNOWN_); + OperandInfoTy Operation(OP_UNKNOWN_); + int64_t StreamId = STREAM_ID_DEFAULT_; + if (parseSendMsgConstruct(Msg, Operation, StreamId)) + return MatchOperand_ParseFail; + do { + // Validate and encode message ID. + if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE) + || Msg.Id == ID_SYSMSG)) { + if (Msg.IsSymbolic) + Error(S, "invalid/unsupported symbolic name of message"); + else + Error(S, "invalid/unsupported code of message"); + break; + } + Imm16Val = (Msg.Id << ID_SHIFT_); + // Validate and encode operation ID. + if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) { + if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) { + if (Operation.IsSymbolic) + Error(S, "invalid symbolic name of GS_OP"); + else + Error(S, "invalid code of GS_OP: only 2-bit values are legal"); + break; + } + if (Operation.Id == OP_GS_NOP + && Msg.Id != ID_GS_DONE) { + Error(S, "invalid GS_OP: NOP is for GS_DONE only"); + break; + } + Imm16Val |= (Operation.Id << OP_SHIFT_); + } + if (Msg.Id == ID_SYSMSG) { + if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) { + if (Operation.IsSymbolic) + Error(S, "invalid/unsupported symbolic name of SYSMSG_OP"); + else + Error(S, "invalid/unsupported code of SYSMSG_OP"); + break; + } + Imm16Val |= (Operation.Id << OP_SHIFT_); + } + // Validate and encode stream ID. + if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { + if (! 
(STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) { + Error(S, "invalid stream id: only 2-bit values are legal"); + break; + } + Imm16Val |= (StreamId << STREAM_ID_SHIFT_); + } + } while (0); + } + break; + } + Operands.push_back(AMDGPUOperand::CreateImm(Imm16Val, S, AMDGPUOperand::ImmTySendMsg)); + return MatchOperand_Success; +} + +bool AMDGPUOperand::isSendMsg() const { + return isImmTy(ImmTySendMsg); +} + //===----------------------------------------------------------------------===// // sopp branch targets //===----------------------------------------------------------------------===// @@ -1587,33 +2108,26 @@ AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { } //===----------------------------------------------------------------------===// -// flat +// mubuf //===----------------------------------------------------------------------===// -static const OptionalOperand FlatOptionalOps [] = { - {"glc", AMDGPUOperand::ImmTyGLC, true, 0, nullptr}, - {"slc", AMDGPUOperand::ImmTySLC, true, 0, nullptr}, - {"tfe", AMDGPUOperand::ImmTyTFE, true, 0, nullptr} -}; - -static const OptionalOperand FlatAtomicOptionalOps [] = { - {"slc", AMDGPUOperand::ImmTySLC, true, 0, nullptr}, - {"tfe", AMDGPUOperand::ImmTyTFE, true, 0, nullptr} -}; +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { + return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyGLC); +} -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseFlatOptionalOps(OperandVector &Operands) { - return parseOptionalOps(FlatOptionalOps, Operands); +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { + return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTySLC); } -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseFlatAtomicOptionalOps(OperandVector &Operands) { - return parseOptionalOps(FlatAtomicOptionalOps, Operands); +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultTFE() const { + return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyTFE); } -void AMDGPUAsmParser::cvtFlat(MCInst &Inst, - const OperandVector &Operands) { - std::map<AMDGPUOperand::ImmTy, unsigned> OptionalIdx; +void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, + const OperandVector &Operands, + bool IsAtomic, bool IsAtomicReturn) { + OptionalImmIndexMap OptionalIdx; + assert(IsAtomicReturn ? IsAtomic : true); for (unsigned i = 1, e = Operands.size(); i != e; ++i) { AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); @@ -1624,129 +2138,128 @@ void AMDGPUAsmParser::cvtFlat(MCInst &Inst, continue; } - // Handle 'glc' token which is sometimes hard-coded into the + // Handle the case where soffset is an immediate + if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { + Op.addImmOperands(Inst, 1); + continue; + } + + // Handle tokens like 'offen' which are sometimes hard-coded into the // asm string. There are no MCInst operands for these. - if (Op.isToken()) + if (Op.isToken()) { continue; + } + assert(Op.isImm()); // Handle optional arguments OptionalIdx[Op.getImmTy()] = i; - } - // flat atomic instructions don't have a glc argument. - if (OptionalIdx.count(AMDGPUOperand::ImmTyGLC)) { - unsigned GLCIdx = OptionalIdx[AMDGPUOperand::ImmTyGLC]; - ((AMDGPUOperand &)*Operands[GLCIdx]).addImmOperands(Inst, 1); + // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns. + if (IsAtomicReturn) { + MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning. 
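+    // For the RTN forms the source data and the returned value share the
+    // same register in the assembly (e.g. "buffer_atomic_add v4, ..."), so
+    // the single parsed register is duplicated to fill both operands.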
+ Inst.insert(I, *I); } - unsigned SLCIdx = OptionalIdx[AMDGPUOperand::ImmTySLC]; - unsigned TFEIdx = OptionalIdx[AMDGPUOperand::ImmTyTFE]; - - ((AMDGPUOperand &)*Operands[SLCIdx]).addImmOperands(Inst, 1); - ((AMDGPUOperand &)*Operands[TFEIdx]).addImmOperands(Inst, 1); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); + if (!IsAtomic) { // glc is hard-coded. + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); + } + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); } //===----------------------------------------------------------------------===// -// mubuf +// mimg //===----------------------------------------------------------------------===// -static const OptionalOperand MubufOptionalOps [] = { - {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr}, - {"glc", AMDGPUOperand::ImmTyGLC, true, 0, nullptr}, - {"slc", AMDGPUOperand::ImmTySLC, true, 0, nullptr}, - {"tfe", AMDGPUOperand::ImmTyTFE, true, 0, nullptr} -}; +void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands) { + unsigned I = 1; + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { + ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); + } -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseMubufOptionalOps(OperandVector &Operands) { - return parseOptionalOps(MubufOptionalOps, Operands); -} + OptionalImmIndexMap OptionalIdx; -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseOffset(OperandVector &Operands) { - return parseIntWithPrefix("offset", Operands); -} + for (unsigned E = Operands.size(); I != E; ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseGLC(OperandVector &Operands) { - return parseNamedBit("glc", Operands); -} + // Add the register arguments + if (Op.isRegOrImm()) { + Op.addRegOrImmOperands(Inst, 1); + continue; + } else if (Op.isImmModifier()) { + OptionalIdx[Op.getImmTy()] = I; + } else { + assert(false); + } + } -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseSLC(OperandVector &Operands) { - return parseNamedBit("slc", Operands); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); } -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseTFE(OperandVector &Operands) { - return parseNamedBit("tfe", Operands); -} +void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { + unsigned I = 1; + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { + ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); + } -bool AMDGPUOperand::isMubufOffset() const { - return isImm() && isUInt<12>(getImm()); -} + // Add src, same as dst + ((AMDGPUOperand &)*Operands[I]).addRegOperands(Inst, 1); -void 
AMDGPUAsmParser::cvtMubuf(MCInst &Inst, - const OperandVector &Operands) { - std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx; + OptionalImmIndexMap OptionalIdx; - for (unsigned i = 1, e = Operands.size(); i != e; ++i) { - AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + for (unsigned E = Operands.size(); I != E; ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); // Add the register arguments - if (Op.isReg()) { - Op.addRegOperands(Inst, 1); - continue; - } - - // Handle the case where soffset is an immediate - if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { - Op.addImmOperands(Inst, 1); - continue; - } - - // Handle tokens like 'offen' which are sometimes hard-coded into the - // asm string. There are no MCInst operands for these. - if (Op.isToken()) { + if (Op.isRegOrImm()) { + Op.addRegOrImmOperands(Inst, 1); continue; + } else if (Op.isImmModifier()) { + OptionalIdx[Op.getImmTy()] = I; + } else { + assert(false); } - assert(Op.isImm()); - - // Handle optional arguments - OptionalIdx[Op.getImmTy()] = i; } - assert(OptionalIdx.size() == 4); - - unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset]; - unsigned GLCIdx = OptionalIdx[AMDGPUOperand::ImmTyGLC]; - unsigned SLCIdx = OptionalIdx[AMDGPUOperand::ImmTySLC]; - unsigned TFEIdx = OptionalIdx[AMDGPUOperand::ImmTyTFE]; + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); +} - ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1); - ((AMDGPUOperand &)*Operands[GLCIdx]).addImmOperands(Inst, 1); - ((AMDGPUOperand &)*Operands[SLCIdx]).addImmOperands(Inst, 1); - ((AMDGPUOperand &)*Operands[TFEIdx]).addImmOperands(Inst, 1); +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDMask() const { + return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyDMask); } -//===----------------------------------------------------------------------===// -// mimg -//===----------------------------------------------------------------------===// +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultUNorm() const { + return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyUNorm); +} -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseDMask(OperandVector &Operands) { - return parseIntWithPrefix("dmask", Operands); +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDA() const { + return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyDA); } -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseUNorm(OperandVector &Operands) { - return parseNamedBit("unorm", Operands); +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultR128() const { + return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyR128); } -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseR128(OperandVector &Operands) { - return parseNamedBit("r128", Operands); +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultLWE() const { + return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyLWE); } 
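+// The MIMG converters above follow the general optional-operand pattern of
+// this patch: modifiers may be written in any order or omitted, e.g.
+//   image_load v[0:3], v[4:7], s[8:15] dmask:0xf unorm glc
+// cvtMIMG/cvtMIMGAtomic record each one in OptionalIdx while scanning the
+// source order, then re-emit them in the fixed MCInst order through
+// addOptionalImmOperand; the default*() immediates defined above are there
+// for the generated matcher to fill in operands that were not written.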
//===----------------------------------------------------------------------===// @@ -1766,6 +2279,14 @@ bool AMDGPUOperand::isSMRDLiteralOffset() const { return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); } +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset() const { + return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyOffset); +} + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { + return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyOffset); +} + //===----------------------------------------------------------------------===// // vop3 //===----------------------------------------------------------------------===// @@ -1792,91 +2313,435 @@ static bool ConvertOmodDiv(int64_t &Div) { return false; } -static const OptionalOperand VOP3OptionalOps [] = { - {"clamp", AMDGPUOperand::ImmTyClamp, true, 0, nullptr}, - {"mul", AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodMul}, - {"div", AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodDiv}, +static bool ConvertBoundCtrl(int64_t &BoundCtrl) { + if (BoundCtrl == 0) { + BoundCtrl = 1; + return true; + } else if (BoundCtrl == -1) { + BoundCtrl = 0; + return true; + } + return false; +} + +// Note: the order in this table matches the order of operands in AsmString. +static const OptionalOperand AMDGPUOptionalOperandTable[] = { + {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, + {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, + {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, + {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, + {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, + {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, + {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, + {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, + {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, + {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, + {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, + {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, + {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, + {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, + {"r128", AMDGPUOperand::ImmTyR128, true, nullptr}, + {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, + {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, + {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, + {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, + {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, + {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, + {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, + {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, + {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, }; -static bool isVOP3(OperandVector &Operands) { - if (operandsHaveModifiers(Operands)) - return true; +AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { + OperandMatchResultTy res; + for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { + // try to parse any optional operand here + if (Op.IsBit) { + res = parseNamedBit(Op.Name, Operands, Op.Type); + } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { + res = parseOModOperand(Operands); + } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || + Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || + Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { + res = parseSDWASel(Operands, Op.Name, Op.Type); + } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { + res = parseSDWADstUnused(Operands); 
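+      // (dst_sel/src0_sel/src1_sel and dst_unused take symbolic values such
+      // as "dst_sel:DWORD", so they go through the SDWA-specific parsers
+      // rather than the generic prefixed-integer path below.)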
+ } else { + res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); + } + if (res != MatchOperand_NoMatch) { + return res; + } + } + return MatchOperand_NoMatch; +} - AMDGPUOperand &DstOp = ((AMDGPUOperand&)*Operands[1]); +AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) +{ + StringRef Name = Parser.getTok().getString(); + if (Name == "mul") { + return parseIntWithPrefix("mul", Operands, AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); + } else if (Name == "div") { + return parseIntWithPrefix("div", Operands, AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); + } else { + return MatchOperand_NoMatch; + } +} - if (DstOp.isReg() && DstOp.isRegClass(AMDGPU::SGPR_64RegClassID)) - return true; +void AMDGPUAsmParser::cvtId(MCInst &Inst, const OperandVector &Operands) { + unsigned I = 1; + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { + ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); + } + for (unsigned E = Operands.size(); I != E; ++I) + ((AMDGPUOperand &)*Operands[I]).addRegOrImmOperands(Inst, 1); +} - if (Operands.size() >= 5) - return true; +void AMDGPUAsmParser::cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands) { + uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; + if (TSFlags & SIInstrFlags::VOP3) { + cvtVOP3(Inst, Operands); + } else { + cvtId(Inst, Operands); + } +} - if (Operands.size() > 3) { - AMDGPUOperand &Src1Op = ((AMDGPUOperand&)*Operands[3]); - if (Src1Op.getReg() && (Src1Op.isRegClass(AMDGPU::SReg_32RegClassID) || - Src1Op.isRegClass(AMDGPU::SReg_64RegClassID))) - return true; +void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { + OptionalImmIndexMap OptionalIdx; + unsigned I = 1; + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { + ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); + } + + for (unsigned E = Operands.size(); I != E; ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + if (Op.isRegOrImmWithInputMods()) { + // only fp modifiers allowed in VOP3 + Op.addRegOrImmWithFPInputModsOperands(Inst, 2); + } else if (Op.isImm()) { + OptionalIdx[Op.getImmTy()] = I; + } else { + assert(false); + } + } + + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); +} + +//===----------------------------------------------------------------------===// +// dpp +//===----------------------------------------------------------------------===// + +bool AMDGPUOperand::isDPPCtrl() const { + bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); + if (result) { + int64_t Imm = getImm(); + return ((Imm >= 0x000) && (Imm <= 0x0ff)) || + ((Imm >= 0x101) && (Imm <= 0x10f)) || + ((Imm >= 0x111) && (Imm <= 0x11f)) || + ((Imm >= 0x121) && (Imm <= 0x12f)) || + (Imm == 0x130) || + (Imm == 0x134) || + (Imm == 0x138) || + (Imm == 0x13c) || + (Imm == 0x140) || + (Imm == 0x141) || + (Imm == 0x142) || + (Imm == 0x143); } return false; } AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseVOP3OptionalOps(OperandVector &Operands) { - - // The value returned by this function may change after parsing - // an operand so store the original value here. 
- bool HasModifiers = operandsHaveModifiers(Operands); - - bool IsVOP3 = isVOP3(Operands); - if (HasModifiers || IsVOP3 || - getLexer().isNot(AsmToken::EndOfStatement) || - getForcedEncodingSize() == 64) { - - AMDGPUAsmParser::OperandMatchResultTy Res = - parseOptionalOps(VOP3OptionalOps, Operands); - - if (!HasModifiers && Res == MatchOperand_Success) { - // We have added a modifier operation, so we need to make sure all - // previous register operands have modifiers - for (unsigned i = 2, e = Operands.size(); i != e; ++i) { - AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]); - if (Op.isReg()) - Op.setModifiers(0); +AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { + SMLoc S = Parser.getTok().getLoc(); + StringRef Prefix; + int64_t Int; + + if (getLexer().getKind() == AsmToken::Identifier) { + Prefix = Parser.getTok().getString(); + } else { + return MatchOperand_NoMatch; + } + + if (Prefix == "row_mirror") { + Int = 0x140; + } else if (Prefix == "row_half_mirror") { + Int = 0x141; + } else { + // Check to prevent parseDPPCtrlOps from eating invalid tokens + if (Prefix != "quad_perm" + && Prefix != "row_shl" + && Prefix != "row_shr" + && Prefix != "row_ror" + && Prefix != "wave_shl" + && Prefix != "wave_rol" + && Prefix != "wave_shr" + && Prefix != "wave_ror" + && Prefix != "row_bcast") { + return MatchOperand_NoMatch; + } + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Colon)) + return MatchOperand_ParseFail; + + if (Prefix == "quad_perm") { + // quad_perm:[%d,%d,%d,%d] + Parser.Lex(); + if (getLexer().isNot(AsmToken::LBrac)) + return MatchOperand_ParseFail; + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Integer)) + return MatchOperand_ParseFail; + Int = getLexer().getTok().getIntVal(); + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Comma)) + return MatchOperand_ParseFail; + Parser.Lex(); + if (getLexer().isNot(AsmToken::Integer)) + return MatchOperand_ParseFail; + Int += (getLexer().getTok().getIntVal() << 2); + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Comma)) + return MatchOperand_ParseFail; + Parser.Lex(); + if (getLexer().isNot(AsmToken::Integer)) + return MatchOperand_ParseFail; + Int += (getLexer().getTok().getIntVal() << 4); + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Comma)) + return MatchOperand_ParseFail; + Parser.Lex(); + if (getLexer().isNot(AsmToken::Integer)) + return MatchOperand_ParseFail; + Int += (getLexer().getTok().getIntVal() << 6); + + Parser.Lex(); + if (getLexer().isNot(AsmToken::RBrac)) + return MatchOperand_ParseFail; + + } else { + // sel:%d + Parser.Lex(); + if (getLexer().isNot(AsmToken::Integer)) + return MatchOperand_ParseFail; + Int = getLexer().getTok().getIntVal(); + + if (Prefix == "row_shl") { + Int |= 0x100; + } else if (Prefix == "row_shr") { + Int |= 0x110; + } else if (Prefix == "row_ror") { + Int |= 0x120; + } else if (Prefix == "wave_shl") { + Int = 0x130; + } else if (Prefix == "wave_rol") { + Int = 0x134; + } else if (Prefix == "wave_shr") { + Int = 0x138; + } else if (Prefix == "wave_ror") { + Int = 0x13C; + } else if (Prefix == "row_bcast") { + if (Int == 15) { + Int = 0x142; + } else if (Int == 31) { + Int = 0x143; + } else { + return MatchOperand_ParseFail; + } + } else { + return MatchOperand_ParseFail; } } - return Res; } - return MatchOperand_NoMatch; + Parser.Lex(); // eat last token + + Operands.push_back(AMDGPUOperand::CreateImm(Int, S, + AMDGPUOperand::ImmTyDppCtrl)); + return MatchOperand_Success; } -void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { +AMDGPUOperand::Ptr 
AMDGPUAsmParser::defaultRowMask() const { + return AMDGPUOperand::CreateImm(0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); +} + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { + return AMDGPUOperand::CreateImm(0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); +} + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { + return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); +} + +void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) { + OptionalImmIndexMap OptionalIdx; - unsigned i = 1; + unsigned I = 1; const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); - if (Desc.getNumDefs() > 0) { - ((AMDGPUOperand &)*Operands[i++]).addRegOperands(Inst, 1); + for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { + ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); } - std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx; + for (unsigned E = Operands.size(); I != E; ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + // Add the register arguments + if (Op.isRegOrImmWithInputMods()) { + // Only float modifiers supported in DPP + Op.addRegOrImmWithFPInputModsOperands(Inst, 2); + } else if (Op.isDPPCtrl()) { + Op.addImmOperands(Inst, 1); + } else if (Op.isImm()) { + // Handle optional arguments + OptionalIdx[Op.getImmTy()] = I; + } else { + llvm_unreachable("Invalid operand type"); + } + } - if (operandsHaveModifiers(Operands)) { - for (unsigned e = Operands.size(); i != e; ++i) { - AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); +} - if (Op.isRegWithInputMods()) { - ((AMDGPUOperand &)*Operands[i]).addRegWithInputModsOperands(Inst, 2); - continue; - } - OptionalIdx[Op.getImmTy()] = i; - } +//===----------------------------------------------------------------------===// +// sdwa +//===----------------------------------------------------------------------===// - unsigned ClampIdx = OptionalIdx[AMDGPUOperand::ImmTyClamp]; - unsigned OModIdx = OptionalIdx[AMDGPUOperand::ImmTyOMod]; +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, + AMDGPUOperand::ImmTy Type) { + SMLoc S = Parser.getTok().getLoc(); + StringRef Value; + AMDGPUAsmParser::OperandMatchResultTy res; - ((AMDGPUOperand &)*Operands[ClampIdx]).addImmOperands(Inst, 1); - ((AMDGPUOperand &)*Operands[OModIdx]).addImmOperands(Inst, 1); - } else { - for (unsigned e = Operands.size(); i != e; ++i) - ((AMDGPUOperand &)*Operands[i]).addRegOrImmOperands(Inst, 1); + res = parseStringWithPrefix(Prefix, Value); + if (res != MatchOperand_Success) { + return res; + } + + int64_t Int; + Int = StringSwitch<int64_t>(Value) + .Case("BYTE_0", 0) + .Case("BYTE_1", 1) + .Case("BYTE_2", 2) + .Case("BYTE_3", 3) + .Case("WORD_0", 4) + .Case("WORD_1", 5) + .Case("DWORD", 6) + .Default(0xffffffff); + Parser.Lex(); // eat last token + + if (Int == 0xffffffff) { + return MatchOperand_ParseFail; + } + + Operands.push_back(AMDGPUOperand::CreateImm(Int, S, Type)); + return MatchOperand_Success; +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { + SMLoc S = Parser.getTok().getLoc(); + StringRef Value; + AMDGPUAsmParser::OperandMatchResultTy res; + + res = parseStringWithPrefix("dst_unused", Value); + if 
(res != MatchOperand_Success) { + return res; + } + + int64_t Int; + Int = StringSwitch<int64_t>(Value) + .Case("UNUSED_PAD", 0) + .Case("UNUSED_SEXT", 1) + .Case("UNUSED_PRESERVE", 2) + .Default(0xffffffff); + Parser.Lex(); // eat last token + + if (Int == 0xffffffff) { + return MatchOperand_ParseFail; + } + + Operands.push_back(AMDGPUOperand::CreateImm(Int, S, + AMDGPUOperand::ImmTySdwaDstUnused)); + return MatchOperand_Success; +} + +void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { + cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); +} + +void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { + cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); +} + +void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { + cvtSDWA(Inst, Operands, SIInstrFlags::VOPC); +} + +void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, + uint64_t BasicInstType) { + OptionalImmIndexMap OptionalIdx; + + unsigned I = 1; + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { + ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); + } + + for (unsigned E = Operands.size(); I != E; ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + // Add the register arguments + if (BasicInstType == SIInstrFlags::VOPC && + Op.isReg() && + Op.Reg.RegNo == AMDGPU::VCC) { + // VOPC sdwa use "vcc" token as dst. Skip it. + continue; + } else if (Op.isRegOrImmWithInputMods()) { + Op.addRegOrImmWithInputModsOperands(Inst, 2); + } else if (Op.isImm()) { + // Handle optional arguments + OptionalIdx[Op.getImmTy()] = I; + } else { + llvm_unreachable("Invalid operand type"); + } + } + + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); + + if (Inst.getOpcode() == AMDGPU::V_NOP_sdwa) { + // V_NOP_sdwa has no optional sdwa arguments + return; + } + switch (BasicInstType) { + case SIInstrFlags::VOP1: { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6); + break; + } + case SIInstrFlags::VOP2: { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6); + break; + } + case SIInstrFlags::VOPC: { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6); + break; + } + default: + llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed"); } } @@ -1890,3 +2755,37 @@ extern "C" void LLVMInitializeAMDGPUAsmParser() { #define GET_MATCHER_IMPLEMENTATION #include "AMDGPUGenAsmMatcher.inc" + +// This fuction should be defined after auto-generated include so that we have +// MatchClassKind enum defined +unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, + unsigned Kind) { + // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). + // But MatchInstructionImpl() expects to meet token and fails to validate + // operand. 
This method checks if we are given immediate operand but expect to + // get corresponding token. + AMDGPUOperand &Operand = (AMDGPUOperand&)Op; + switch (Kind) { + case MCK_addr64: + return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; + case MCK_gds: + return Operand.isGDS() ? Match_Success : Match_InvalidOperand; + case MCK_glc: + return Operand.isGLC() ? Match_Success : Match_InvalidOperand; + case MCK_idxen: + return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; + case MCK_offen: + return Operand.isOffen() ? Match_Success : Match_InvalidOperand; + case MCK_SSrc32: + // When operands have expression values, they will return true for isToken, + // because it is not possible to distinguish between a token and an + // expression at parse time. MatchInstructionImpl() will always try to + // match an operand as a token, when isToken returns true, and when the + // name of the expression is not a valid token, the match will fail, + // so we need to handle it here. + return Operand.isSSrc32() ? Match_Success : Match_InvalidOperand; + case MCK_SoppBrTarget: + return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; + default: return Match_InvalidOperand; + } +} |
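As a worked example of the s_waitcnt immediate layout that parseCnt and parseSWaitCntOps in this patch assume (vmcnt in bits [3:0], expcnt in [6:4], lgkmcnt widened to [11:8], with 0xf7f meaning "all counters at maximum"), here is a minimal standalone sketch of the packing; it is not part of the patch and only mirrors the masks and shifts used above.

#include <cstdint>
#include <cassert>

// Pack the three wait counters into the 16-bit s_waitcnt immediate.
static uint16_t encodeWaitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt) {
  assert(VmCnt <= 0xf && ExpCnt <= 0x7 && LgkmCnt <= 0xf);
  uint16_t Enc = 0xf7f;                           // all counters disabled (maximum)
  Enc = (Enc & ~(0xfu << 0)) | (VmCnt  << 0);     // vmcnt   [3:0]
  Enc = (Enc & ~(0x7u << 4)) | (ExpCnt << 4);     // expcnt  [6:4]
  Enc = (Enc & ~(0xfu << 8)) | (LgkmCnt << 8);    // lgkmcnt [11:8]
  return Enc;
}

// e.g. "s_waitcnt vmcnt(0) lgkmcnt(0)" leaves expcnt at its maximum:
//   encodeWaitcnt(0, 7, 0) == 0x070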