vendor/llvm/llvm-trunk-r305145

author: Dimitry Andric <dim@FreeBSD.org> 2017-06-10 13:44:06 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2017-06-10 13:44:06 +0000
commit: 7ab83427af0f77b59941ceba41d509d7d097b065 (patch)
tree: cc41c05b1db454e3d802f34df75e636ee922ad87 /lib/Target/AMDGPU
parent: d288ef4c1788d3a951a7558c68312c2d320612b1 (diff)
63 files changed, 715 insertions, 928 deletions
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index b50e8d1d659e..6ab2b9ef0459 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -447,6 +447,16 @@ class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping,
   Implies
 >;
 
+def FeatureISAVersion6_0_0 : SubtargetFeatureISAVersion <6,0,0,
+  [FeatureSouthernIslands, // Features implied by ISA version 6.0.0.
+   FeatureFastFMAF32,
+   HalfRate64Ops,
+   FeatureLDSBankCount32]>;
+
+def FeatureISAVersion6_0_1 : SubtargetFeatureISAVersion <6,0,1,
+  [FeatureSouthernIslands, // Features implied by ISA version 6.0.1.
+   FeatureLDSBankCount32]>;
+
 def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0,
   [FeatureSeaIslands,
    FeatureLDSBankCount32]>;
@@ -461,6 +471,10 @@ def FeatureISAVersion7_0_2 : SubtargetFeatureISAVersion <7,0,2,
   [FeatureSeaIslands,
    FeatureLDSBankCount16]>;
 
+def FeatureISAVersion7_0_3 : SubtargetFeatureISAVersion <7,0,3,
+  [FeatureSeaIslands, // Features implied by ISA version 7.0.3.
+   FeatureLDSBankCount16]>;
+
 def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0,
   [FeatureVolcanicIslands,
    FeatureLDSBankCount32,
@@ -489,8 +503,23 @@ def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
    FeatureLDSBankCount16,
    FeatureXNACK]>;
 
-def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,[]>;
-def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1,[]>;
+def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,
+  [FeatureGFX9, // Features implied by ISA version 9.0.0.
+   FeatureLDSBankCount32]>;
+
+def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1,
+  [FeatureGFX9, // Features implied by ISA version 9.0.1.
+   FeatureLDSBankCount32,
+   FeatureXNACK]>;
+
+def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2,
+  [FeatureGFX9, // Features implied by ISA version 9.0.2.
+   FeatureLDSBankCount32]>;
+
+def FeatureISAVersion9_0_3 : SubtargetFeatureISAVersion <9,0,3,
+  [FeatureGFX9, // Features implied by ISA version 9.0.3.
+   FeatureLDSBankCount32,
+   FeatureXNACK]>;
 
 //===----------------------------------------------------------------------===//
 // Debugger related subtarget features.
diff --git a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index 3c99f48e818a..faa424eb0a64 100644
--- a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -10,15 +10,15 @@
 /// This is the AMGPU address space based alias analysis pass.
 //===----------------------------------------------------------------------===//
 
-#include "AMDGPU.h"
 #include "AMDGPUAliasAnalysis.h"
+#include "AMDGPU.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/Passes.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index 91b3649f5c39..3c788fa1dcea 100644
--- a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -19,8 +19,8 @@
 #include "llvm/Analysis/DivergenceAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/IR/InstVisitor.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 0959014812d8..83ad1a5c6ee3 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -17,25 +17,25 @@
 //
 
 #include "AMDGPUAsmPrinter.h"
-#include "AMDGPUTargetMachine.h"
-#include "MCTargetDesc/AMDGPUTargetStreamer.h"
-#include "InstPrinter/AMDGPUInstPrinter.h"
-#include "Utils/AMDGPUBaseInfo.h"
 #include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
+#include "AMDGPUTargetMachine.h"
+#include "InstPrinter/AMDGPUInstPrinter.h"
+#include "MCTargetDesc/AMDGPUTargetStreamer.h"
 #include "R600Defines.h"
 #include "R600MachineFunctionInfo.h"
 #include "R600RegisterInfo.h"
 #include "SIDefines.h"
-#include "SIMachineFunctionInfo.h"
 #include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index e5adeeb465e1..0a58ce06704d 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -15,8 +15,8 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
 
-#include "AMDKernelCodeT.h"
 #include "AMDGPU.h"
+#include "AMDKernelCodeT.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include <cstddef>
diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index e67ae092fdda..515cc07dd449 100644
--- a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -18,8 +18,8 @@
 #include "AMDGPUISelLowering.h"
 #include "AMDGPUSubtarget.h"
 #include "SIISelLowering.h"
-#include "SIRegisterInfo.h"
 #include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index d923cb117c12..b312dbc8d14d 100644
--- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -25,13 +25,13 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/InstVisitor.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/Type.h"
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 19fce064783d..251c2f9bb25a 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -13,15 +13,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
+#include "AMDGPUISelLowering.h" // For AMDGPUISD
 #include "AMDGPUInstrInfo.h"
 #include "AMDGPURegisterInfo.h"
-#include "AMDGPUISelLowering.h" // For AMDGPUISD
 #include "AMDGPUSubtarget.h"
 #include "SIDefines.h"
-#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
 #include "SIISelLowering.h"
+#include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 723e8a7b54e2..5586b513b5fc 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -21,6 +21,7 @@
 #include "AMDGPURegisterInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "R600MachineFunctionInfo.h"
+#include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -30,7 +31,6 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/Support/KnownBits.h"
-#include "SIInstrInfo.h"
 using namespace llvm;
 
 static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT,
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 12caa5118342..41cc7d7093ec 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -17,8 +17,8 @@
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
 
 #include "AMDGPU.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
 
 #define GET_INSTRINFO_HEADER
 #include "AMDGPUGenInstrInfo.inc"
diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index c87102e55dfb..ef845f44d365 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -15,9 +15,9 @@
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
 
 #include "AMDGPU.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 
 namespace llvm {
 
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 267f4807a788..b889788c3426 100644
--- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -14,10 +14,10 @@
 
 #include "AMDGPULegalizerInfo.h"
 #include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/IR/Type.h"
 #include "llvm/IR/DerivedTypes.h"
-#include "llvm/Target/TargetOpcodes.h"
+#include "llvm/IR/Type.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetOpcodes.h"
 
 using namespace llvm;
 
@@ -47,12 +47,18 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo() {
   setAction({G_GEP, P2}, Legal);
   setAction({G_GEP, 1, S64}, Legal);
 
+  setAction({G_ICMP, S1}, Legal);
+  setAction({G_ICMP, 1, S32}, Legal);
+
   setAction({G_LOAD, P1}, Legal);
   setAction({G_LOAD, P2}, Legal);
   setAction({G_LOAD, S32}, Legal);
   setAction({G_LOAD, 1, P1}, Legal);
   setAction({G_LOAD, 1, P2}, Legal);
 
+  setAction({G_SELECT, S32}, Legal);
+  setAction({G_SELECT, 1, S1}, Legal);
+
   setAction({G_STORE, S32}, Legal);
   setAction({G_STORE, 1, P1}, Legal);
 
diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index f1ef6281c90f..63dd0d726d91 100644
--- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -38,7 +38,6 @@ using namespace llvm;
 
 #include "AMDGPUGenMCPseudoLowering.inc"
 
-
 AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st,
                                      const AsmPrinter &ap):
   Ctx(ctx), ST(st), AP(ap) { }
diff --git a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
index 6d2785ba1c60..2071b6f157cd 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
@@ -12,8 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
-#include "SIInstrInfo.h"
 #include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SetVector.h"
diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index 8bfeb67ad4ec..99bb61b21db0 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -10,8 +10,8 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
 
-#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunction.h"
 
 namespace llvm {
 
diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 07f92918a43f..625c9b77e2de 100644
--- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -33,11 +33,11 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
@@ -319,15 +319,17 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
   switch (Inst->getOpcode()) {
   case Instruction::Load: {
     LoadInst *LI = cast<LoadInst>(Inst);
-    return !LI->isVolatile();
+    // Only loads whose pointer operand is a GEP are handled for now.
+    return isa<GetElementPtrInst>(LI->getPointerOperand()) && !LI->isVolatile();
   }
   case Instruction::BitCast:
   case Instruction::AddrSpaceCast:
     return true;
   case Instruction::Store: {
-    // Must be the stored pointer operand, not a stored value.
+    // User must be the store's pointer operand, not the stored value, and
+    // (since the IR should be in canonical form) it must be a GEP.
     StoreInst *SI = cast<StoreInst>(Inst);
-    return (SI->getPointerOperand() == User) && !SI->isVolatile();
+    return (SI->getPointerOperand() == User) && isa<GetElementPtrInst>(User) && !SI->isVolatile();
   }
   default:
     return false;
@@ -341,8 +343,11 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
 
   // FIXME: There is no reason why we can't support larger arrays, we
   // are just being conservative for now.
+  // FIXME: We also reject allocas of nested array type, e.g. [2 x [2 x i32]].
+  // These could potentially be promoted too, but are not handled yet.
   if (!AllocaTy ||
       AllocaTy->getElementType()->isVectorTy() ||
+      AllocaTy->getElementType()->isArrayTy() ||
       AllocaTy->getNumElements() > 4 ||
       AllocaTy->getNumElements() < 2) {
     DEBUG(dbgs() << "  Cannot convert type to vector\n");
@@ -390,7 +395,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
     switch (Inst->getOpcode()) {
     case Instruction::Load: {
       Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
-      Value *Ptr = Inst->getOperand(0);
+      Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
       Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
 
       Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
@@ -403,12 +408,13 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
     case Instruction::Store: {
       Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
 
-      Value *Ptr = Inst->getOperand(1);
+      StoreInst *SI = cast<StoreInst>(Inst);
+      Value *Ptr = SI->getPointerOperand();
       Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
       Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
       Value *VecValue = Builder.CreateLoad(BitCast);
       Value *NewVecValue = Builder.CreateInsertElement(VecValue,
-                                                       Inst->getOperand(0),
+                                                       SI->getValueOperand(),
                                                        Index);
       Builder.CreateStore(NewVecValue, BitCast);
       Inst->eraseFromParent();
diff --git a/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp b/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp
new file mode 100644
index 000000000000..36d88f52910d
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp
@@ -0,0 +1,353 @@
+//===-- AMDGPURegAsmNames.inc.cpp - Register asm names ------*- C++ -*-----===//
+
+#ifdef AMDGPU_REG_ASM_NAMES
+
+static const char *const VGPR32RegNames[] = { // "v0" through "v255".
+    "v0",   "v1",   "v2",   "v3",   "v4",   "v5",   "v6",   "v7",   "v8",
+    "v9",   "v10",  "v11",  "v12",  "v13",  "v14",  "v15",  "v16",  "v17",
+    "v18",  "v19",  "v20",  "v21",  "v22",  "v23",  "v24",  "v25",  "v26",
+    "v27",  "v28",  "v29",  "v30",  "v31",  "v32",  "v33",  "v34",  "v35",
+    "v36",  "v37",  "v38",  "v39",  "v40",  "v41",  "v42",  "v43",  "v44",
+    "v45",  "v46",  "v47",  "v48",  "v49",  "v50",  "v51",  "v52",  "v53",
+    "v54",  "v55",  "v56",  "v57",  "v58",  "v59",  "v60",  "v61",  "v62",
+    "v63",  "v64",  "v65",  "v66",  "v67",  "v68",  "v69",  "v70",  "v71",
+    "v72",  "v73",  "v74",  "v75",  "v76",  "v77",  "v78",  "v79",  "v80",
+    "v81",  "v82",  "v83",  "v84",  "v85",  "v86",  "v87",  "v88",  "v89",
+    "v90",  "v91",  "v92",  "v93",  "v94",  "v95",  "v96",  "v97",  "v98",
+    "v99",  "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
+    "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
+    "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
+    "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
+    "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
+    "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
+    "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
+    "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
+    "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
+    "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
+    "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
+    "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
+    "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
+    "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
+    "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
+    "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
+    "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
+    "v252", "v253", "v254", "v255"
+};
+
+static const char *const SGPR32RegNames[] = { // "s0" through "s103".
+    "s0",   "s1",   "s2",   "s3",   "s4",  "s5",  "s6",  "s7",  "s8",  "s9",
+    "s10",  "s11",  "s12",  "s13",  "s14", "s15", "s16", "s17", "s18", "s19",
+    "s20",  "s21",  "s22",  "s23",  "s24", "s25", "s26", "s27", "s28", "s29",
+    "s30",  "s31",  "s32",  "s33",  "s34", "s35", "s36", "s37", "s38", "s39",
+    "s40",  "s41",  "s42",  "s43",  "s44", "s45", "s46", "s47", "s48", "s49",
+    "s50",  "s51",  "s52",  "s53",  "s54", "s55", "s56", "s57", "s58", "s59",
+    "s60",  "s61",  "s62",  "s63",  "s64", "s65", "s66", "s67", "s68", "s69",
+    "s70",  "s71",  "s72",  "s73",  "s74", "s75", "s76", "s77", "s78", "s79",
+    "s80",  "s81",  "s82",  "s83",  "s84", "s85", "s86", "s87", "s88", "s89",
+    "s90",  "s91",  "s92",  "s93",  "s94", "s95", "s96", "s97", "s98", "s99",
+    "s100", "s101", "s102", "s103"
+};
+
+static const char *const VGPR64RegNames[] = { // "v[0:1]" through "v[254:255]".
+    "v[0:1]",     "v[1:2]",     "v[2:3]",     "v[3:4]",     "v[4:5]",
+    "v[5:6]",     "v[6:7]",     "v[7:8]",     "v[8:9]",     "v[9:10]",
+    "v[10:11]",   "v[11:12]",   "v[12:13]",   "v[13:14]",   "v[14:15]",
+    "v[15:16]",   "v[16:17]",   "v[17:18]",   "v[18:19]",   "v[19:20]",
+    "v[20:21]",   "v[21:22]",   "v[22:23]",   "v[23:24]",   "v[24:25]",
+    "v[25:26]",   "v[26:27]",   "v[27:28]",   "v[28:29]",   "v[29:30]",
+    "v[30:31]",   "v[31:32]",   "v[32:33]",   "v[33:34]",   "v[34:35]",
+    "v[35:36]",   "v[36:37]",   "v[37:38]",   "v[38:39]",   "v[39:40]",
+    "v[40:41]",   "v[41:42]",   "v[42:43]",   "v[43:44]",   "v[44:45]",
+    "v[45:46]",   "v[46:47]",   "v[47:48]",   "v[48:49]",   "v[49:50]",
+    "v[50:51]",   "v[51:52]",   "v[52:53]",   "v[53:54]",   "v[54:55]",
+    "v[55:56]",   "v[56:57]",   "v[57:58]",   "v[58:59]",   "v[59:60]",
+    "v[60:61]",   "v[61:62]",   "v[62:63]",   "v[63:64]",   "v[64:65]",
+    "v[65:66]",   "v[66:67]",   "v[67:68]",   "v[68:69]",   "v[69:70]",
+    "v[70:71]",   "v[71:72]",   "v[72:73]",   "v[73:74]",   "v[74:75]",
+    "v[75:76]",   "v[76:77]",   "v[77:78]",   "v[78:79]",   "v[79:80]",
+    "v[80:81]",   "v[81:82]",   "v[82:83]",   "v[83:84]",   "v[84:85]",
+    "v[85:86]",   "v[86:87]",   "v[87:88]",   "v[88:89]",   "v[89:90]",
+    "v[90:91]",   "v[91:92]",   "v[92:93]",   "v[93:94]",   "v[94:95]",
+    "v[95:96]",   "v[96:97]",   "v[97:98]",   "v[98:99]",   "v[99:100]",
+    "v[100:101]", "v[101:102]", "v[102:103]", "v[103:104]", "v[104:105]",
+    "v[105:106]", "v[106:107]", "v[107:108]", "v[108:109]", "v[109:110]",
+    "v[110:111]", "v[111:112]", "v[112:113]", "v[113:114]", "v[114:115]",
+    "v[115:116]", "v[116:117]", "v[117:118]", "v[118:119]", "v[119:120]",
+    "v[120:121]", "v[121:122]", "v[122:123]", "v[123:124]", "v[124:125]",
+    "v[125:126]", "v[126:127]", "v[127:128]", "v[128:129]", "v[129:130]",
+    "v[130:131]", "v[131:132]", "v[132:133]", "v[133:134]", "v[134:135]",
+    "v[135:136]", "v[136:137]", "v[137:138]", "v[138:139]", "v[139:140]",
+    "v[140:141]", "v[141:142]", "v[142:143]", "v[143:144]", "v[144:145]",
+    "v[145:146]", "v[146:147]", "v[147:148]", "v[148:149]", "v[149:150]",
+    "v[150:151]", "v[151:152]", "v[152:153]", "v[153:154]", "v[154:155]",
+    "v[155:156]", "v[156:157]", "v[157:158]", "v[158:159]", "v[159:160]",
+    "v[160:161]", "v[161:162]", "v[162:163]", "v[163:164]", "v[164:165]",
+    "v[165:166]", "v[166:167]", "v[167:168]", "v[168:169]", "v[169:170]",
+    "v[170:171]", "v[171:172]", "v[172:173]", "v[173:174]", "v[174:175]",
+    "v[175:176]", "v[176:177]", "v[177:178]", "v[178:179]", "v[179:180]",
+    "v[180:181]", "v[181:182]", "v[182:183]", "v[183:184]", "v[184:185]",
+    "v[185:186]", "v[186:187]", "v[187:188]", "v[188:189]", "v[189:190]",
+    "v[190:191]", "v[191:192]", "v[192:193]", "v[193:194]", "v[194:195]",
+    "v[195:196]", "v[196:197]", "v[197:198]", "v[198:199]", "v[199:200]",
+    "v[200:201]", "v[201:202]", "v[202:203]", "v[203:204]", "v[204:205]",
+    "v[205:206]", "v[206:207]", "v[207:208]", "v[208:209]", "v[209:210]",
+    "v[210:211]", "v[211:212]", "v[212:213]", "v[213:214]", "v[214:215]",
+    "v[215:216]", "v[216:217]", "v[217:218]", "v[218:219]", "v[219:220]",
+    "v[220:221]", "v[221:222]", "v[222:223]", "v[223:224]", "v[224:225]",
+    "v[225:226]", "v[226:227]", "v[227:228]", "v[228:229]", "v[229:230]",
+    "v[230:231]", "v[231:232]", "v[232:233]", "v[233:234]", "v[234:235]",
+    "v[235:236]", "v[236:237]", "v[237:238]", "v[238:239]", "v[239:240]",
+    "v[240:241]", "v[241:242]", "v[242:243]", "v[243:244]", "v[244:245]",
+    "v[245:246]", "v[246:247]", "v[247:248]", "v[248:249]", "v[249:250]",
+    "v[250:251]", "v[251:252]", "v[252:253]", "v[253:254]", "v[254:255]"
+};
+
+static const char *const VGPR96RegNames[] = { // "v[0:2]" through "v[253:255]".
+    "v[0:2]",     "v[1:3]",     "v[2:4]",     "v[3:5]",     "v[4:6]",
+    "v[5:7]",     "v[6:8]",     "v[7:9]",     "v[8:10]",    "v[9:11]",
+    "v[10:12]",   "v[11:13]",   "v[12:14]",   "v[13:15]",   "v[14:16]",
+    "v[15:17]",   "v[16:18]",   "v[17:19]",   "v[18:20]",   "v[19:21]",
+    "v[20:22]",   "v[21:23]",   "v[22:24]",   "v[23:25]",   "v[24:26]",
+    "v[25:27]",   "v[26:28]",   "v[27:29]",   "v[28:30]",   "v[29:31]",
+    "v[30:32]",   "v[31:33]",   "v[32:34]",   "v[33:35]",   "v[34:36]",
+    "v[35:37]",   "v[36:38]",   "v[37:39]",   "v[38:40]",   "v[39:41]",
+    "v[40:42]",   "v[41:43]",   "v[42:44]",   "v[43:45]",   "v[44:46]",
+    "v[45:47]",   "v[46:48]",   "v[47:49]",   "v[48:50]",   "v[49:51]",
+    "v[50:52]",   "v[51:53]",   "v[52:54]",   "v[53:55]",   "v[54:56]",
+    "v[55:57]",   "v[56:58]",   "v[57:59]",   "v[58:60]",   "v[59:61]",
+    "v[60:62]",   "v[61:63]",   "v[62:64]",   "v[63:65]",   "v[64:66]",
+    "v[65:67]",   "v[66:68]",   "v[67:69]",   "v[68:70]",   "v[69:71]",
+    "v[70:72]",   "v[71:73]",   "v[72:74]",   "v[73:75]",   "v[74:76]",
+    "v[75:77]",   "v[76:78]",   "v[77:79]",   "v[78:80]",   "v[79:81]",
+    "v[80:82]",   "v[81:83]",   "v[82:84]",   "v[83:85]",   "v[84:86]",
+    "v[85:87]",   "v[86:88]",   "v[87:89]",   "v[88:90]",   "v[89:91]",
+    "v[90:92]",   "v[91:93]",   "v[92:94]",   "v[93:95]",   "v[94:96]",
+    "v[95:97]",   "v[96:98]",   "v[97:99]",   "v[98:100]",  "v[99:101]",
+    "v[100:102]", "v[101:103]", "v[102:104]", "v[103:105]", "v[104:106]",
+    "v[105:107]", "v[106:108]", "v[107:109]", "v[108:110]", "v[109:111]",
+    "v[110:112]", "v[111:113]", "v[112:114]", "v[113:115]", "v[114:116]",
+    "v[115:117]", "v[116:118]", "v[117:119]", "v[118:120]", "v[119:121]",
+    "v[120:122]", "v[121:123]", "v[122:124]", "v[123:125]", "v[124:126]",
+    "v[125:127]", "v[126:128]", "v[127:129]", "v[128:130]", "v[129:131]",
+    "v[130:132]", "v[131:133]", "v[132:134]", "v[133:135]", "v[134:136]",
+    "v[135:137]", "v[136:138]", "v[137:139]", "v[138:140]", "v[139:141]",
+    "v[140:142]", "v[141:143]", "v[142:144]", "v[143:145]", "v[144:146]",
+    "v[145:147]", "v[146:148]", "v[147:149]", "v[148:150]", "v[149:151]",
+    "v[150:152]", "v[151:153]", "v[152:154]", "v[153:155]", "v[154:156]",
+    "v[155:157]", "v[156:158]", "v[157:159]", "v[158:160]", "v[159:161]",
+    "v[160:162]", "v[161:163]", "v[162:164]", "v[163:165]", "v[164:166]",
+    "v[165:167]", "v[166:168]", "v[167:169]", "v[168:170]", "v[169:171]",
+    "v[170:172]", "v[171:173]", "v[172:174]", "v[173:175]", "v[174:176]",
+    "v[175:177]", "v[176:178]", "v[177:179]", "v[178:180]", "v[179:181]",
+    "v[180:182]", "v[181:183]", "v[182:184]", "v[183:185]", "v[184:186]",
+    "v[185:187]", "v[186:188]", "v[187:189]", "v[188:190]", "v[189:191]",
+    "v[190:192]", "v[191:193]", "v[192:194]", "v[193:195]", "v[194:196]",
+    "v[195:197]", "v[196:198]", "v[197:199]", "v[198:200]", "v[199:201]",
+    "v[200:202]", "v[201:203]", "v[202:204]", "v[203:205]", "v[204:206]",
+    "v[205:207]", "v[206:208]", "v[207:209]", "v[208:210]", "v[209:211]",
+    "v[210:212]", "v[211:213]", "v[212:214]", "v[213:215]", "v[214:216]",
+    "v[215:217]", "v[216:218]", "v[217:219]", "v[218:220]", "v[219:221]",
+    "v[220:222]", "v[221:223]", "v[222:224]", "v[223:225]", "v[224:226]",
+    "v[225:227]", "v[226:228]", "v[227:229]", "v[228:230]", "v[229:231]",
+    "v[230:232]", "v[231:233]", "v[232:234]", "v[233:235]", "v[234:236]",
+    "v[235:237]", "v[236:238]", "v[237:239]", "v[238:240]", "v[239:241]",
+    "v[240:242]", "v[241:243]", "v[242:244]", "v[243:245]", "v[244:246]",
+    "v[245:247]", "v[246:248]", "v[247:249]", "v[248:250]", "v[249:251]",
+    "v[250:252]", "v[251:253]", "v[252:254]", "v[253:255]"
+};
+
+static const char *const VGPR128RegNames[] = { // "v[0:3]" through "v[252:255]".
+    "v[0:3]",     "v[1:4]",     "v[2:5]",     "v[3:6]",     "v[4:7]",
+    "v[5:8]",     "v[6:9]",     "v[7:10]",    "v[8:11]",    "v[9:12]",
+    "v[10:13]",   "v[11:14]",   "v[12:15]",   "v[13:16]",   "v[14:17]",
+    "v[15:18]",   "v[16:19]",   "v[17:20]",   "v[18:21]",   "v[19:22]",
+    "v[20:23]",   "v[21:24]",   "v[22:25]",   "v[23:26]",   "v[24:27]",
+    "v[25:28]",   "v[26:29]",   "v[27:30]",   "v[28:31]",   "v[29:32]",
+    "v[30:33]",   "v[31:34]",   "v[32:35]",   "v[33:36]",   "v[34:37]",
+    "v[35:38]",   "v[36:39]",   "v[37:40]",   "v[38:41]",   "v[39:42]",
+    "v[40:43]",   "v[41:44]",   "v[42:45]",   "v[43:46]",   "v[44:47]",
+    "v[45:48]",   "v[46:49]",   "v[47:50]",   "v[48:51]",   "v[49:52]",
+    "v[50:53]",   "v[51:54]",   "v[52:55]",   "v[53:56]",   "v[54:57]",
+    "v[55:58]",   "v[56:59]",   "v[57:60]",   "v[58:61]",   "v[59:62]",
+    "v[60:63]",   "v[61:64]",   "v[62:65]",   "v[63:66]",   "v[64:67]",
+    "v[65:68]",   "v[66:69]",   "v[67:70]",   "v[68:71]",   "v[69:72]",
+    "v[70:73]",   "v[71:74]",   "v[72:75]",   "v[73:76]",   "v[74:77]",
+    "v[75:78]",   "v[76:79]",   "v[77:80]",   "v[78:81]",   "v[79:82]",
+    "v[80:83]",   "v[81:84]",   "v[82:85]",   "v[83:86]",   "v[84:87]",
+    "v[85:88]",   "v[86:89]",   "v[87:90]",   "v[88:91]",   "v[89:92]",
+    "v[90:93]",   "v[91:94]",   "v[92:95]",   "v[93:96]",   "v[94:97]",
+    "v[95:98]",   "v[96:99]",   "v[97:100]",  "v[98:101]",  "v[99:102]",
+    "v[100:103]", "v[101:104]", "v[102:105]", "v[103:106]", "v[104:107]",
+    "v[105:108]", "v[106:109]", "v[107:110]", "v[108:111]", "v[109:112]",
+    "v[110:113]", "v[111:114]", "v[112:115]", "v[113:116]", "v[114:117]",
+    "v[115:118]", "v[116:119]", "v[117:120]", "v[118:121]", "v[119:122]",
+    "v[120:123]", "v[121:124]", "v[122:125]", "v[123:126]", "v[124:127]",
+    "v[125:128]", "v[126:129]", "v[127:130]", "v[128:131]", "v[129:132]",
+    "v[130:133]", "v[131:134]", "v[132:135]", "v[133:136]", "v[134:137]",
+    "v[135:138]", "v[136:139]", "v[137:140]", "v[138:141]", "v[139:142]",
+    "v[140:143]", "v[141:144]", "v[142:145]", "v[143:146]", "v[144:147]",
+    "v[145:148]", "v[146:149]", "v[147:150]", "v[148:151]", "v[149:152]",
+    "v[150:153]", "v[151:154]", "v[152:155]", "v[153:156]", "v[154:157]",
+    "v[155:158]", "v[156:159]", "v[157:160]", "v[158:161]", "v[159:162]",
+    "v[160:163]", "v[161:164]", "v[162:165]", "v[163:166]", "v[164:167]",
+    "v[165:168]", "v[166:169]", "v[167:170]", "v[168:171]", "v[169:172]",
+    "v[170:173]", "v[171:174]", "v[172:175]", "v[173:176]", "v[174:177]",
+    "v[175:178]", "v[176:179]", "v[177:180]", "v[178:181]", "v[179:182]",
+    "v[180:183]", "v[181:184]", "v[182:185]", "v[183:186]", "v[184:187]",
+    "v[185:188]", "v[186:189]", "v[187:190]", "v[188:191]", "v[189:192]",
+    "v[190:193]", "v[191:194]", "v[192:195]", "v[193:196]", "v[194:197]",
+    "v[195:198]", "v[196:199]", "v[197:200]", "v[198:201]", "v[199:202]",
+    "v[200:203]", "v[201:204]", "v[202:205]", "v[203:206]", "v[204:207]",
+    "v[205:208]", "v[206:209]", "v[207:210]", "v[208:211]", "v[209:212]",
+    "v[210:213]", "v[211:214]", "v[212:215]", "v[213:216]", "v[214:217]",
+    "v[215:218]", "v[216:219]", "v[217:220]", "v[218:221]", "v[219:222]",
+    "v[220:223]", "v[221:224]", "v[222:225]", "v[223:226]", "v[224:227]",
+    "v[225:228]", "v[226:229]", "v[227:230]", "v[228:231]", "v[229:232]",
+    "v[230:233]", "v[231:234]", "v[232:235]", "v[233:236]", "v[234:237]",
+    "v[235:238]", "v[236:239]", "v[237:240]", "v[238:241]", "v[239:242]",
+    "v[240:243]", "v[241:244]", "v[242:245]", "v[243:246]", "v[244:247]",
+    "v[245:248]", "v[246:249]", "v[247:250]", "v[248:251]", "v[249:252]",
+    "v[250:253]", "v[251:254]", "v[252:255]"
+};
+
+static const char *const VGPR256RegNames[] = {
+    "v[0:7]",     "v[1:8]",     "v[2:9]",     "v[3:10]",    "v[4:11]",
+    "v[5:12]",    "v[6:13]",    "v[7:14]",    "v[8:15]",    "v[9:16]",
+    "v[10:17]",   "v[11:18]",   "v[12:19]",   "v[13:20]",   "v[14:21]",
+    "v[15:22]",   "v[16:23]",   "v[17:24]",   "v[18:25]",   "v[19:26]",
+    "v[20:27]",   "v[21:28]",   "v[22:29]",   "v[23:30]",   "v[24:31]",
+    "v[25:32]",   "v[26:33]",   "v[27:34]",   "v[28:35]",   "v[29:36]",
+    "v[30:37]",   "v[31:38]",   "v[32:39]",   "v[33:40]",   "v[34:41]",
+    "v[35:42]",   "v[36:43]",   "v[37:44]",   "v[38:45]",   "v[39:46]",
+    "v[40:47]",   "v[41:48]",   "v[42:49]",   "v[43:50]",   "v[44:51]",
+    "v[45:52]",   "v[46:53]",   "v[47:54]",   "v[48:55]",   "v[49:56]",
+    "v[50:57]",   "v[51:58]",   "v[52:59]",   "v[53:60]",   "v[54:61]",
+    "v[55:62]",   "v[56:63]",   "v[57:64]",   "v[58:65]",   "v[59:66]",
+    "v[60:67]",   "v[61:68]",   "v[62:69]",   "v[63:70]",   "v[64:71]",
+    "v[65:72]",   "v[66:73]",   "v[67:74]",   "v[68:75]",   "v[69:76]",
+    "v[70:77]",   "v[71:78]",   "v[72:79]",   "v[73:80]",   "v[74:81]",
+    "v[75:82]",   "v[76:83]",   "v[77:84]",   "v[78:85]",   "v[79:86]",
+    "v[80:87]",   "v[81:88]",   "v[82:89]",   "v[83:90]",   "v[84:91]",
+    "v[85:92]",   "v[86:93]",   "v[87:94]",   "v[88:95]",   "v[89:96]",
+    "v[90:97]",   "v[91:98]",   "v[92:99]",   "v[93:100]",  "v[94:101]",
+    "v[95:102]",  "v[96:103]",  "v[97:104]",  "v[98:105]",  "v[99:106]",
+    "v[100:107]", "v[101:108]", "v[102:109]", "v[103:110]", "v[104:111]",
+    "v[105:112]", "v[106:113]", "v[107:114]", "v[108:115]", "v[109:116]",
+    "v[110:117]", "v[111:118]", "v[112:119]", "v[113:120]", "v[114:121]",
+    "v[115:122]", "v[116:123]", "v[117:124]", "v[118:125]", "v[119:126]",
+    "v[120:127]", "v[121:128]", "v[122:129]", "v[123:130]", "v[124:131]",
+    "v[125:132]", "v[126:133]", "v[127:134]", "v[128:135]", "v[129:136]",
+    "v[130:137]", "v[131:138]", "v[132:139]", "v[133:140]", "v[134:141]",
+    "v[135:142]", "v[136:143]", "v[137:144]", "v[138:145]", "v[139:146]",
+    "v[140:147]", "v[141:148]", "v[142:149]", "v[143:150]", "v[144:151]",
+    "v[145:152]", "v[146:153]", "v[147:154]", "v[148:155]", "v[149:156]",
+    "v[150:157]", "v[151:158]", "v[152:159]", "v[153:160]", "v[154:161]",
+    "v[155:162]", "v[156:163]", "v[157:164]", "v[158:165]", "v[159:166]",
+    "v[160:167]", "v[161:168]", "v[162:169]", "v[163:170]", "v[164:171]",
+    "v[165:172]", "v[166:173]", "v[167:174]", "v[168:175]", "v[169:176]",
+    "v[170:177]", "v[171:178]", "v[172:179]", "v[173:180]", "v[174:181]",
+    "v[175:182]", "v[176:183]", "v[177:184]", "v[178:185]", "v[179:186]",
+    "v[180:187]", "v[181:188]", "v[182:189]", "v[183:190]", "v[184:191]",
+    "v[185:192]", "v[186:193]", "v[187:194]", "v[188:195]", "v[189:196]",
+    "v[190:197]", "v[191:198]", "v[192:199]", "v[193:200]", "v[194:201]",
+    "v[195:202]", "v[196:203]", "v[197:204]", "v[198:205]", "v[199:206]",
+    "v[200:207]", "v[201:208]", "v[202:209]", "v[203:210]", "v[204:211]",
+    "v[205:212]", "v[206:213]", "v[207:214]", "v[208:215]", "v[209:216]",
+    "v[210:217]", "v[211:218]", "v[212:219]", "v[213:220]", "v[214:221]",
+    "v[215:222]", "v[216:223]", "v[217:224]", "v[218:225]", "v[219:226]",
+    "v[220:227]", "v[221:228]", "v[222:229]", "v[223:230]", "v[224:231]",
+    "v[225:232]", "v[226:233]", "v[227:234]", "v[228:235]", "v[229:236]",
+    "v[230:237]", "v[231:238]", "v[232:239]", "v[233:240]", "v[234:241]",
+    "v[235:242]", "v[236:243]", "v[237:244]", "v[238:245]", "v[239:246]",
+    "v[240:247]", "v[241:248]", "v[242:249]", "v[243:250]", "v[244:251]",
+    "v[245:252]", "v[246:253]", "v[247:254]", "v[248:255]"
+};
+
+static const char *const VGPR512RegNames[] = {
+    "v[0:15]",    "v[1:16]",    "v[2:17]",    "v[3:18]",    "v[4:19]",
+    "v[5:20]",    "v[6:21]",    "v[7:22]",    "v[8:23]",    "v[9:24]",
+    "v[10:25]",   "v[11:26]",   "v[12:27]",   "v[13:28]",   "v[14:29]",
+    "v[15:30]",   "v[16:31]",   "v[17:32]",   "v[18:33]",   "v[19:34]",
+    "v[20:35]",   "v[21:36]",   "v[22:37]",   "v[23:38]",   "v[24:39]",
+    "v[25:40]",   "v[26:41]",   "v[27:42]",   "v[28:43]",   "v[29:44]",
+    "v[30:45]",   "v[31:46]",   "v[32:47]",   "v[33:48]",   "v[34:49]",
+    "v[35:50]",   "v[36:51]",   "v[37:52]",   "v[38:53]",   "v[39:54]",
+    "v[40:55]",   "v[41:56]",   "v[42:57]",   "v[43:58]",   "v[44:59]",
+    "v[45:60]",   "v[46:61]",   "v[47:62]",   "v[48:63]",   "v[49:64]",
+    "v[50:65]",   "v[51:66]",   "v[52:67]",   "v[53:68]",   "v[54:69]",
+    "v[55:70]",   "v[56:71]",   "v[57:72]",   "v[58:73]",   "v[59:74]",
+    "v[60:75]",   "v[61:76]",   "v[62:77]",   "v[63:78]",   "v[64:79]",
+    "v[65:80]",   "v[66:81]",   "v[67:82]",   "v[68:83]",   "v[69:84]",
+    "v[70:85]",   "v[71:86]",   "v[72:87]",   "v[73:88]",   "v[74:89]",
+    "v[75:90]",   "v[76:91]",   "v[77:92]",   "v[78:93]",   "v[79:94]",
+    "v[80:95]",   "v[81:96]",   "v[82:97]",   "v[83:98]",   "v[84:99]",
+    "v[85:100]",  "v[86:101]",  "v[87:102]",  "v[88:103]",  "v[89:104]",
+    "v[90:105]",  "v[91:106]",  "v[92:107]",  "v[93:108]",  "v[94:109]",
+    "v[95:110]",  "v[96:111]",  "v[97:112]",  "v[98:113]",  "v[99:114]",
+    "v[100:115]", "v[101:116]", "v[102:117]", "v[103:118]", "v[104:119]",
+    "v[105:120]", "v[106:121]", "v[107:122]", "v[108:123]", "v[109:124]",
+    "v[110:125]", "v[111:126]", "v[112:127]", "v[113:128]", "v[114:129]",
+    "v[115:130]", "v[116:131]", "v[117:132]", "v[118:133]", "v[119:134]",
+    "v[120:135]", "v[121:136]", "v[122:137]", "v[123:138]", "v[124:139]",
+    "v[125:140]", "v[126:141]", "v[127:142]", "v[128:143]", "v[129:144]",
+    "v[130:145]", "v[131:146]", "v[132:147]", "v[133:148]", "v[134:149]",
+    "v[135:150]", "v[136:151]", "v[137:152]", "v[138:153]", "v[139:154]",
+    "v[140:155]", "v[141:156]", "v[142:157]", "v[143:158]", "v[144:159]",
+    "v[145:160]", "v[146:161]", "v[147:162]", "v[148:163]", "v[149:164]",
+    "v[150:165]", "v[151:166]", "v[152:167]", "v[153:168]", "v[154:169]",
+    "v[155:170]", "v[156:171]", "v[157:172]", "v[158:173]", "v[159:174]",
+    "v[160:175]", "v[161:176]", "v[162:177]", "v[163:178]", "v[164:179]",
+    "v[165:180]", "v[166:181]", "v[167:182]", "v[168:183]", "v[169:184]",
+    "v[170:185]", "v[171:186]", "v[172:187]", "v[173:188]", "v[174:189]",
+    "v[175:190]", "v[176:191]", "v[177:192]", "v[178:193]", "v[179:194]",
+    "v[180:195]", "v[181:196]", "v[182:197]", "v[183:198]", "v[184:199]",
+    "v[185:200]", "v[186:201]", "v[187:202]", "v[188:203]", "v[189:204]",
+    "v[190:205]", "v[191:206]", "v[192:207]", "v[193:208]", "v[194:209]",
+    "v[195:210]", "v[196:211]", "v[197:212]", "v[198:213]", "v[199:214]",
+    "v[200:215]", "v[201:216]", "v[202:217]", "v[203:218]", "v[204:219]",
+    "v[205:220]", "v[206:221]", "v[207:222]", "v[208:223]", "v[209:224]",
+    "v[210:225]", "v[211:226]", "v[212:227]", "v[213:228]", "v[214:229]",
+    "v[215:230]", "v[216:231]", "v[217:232]", "v[218:233]", "v[219:234]",
+    "v[220:235]", "v[221:236]", "v[222:237]", "v[223:238]", "v[224:239]",
+    "v[225:240]", "v[226:241]", "v[227:242]", "v[228:243]", "v[229:244]",
+    "v[230:245]", "v[231:246]", "v[232:247]", "v[233:248]", "v[234:249]",
+    "v[235:250]", "v[236:251]", "v[237:252]", "v[238:253]", "v[239:254]",
+    "v[240:255]"
+};
+
+static const char *const SGPR64RegNames[] = {
+    "s[0:1]",   "s[2:3]",   "s[4:5]",     "s[6:7]",     "s[8:9]",   "s[10:11]",
+    "s[12:13]", "s[14:15]", "s[16:17]",   "s[18:19]",   "s[20:21]", "s[22:23]",
+    "s[24:25]", "s[26:27]", "s[28:29]",   "s[30:31]",   "s[32:33]", "s[34:35]",
+    "s[36:37]", "s[38:39]", "s[40:41]",   "s[42:43]",   "s[44:45]", "s[46:47]",
+    "s[48:49]", "s[50:51]", "s[52:53]",   "s[54:55]",   "s[56:57]", "s[58:59]",
+    "s[60:61]", "s[62:63]", "s[64:65]",   "s[66:67]",   "s[68:69]", "s[70:71]",
+    "s[72:73]", "s[74:75]", "s[76:77]",   "s[78:79]",   "s[80:81]", "s[82:83]",
+    "s[84:85]", "s[86:87]", "s[88:89]",   "s[90:91]",   "s[92:93]", "s[94:95]",
+    "s[96:97]", "s[98:99]", "s[100:101]", "s[102:103]"
+};
+
+static const char *const SGPR128RegNames[] = {
+    "s[0:3]",   "s[4:7]",     "s[8:11]",  "s[12:15]", "s[16:19]", "s[20:23]",
+    "s[24:27]", "s[28:31]",   "s[32:35]", "s[36:39]", "s[40:43]", "s[44:47]",
+    "s[48:51]", "s[52:55]",   "s[56:59]", "s[60:63]", "s[64:67]", "s[68:71]",
+    "s[72:75]", "s[76:79]",   "s[80:83]", "s[84:87]", "s[88:91]", "s[92:95]",
+    "s[96:99]", "s[100:103]"
+};
+
+static const char *const SGPR256RegNames[] = {
+    "s[0:7]",   "s[4:11]",  "s[8:15]",  "s[12:19]", "s[16:23]",
+    "s[20:27]", "s[24:31]", "s[28:35]", "s[32:39]", "s[36:43]",
+    "s[40:47]", "s[44:51]", "s[48:55]", "s[52:59]", "s[56:63]",
+    "s[60:67]", "s[64:71]", "s[68:75]", "s[72:79]", "s[76:83]",
+    "s[80:87]", "s[84:91]", "s[88:95]", "s[92:99]", "s[96:103]"
+};
+
+static const char *const SGPR512RegNames[] = {
+    "s[0:15]",  "s[4:19]",  "s[8:23]",  "s[12:27]", "s[16:31]",  "s[20:35]",
+    "s[24:39]", "s[28:43]", "s[32:47]", "s[36:51]", "s[40:55]",  "s[44:59]",
+    "s[48:63]", "s[52:67]", "s[56:71]", "s[60:75]", "s[64:79]",  "s[68:83]",
+    "s[72:87]", "s[76:91]", "s[80:95]", "s[84:99]", "s[88:103]"
+};
+
+#endif
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index 7c198a1b8a3f..201fdc1974c6 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -36,7 +36,6 @@ protected:
 
 #define GET_TARGET_REGBANK_CLASS
 #include "AMDGPUGenRegisterBank.inc"
-
 };
 class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
   const SIRegisterInfo *TRI;
diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
index b2867fcc49f9..ff58aa5741a1 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
@@ -40,7 +40,6 @@ unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const {
 #define GET_REGINFO_TARGET_DESC
 #include "AMDGPUGenRegisterInfo.inc"
 
-
 // Forced to be here by one .inc
 const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
   const MachineFunction *MF) const {
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index ed9cbb994fad..5f4f20316a6b 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -16,12 +16,12 @@
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
 
 #include "AMDGPU.h"
-#include "R600InstrInfo.h"
-#include "R600ISelLowering.h"
 #include "R600FrameLowering.h"
-#include "SIInstrInfo.h"
-#include "SIISelLowering.h"
+#include "R600ISelLowering.h"
+#include "R600InstrInfo.h"
 #include "SIFrameLowering.h"
+#include "SIISelLowering.h"
+#include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/Triple.h"
@@ -57,9 +57,12 @@ public:
 
   enum {
     ISAVersion0_0_0,
+    ISAVersion6_0_0,
+    ISAVersion6_0_1,
     ISAVersion7_0_0,
     ISAVersion7_0_1,
     ISAVersion7_0_2,
+    ISAVersion7_0_3,
     ISAVersion8_0_0,
     ISAVersion8_0_1,
     ISAVersion8_0_2,
@@ -67,7 +70,9 @@ public:
     ISAVersion8_0_4,
     ISAVersion8_1_0,
     ISAVersion9_0_0,
-    ISAVersion9_0_1
+    ISAVersion9_0_1,
+    ISAVersion9_0_2,
+    ISAVersion9_0_3
   };
 
   enum TrapHandlerAbi {
@@ -787,7 +792,7 @@ public:
 
   /// \returns VGPR allocation granularity supported by the subtarget.
   unsigned getVGPRAllocGranule() const {
-    return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());;
+    return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());
   }
 
   /// \returns VGPR encoding granularity supported by the subtarget.
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 404598ff4738..b644eba536fa 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -28,26 +28,26 @@
 #include "GCNSchedStrategy.h"
 #include "R600MachineScheduler.h"
 #include "SIMachineScheduler.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
 #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/AlwaysInliner.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/GVN.h"
-#include "llvm/Transforms/Vectorize.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/TargetRegistry.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Vectorize.h"
 #include <memory>
 
 using namespace llvm;
@@ -734,7 +734,6 @@ void GCNPassConfig::addMachineSSAOptimization() {
   addPass(&SIFoldOperandsID);
   addPass(&DeadMachineInstructionElimID);
   addPass(&SILoadStoreOptimizerID);
-  addPass(createSIShrinkInstructionsPass());
   if (EnableSDWAPeephole) {
     addPass(&SIPeepholeSDWAID);
     addPass(&MachineLICMID);
@@ -742,6 +741,7 @@ void GCNPassConfig::addMachineSSAOptimization() {
     addPass(&SIFoldOperandsID);
     addPass(&DeadMachineInstructionElimID);
   }
+  addPass(createSIShrinkInstructionsPass());
 }
 
 bool GCNPassConfig::addILPOpts() {
diff --git a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
index c96761c0b04e..6c1885e67fcb 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
@@ -7,13 +7,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "AMDGPUTargetMachine.h"
 #include "AMDGPUTargetObjectFile.h"
 #include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSectionELF.h"
-#include "llvm/Support/ELF.h"
-#include "Utils/AMDGPUBaseInfo.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index beafebc1284a..dee3d2856701 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -20,8 +20,8 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/BasicTTIImpl.h"
-#include "llvm/IR/Module.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/CostTable.h"
 #include "llvm/Target/TargetLowering.h"
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index cc68c971b249..16e3b7b4ebee 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -11,18 +11,19 @@
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
 #include "SIDefines.h"
+#include "Utils/AMDGPUAsmUtils.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "Utils/AMDKernelCodeTUtils.h"
-#include "Utils/AMDGPUAsmUtils.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
@@ -40,12 +41,11 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -814,14 +814,8 @@ private:
   bool ParseDirectiveCodeObjectMetadata();
   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
   bool ParseDirectiveAMDKernelCodeT();
-  bool ParseSectionDirectiveHSAText();
   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
   bool ParseDirectiveAMDGPUHsaKernel();
-  bool ParseDirectiveAMDGPUHsaModuleGlobal();
-  bool ParseDirectiveAMDGPUHsaProgramGlobal();
-  bool ParseSectionDirectiveHSADataGlobalAgent();
-  bool ParseSectionDirectiveHSADataGlobalProgram();
-  bool ParseSectionDirectiveHSARodataReadonlyAgent();
   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                              RegisterKind RegKind, unsigned Reg1,
                              unsigned RegNum);
@@ -2365,12 +2359,6 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
   return false;
 }
 
-bool AMDGPUAsmParser::ParseSectionDirectiveHSAText() {
-  getParser().getStreamer().SwitchSection(
-      AMDGPU::getHSATextSection(getContext()));
-  return false;
-}
-
 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
   if (getLexer().isNot(AsmToken::Identifier))
     return TokError("expected symbol name");
@@ -2384,46 +2372,6 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
   return false;
 }
 
-bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaModuleGlobal() {
-  if (getLexer().isNot(AsmToken::Identifier))
-    return TokError("expected symbol name");
-
-  StringRef GlobalName = Parser.getTok().getIdentifier();
-
-  getTargetStreamer().EmitAMDGPUHsaModuleScopeGlobal(GlobalName);
-  Lex();
-  return false;
-}
-
-bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaProgramGlobal() {
-  if (getLexer().isNot(AsmToken::Identifier))
-    return TokError("expected symbol name");
-
-  StringRef GlobalName = Parser.getTok().getIdentifier();
-
-  getTargetStreamer().EmitAMDGPUHsaProgramScopeGlobal(GlobalName);
-  Lex();
-  return false;
-}
-
-bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalAgent() {
-  getParser().getStreamer().SwitchSection(
-      AMDGPU::getHSADataGlobalAgentSection(getContext()));
-  return false;
-}
-
-bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalProgram() {
-  getParser().getStreamer().SwitchSection(
-      AMDGPU::getHSADataGlobalProgramSection(getContext()));
-  return false;
-}
-
-bool AMDGPUAsmParser::ParseSectionDirectiveHSARodataReadonlyAgent() {
-  getParser().getStreamer().SwitchSection(
-      AMDGPU::getHSARodataReadonlyAgentSection(getContext()));
-  return false;
-}
-
 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
   StringRef IDVal = DirectiveID.getString();
 
@@ -2439,27 +2387,9 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
   if (IDVal == ".amd_kernel_code_t")
     return ParseDirectiveAMDKernelCodeT();
 
-  if (IDVal == ".hsatext")
-    return ParseSectionDirectiveHSAText();
-
   if (IDVal == ".amdgpu_hsa_kernel")
     return ParseDirectiveAMDGPUHsaKernel();
 
-  if (IDVal == ".amdgpu_hsa_module_global")
-    return ParseDirectiveAMDGPUHsaModuleGlobal();
-
-  if (IDVal == ".amdgpu_hsa_program_global")
-    return ParseDirectiveAMDGPUHsaProgramGlobal();
-
-  if (IDVal == ".hsadata_global_agent")
-    return ParseSectionDirectiveHSADataGlobalAgent();
-
-  if (IDVal == ".hsadata_global_program")
-    return ParseSectionDirectiveHSADataGlobalProgram();
-
-  if (IDVal == ".hsarodata_readonly_agent")
-    return ParseSectionDirectiveHSARodataReadonlyAgent();
-
   return true;
 }
 
@@ -2919,6 +2849,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
   if (getLexer().isNot(AsmToken::Integer))
     return true;
 
+  SMLoc ValLoc = Parser.getTok().getLoc();
   if (getParser().parseAbsoluteExpression(CntVal))
     return true;
 
@@ -2936,21 +2867,24 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
   }
 
-  // To improve diagnostics, do not skip delimiters on errors
-  if (!Failed) {
-    if (getLexer().isNot(AsmToken::RParen)) {
-      return true;
-    }
-    Parser.Lex();
-    if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
-      const AsmToken NextToken = getLexer().peekTok();
-      if (NextToken.is(AsmToken::Identifier)) {
-        Parser.Lex();
-      }
+  if (Failed) {
+    Error(ValLoc, "too large value for " + CntName);
+    return true;
+  }
+
+  if (getLexer().isNot(AsmToken::RParen)) {
+    return true;
+  }
+
+  Parser.Lex();
+  if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
+    const AsmToken NextToken = getLexer().peekTok();
+    if (NextToken.is(AsmToken::Identifier)) {
+      Parser.Lex();
     }
   }
 
-  return Failed;
+  return false;
 }
 
 OperandMatchResultTy
diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt
index cafce0164fa9..e30844f082cd 100644
--- a/lib/Target/AMDGPU/CMakeLists.txt
+++ b/lib/Target/AMDGPU/CMakeLists.txt
@@ -58,6 +58,7 @@ add_llvm_target(AMDGPUCodeGen
   AMDGPUISelLowering.cpp
   AMDGPUInstrInfo.cpp
   AMDGPUPromoteAlloca.cpp
+  AMDGPURegAsmNames.inc.cpp
   AMDGPURegisterInfo.cpp
   AMDGPUUnifyDivergentExitNodes.cpp
   GCNHazardRecognizer.cpp
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 9b3cde7c4df6..88c92b9582fd 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -20,21 +20,20 @@
 #include "AMDGPUDisassembler.h"
 #include "AMDGPU.h"
 #include "AMDGPURegisterInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIDefines.h"
 #include "Utils/AMDGPUBaseInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/ELF.h"
-#include "llvm/Support/Endian.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/TargetRegistry.h"
 
-
 using namespace llvm;
 
 #define DEBUG_TYPE "amdgpu-disassembler"
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 0ff405a71e9b..5fa3cf1a223f 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -20,8 +20,8 @@
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
 #include "llvm/MC/MCDisassembler/MCSymbolizer.h"
-#include <cstdint>
 #include <algorithm>
+#include <cstdint>
 #include <memory>
 
 namespace llvm {
diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 80fc4ac9d2a3..cd9e7fb04f16 100644
--- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "AMDGPUSubtarget.h"
 #include "GCNHazardRecognizer.h"
+#include "AMDGPUSubtarget.h"
 #include "SIDefines.h"
 #include "SIInstrInfo.h"
 #include "SIRegisterInfo.h"
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index 523eea41897e..b84640230eee 100644
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -9,8 +9,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPUInstPrinter.h"
-#include "SIDefines.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
 #include "Utils/AMDGPUAsmUtils.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/MC/MCExpr.h"
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index f3266fe82955..0a9c2b94c1ee 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -8,8 +8,8 @@
 /// \file
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "MCTargetDesc/AMDGPUFixupKinds.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAssembler.h"
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h
deleted file mode 100644
index 816e8c744b27..000000000000
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h
+++ /dev/null
@@ -1,422 +0,0 @@
-//===--- AMDGPUCodeObjectMetadata.h -----------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief AMDGPU Code Object Metadata definitions and in-memory
-/// representations.
-///
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
-#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
-
-#include <cstdint>
-#include <string>
-#include <system_error>
-#include <vector>
-
-namespace llvm {
-namespace AMDGPU {
-
-//===----------------------------------------------------------------------===//
-// Code Object Metadata.
-//===----------------------------------------------------------------------===//
-namespace CodeObject {
-
-/// \brief Code object metadata major version.
-constexpr uint32_t MetadataVersionMajor = 1;
-/// \brief Code object metadata minor version.
-constexpr uint32_t MetadataVersionMinor = 0;
-
-/// \brief Code object metadata beginning assembler directive.
-constexpr char MetadataAssemblerDirectiveBegin[] =
-    ".amdgpu_code_object_metadata";
-/// \brief Code object metadata ending assembler directive.
-constexpr char MetadataAssemblerDirectiveEnd[] =
-    ".end_amdgpu_code_object_metadata";
-
-/// \brief Access qualifiers.
-enum class AccessQualifier : uint8_t {
-  Default   = 0,
-  ReadOnly  = 1,
-  WriteOnly = 2,
-  ReadWrite = 3,
-  Unknown   = 0xff
-};
-
-/// \brief Address space qualifiers.
-enum class AddressSpaceQualifier : uint8_t {
-  Private  = 0,
-  Global   = 1,
-  Constant = 2,
-  Local    = 3,
-  Generic  = 4,
-  Region   = 5,
-  Unknown  = 0xff
-};
-
-/// \brief Value kinds.
-enum class ValueKind : uint8_t {
-  ByValue                = 0,
-  GlobalBuffer           = 1,
-  DynamicSharedPointer   = 2,
-  Sampler                = 3,
-  Image                  = 4,
-  Pipe                   = 5,
-  Queue                  = 6,
-  HiddenGlobalOffsetX    = 7,
-  HiddenGlobalOffsetY    = 8,
-  HiddenGlobalOffsetZ    = 9,
-  HiddenNone             = 10,
-  HiddenPrintfBuffer     = 11,
-  HiddenDefaultQueue     = 12,
-  HiddenCompletionAction = 13,
-  Unknown                = 0xff
-};
-
-/// \brief Value types.
-enum class ValueType : uint8_t {
-  Struct  = 0,
-  I8      = 1,
-  U8      = 2,
-  I16     = 3,
-  U16     = 4,
-  F16     = 5,
-  I32     = 6,
-  U32     = 7,
-  F32     = 8,
-  I64     = 9,
-  U64     = 10,
-  F64     = 11,
-  Unknown = 0xff
-};
-
-//===----------------------------------------------------------------------===//
-// Kernel Metadata.
-//===----------------------------------------------------------------------===//
-namespace Kernel {
-
-//===----------------------------------------------------------------------===//
-// Kernel Attributes Metadata.
-//===----------------------------------------------------------------------===//
-namespace Attrs {
-
-namespace Key {
-/// \brief Key for Kernel::Attr::Metadata::mReqdWorkGroupSize.
-constexpr char ReqdWorkGroupSize[] = "ReqdWorkGroupSize";
-/// \brief Key for Kernel::Attr::Metadata::mWorkGroupSizeHint.
-constexpr char WorkGroupSizeHint[] = "WorkGroupSizeHint";
-/// \brief Key for Kernel::Attr::Metadata::mVecTypeHint.
-constexpr char VecTypeHint[] = "VecTypeHint";
-} // end namespace Key
-
-/// \brief In-memory representation of kernel attributes metadata.
-struct Metadata final {
-  /// \brief 'reqd_work_group_size' attribute. Optional.
-  std::vector<uint32_t> mReqdWorkGroupSize = std::vector<uint32_t>();
-  /// \brief 'work_group_size_hint' attribute. Optional.
-  std::vector<uint32_t> mWorkGroupSizeHint = std::vector<uint32_t>();
-  /// \brief 'vec_type_hint' attribute. Optional.
-  std::string mVecTypeHint = std::string();
-
-  /// \brief Default constructor.
-  Metadata() = default;
-
-  /// \returns True if kernel attributes metadata is empty, false otherwise.
-  bool empty() const {
-    return mReqdWorkGroupSize.empty() &&
-           mWorkGroupSizeHint.empty() &&
-           mVecTypeHint.empty();
-  }
-
-  /// \returns True if kernel attributes metadata is not empty, false otherwise.
-  bool notEmpty() const {
-    return !empty();
-  }
-};
-
-} // end namespace Attrs
-
-//===----------------------------------------------------------------------===//
-// Kernel Argument Metadata.
-//===----------------------------------------------------------------------===//
-namespace Arg {
-
-namespace Key {
-/// \brief Key for Kernel::Arg::Metadata::mSize.
-constexpr char Size[] = "Size";
-/// \brief Key for Kernel::Arg::Metadata::mAlign.
-constexpr char Align[] = "Align";
-/// \brief Key for Kernel::Arg::Metadata::mValueKind.
-constexpr char ValueKind[] = "ValueKind";
-/// \brief Key for Kernel::Arg::Metadata::mValueType.
-constexpr char ValueType[] = "ValueType";
-/// \brief Key for Kernel::Arg::Metadata::mPointeeAlign.
-constexpr char PointeeAlign[] = "PointeeAlign";
-/// \brief Key for Kernel::Arg::Metadata::mAccQual.
-constexpr char AccQual[] = "AccQual";
-/// \brief Key for Kernel::Arg::Metadata::mAddrSpaceQual.
-constexpr char AddrSpaceQual[] = "AddrSpaceQual";
-/// \brief Key for Kernel::Arg::Metadata::mIsConst.
-constexpr char IsConst[] = "IsConst";
-/// \brief Key for Kernel::Arg::Metadata::mIsPipe.
-constexpr char IsPipe[] = "IsPipe";
-/// \brief Key for Kernel::Arg::Metadata::mIsRestrict.
-constexpr char IsRestrict[] = "IsRestrict";
-/// \brief Key for Kernel::Arg::Metadata::mIsVolatile.
-constexpr char IsVolatile[] = "IsVolatile";
-/// \brief Key for Kernel::Arg::Metadata::mName.
-constexpr char Name[] = "Name";
-/// \brief Key for Kernel::Arg::Metadata::mTypeName.
-constexpr char TypeName[] = "TypeName";
-} // end namespace Key
-
-/// \brief In-memory representation of kernel argument metadata.
-struct Metadata final {
-  /// \brief Size in bytes. Required.
-  uint32_t mSize = 0;
-  /// \brief Alignment in bytes. Required.
-  uint32_t mAlign = 0;
-  /// \brief Value kind. Required.
-  ValueKind mValueKind = ValueKind::Unknown;
-  /// \brief Value type. Required.
-  ValueType mValueType = ValueType::Unknown;
-  /// \brief Pointee alignment in bytes. Optional.
-  uint32_t mPointeeAlign = 0;
-  /// \brief Access qualifier. Optional.
-  AccessQualifier mAccQual = AccessQualifier::Unknown;
-  /// \brief Address space qualifier. Optional.
-  AddressSpaceQualifier mAddrSpaceQual = AddressSpaceQualifier::Unknown;
-  /// \brief True if 'const' qualifier is specified. Optional.
-  bool mIsConst = false;
-  /// \brief True if 'pipe' qualifier is specified. Optional.
-  bool mIsPipe = false;
-  /// \brief True if 'restrict' qualifier is specified. Optional.
-  bool mIsRestrict = false;
-  /// \brief True if 'volatile' qualifier is specified. Optional.
-  bool mIsVolatile = false;
-  /// \brief Name. Optional.
-  std::string mName = std::string();
-  /// \brief Type name. Optional.
-  std::string mTypeName = std::string();
-
-  /// \brief Default constructor.
-  Metadata() = default;
-};
-
-} // end namespace Arg
-
-//===----------------------------------------------------------------------===//
-// Kernel Code Properties Metadata.
-//===----------------------------------------------------------------------===//
-namespace CodeProps {
-
-namespace Key {
-/// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentSize.
-constexpr char KernargSegmentSize[] = "KernargSegmentSize";
-/// \brief Key for Kernel::CodeProps::Metadata::mWorkgroupGroupSegmentSize.
-constexpr char WorkgroupGroupSegmentSize[] = "WorkgroupGroupSegmentSize";
-/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemPrivateSegmentSize.
-constexpr char WorkitemPrivateSegmentSize[] = "WorkitemPrivateSegmentSize";
-/// \brief Key for Kernel::CodeProps::Metadata::mWavefrontNumSGPRs.
-constexpr char WavefrontNumSGPRs[] = "WavefrontNumSGPRs";
-/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemNumVGPRs.
-constexpr char WorkitemNumVGPRs[] = "WorkitemNumVGPRs";
-/// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentAlign.
-constexpr char KernargSegmentAlign[] = "KernargSegmentAlign";
-/// \brief Key for Kernel::CodeProps::Metadata::mGroupSegmentAlign.
-constexpr char GroupSegmentAlign[] = "GroupSegmentAlign";
-/// \brief Key for Kernel::CodeProps::Metadata::mPrivateSegmentAlign.
-constexpr char PrivateSegmentAlign[] = "PrivateSegmentAlign";
-/// \brief Key for Kernel::CodeProps::Metadata::mWavefrontSize.
-constexpr char WavefrontSize[] = "WavefrontSize";
-} // end namespace Key
-
-/// \brief In-memory representation of kernel code properties metadata.
-struct Metadata final {
-  /// \brief Size in bytes of the kernarg segment memory. Kernarg segment memory
-  /// holds the values of the arguments to the kernel. Optional.
-  uint64_t mKernargSegmentSize = 0;
-  /// \brief Size in bytes of the group segment memory required by a workgroup.
-  /// This value does not include any dynamically allocated group segment memory
-  /// that may be added when the kernel is dispatched. Optional.
-  uint32_t mWorkgroupGroupSegmentSize = 0;
-  /// \brief Size in bytes of the private segment memory required by a workitem.
-  /// Private segment memory includes arg, spill and private segments. Optional.
-  uint32_t mWorkitemPrivateSegmentSize = 0;
-  /// \brief Total number of SGPRs used by a wavefront. Optional.
-  uint16_t mWavefrontNumSGPRs = 0;
-  /// \brief Total number of VGPRs used by a workitem. Optional.
-  uint16_t mWorkitemNumVGPRs = 0;
-  /// \brief Maximum byte alignment of variables used by the kernel in the
-  /// kernarg memory segment. Expressed as a power of two. Optional.
-  uint8_t mKernargSegmentAlign = 0;
-  /// \brief Maximum byte alignment of variables used by the kernel in the
-  /// group memory segment. Expressed as a power of two. Optional.
-  uint8_t mGroupSegmentAlign = 0;
-  /// \brief Maximum byte alignment of variables used by the kernel in the
-  /// private memory segment. Expressed as a power of two. Optional.
-  uint8_t mPrivateSegmentAlign = 0;
-  /// \brief Wavefront size. Expressed as a power of two. Optional.
-  uint8_t mWavefrontSize = 0;
-
-  /// \brief Default constructor.
-  Metadata() = default;
-
-  /// \returns True if kernel code properties metadata is empty, false
-  /// otherwise.
-  bool empty() const {
-    return !notEmpty();
-  }
-
-  /// \returns True if kernel code properties metadata is not empty, false
-  /// otherwise.
-  bool notEmpty() const {
-    return mKernargSegmentSize || mWorkgroupGroupSegmentSize ||
-           mWorkitemPrivateSegmentSize || mWavefrontNumSGPRs ||
-           mWorkitemNumVGPRs || mKernargSegmentAlign || mGroupSegmentAlign ||
-           mPrivateSegmentAlign || mWavefrontSize;
-  }
-};
-
-} // end namespace CodeProps
-
-//===----------------------------------------------------------------------===//
-// Kernel Debug Properties Metadata.
-//===----------------------------------------------------------------------===//
-namespace DebugProps {
-
-namespace Key {
-/// \brief Key for Kernel::DebugProps::Metadata::mDebuggerABIVersion.
-constexpr char DebuggerABIVersion[] = "DebuggerABIVersion";
-/// \brief Key for Kernel::DebugProps::Metadata::mReservedNumVGPRs.
-constexpr char ReservedNumVGPRs[] = "ReservedNumVGPRs";
-/// \brief Key for Kernel::DebugProps::Metadata::mReservedFirstVGPR.
-constexpr char ReservedFirstVGPR[] = "ReservedFirstVGPR";
-/// \brief Key for Kernel::DebugProps::Metadata::mPrivateSegmentBufferSGPR.
-constexpr char PrivateSegmentBufferSGPR[] = "PrivateSegmentBufferSGPR";
-/// \brief Key for
-///     Kernel::DebugProps::Metadata::mWavefrontPrivateSegmentOffsetSGPR.
-constexpr char WavefrontPrivateSegmentOffsetSGPR[] =
-    "WavefrontPrivateSegmentOffsetSGPR";
-} // end namespace Key
-
-/// \brief In-memory representation of kernel debug properties metadata.
-struct Metadata final {
-  /// \brief Debugger ABI version. Optional.
-  std::vector<uint32_t> mDebuggerABIVersion = std::vector<uint32_t>();
-  /// \brief Consecutive number of VGPRs reserved for debugger use. Must be 0 if
-  /// mDebuggerABIVersion is not set. Optional.
-  uint16_t mReservedNumVGPRs = 0;
-  /// \brief First fixed VGPR reserved. Must be uint16_t(-1) if
-  /// mDebuggerABIVersion is not set or mReservedFirstVGPR is 0. Optional.
-  uint16_t mReservedFirstVGPR = uint16_t(-1);
-  /// \brief Fixed SGPR of the first of 4 SGPRs used to hold the scratch V# used
-  /// for the entire kernel execution. Must be uint16_t(-1) if
-  /// mDebuggerABIVersion is not set or SGPR not used or not known. Optional.
-  uint16_t mPrivateSegmentBufferSGPR = uint16_t(-1);
-  /// \brief Fixed SGPR used to hold the wave scratch offset for the entire
-  /// kernel execution. Must be uint16_t(-1) if mDebuggerABIVersion is not set
-  /// or SGPR is not used or not known. Optional.
-  uint16_t mWavefrontPrivateSegmentOffsetSGPR = uint16_t(-1);
-
-  /// \brief Default constructor.
-  Metadata() = default;
-
-  /// \returns True if kernel debug properties metadata is empty, false
-  /// otherwise.
-  bool empty() const {
-    return !notEmpty();
-  }
-
-  /// \returns True if kernel debug properties metadata is not empty, false
-  /// otherwise.
-  bool notEmpty() const {
-    return !mDebuggerABIVersion.empty();
-  }
-};
-
-} // end namespace DebugProps
-
-namespace Key {
-/// \brief Key for Kernel::Metadata::mName.
-constexpr char Name[] = "Name";
-/// \brief Key for Kernel::Metadata::mLanguage.
-constexpr char Language[] = "Language";
-/// \brief Key for Kernel::Metadata::mLanguageVersion.
-constexpr char LanguageVersion[] = "LanguageVersion";
-/// \brief Key for Kernel::Metadata::mAttrs.
-constexpr char Attrs[] = "Attrs";
-/// \brief Key for Kernel::Metadata::mArgs.
-constexpr char Args[] = "Args";
-/// \brief Key for Kernel::Metadata::mCodeProps.
-constexpr char CodeProps[] = "CodeProps";
-/// \brief Key for Kernel::Metadata::mDebugProps.
-constexpr char DebugProps[] = "DebugProps";
-} // end namespace Key
-
-/// \brief In-memory representation of kernel metadata.
-struct Metadata final {
-  /// \brief Name. Required.
-  std::string mName = std::string();
-  /// \brief Language. Optional.
-  std::string mLanguage = std::string();
-  /// \brief Language version. Optional.
-  std::vector<uint32_t> mLanguageVersion = std::vector<uint32_t>();
-  /// \brief Attributes metadata. Optional.
-  Attrs::Metadata mAttrs = Attrs::Metadata();
-  /// \brief Arguments metadata. Optional.
-  std::vector<Arg::Metadata> mArgs = std::vector<Arg::Metadata>();
-  /// \brief Code properties metadata. Optional.
-  CodeProps::Metadata mCodeProps = CodeProps::Metadata();
-  /// \brief Debug properties metadata. Optional.
-  DebugProps::Metadata mDebugProps = DebugProps::Metadata();
-
-  /// \brief Default constructor.
-  Metadata() = default;
-};
-
-} // end namespace Kernel
-
-namespace Key {
-/// \brief Key for CodeObject::Metadata::mVersion.
-constexpr char Version[] = "Version";
-/// \brief Key for CodeObject::Metadata::mPrintf.
-constexpr char Printf[] = "Printf";
-/// \brief Key for CodeObject::Metadata::mKernels.
-constexpr char Kernels[] = "Kernels";
-} // end namespace Key
-
-/// \brief In-memory representation of code object metadata.
-struct Metadata final {
-  /// \brief Code object metadata version. Required.
-  std::vector<uint32_t> mVersion = std::vector<uint32_t>();
-  /// \brief Printf metadata. Optional.
-  std::vector<std::string> mPrintf = std::vector<std::string>();
-  /// \brief Kernels metadata. Optional.
-  std::vector<Kernel::Metadata> mKernels = std::vector<Kernel::Metadata>();
-
-  /// \brief Default constructor.
-  Metadata() = default;
-
-  /// \brief Converts \p YamlString to \p CodeObjectMetadata.
-  static std::error_code fromYamlString(std::string YamlString,
-                                        Metadata &CodeObjectMetadata);
-
-  /// \brief Converts \p CodeObjectMetadata to \p YamlString.
-  static std::error_code toYamlString(Metadata CodeObjectMetadata,
-                                      std::string &YamlString);
-};
-
-} // end namespace CodeObject
-} // end namespace AMDGPU
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
index 647017d5061d..4e828a791e09 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
@@ -13,20 +13,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "AMDGPU.h"
 #include "AMDGPUCodeObjectMetadataStreamer.h"
+#include "AMDGPU.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Module.h"
-#include "llvm/Support/YAMLTraits.h"
-
-using namespace llvm::AMDGPU;
-using namespace llvm::AMDGPU::CodeObject;
-
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
-LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata)
-LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata)
+#include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
 
@@ -37,192 +29,7 @@ static cl::opt<bool> VerifyCodeObjectMetadata(
     "amdgpu-verify-comd",
     cl::desc("Verify AMDGPU Code Object Metadata"));
 
-namespace yaml {
-
-template <>
-struct ScalarEnumerationTraits<AccessQualifier> {
-  static void enumeration(IO &YIO, AccessQualifier &EN) {
-    YIO.enumCase(EN, "Default", AccessQualifier::Default);
-    YIO.enumCase(EN, "ReadOnly", AccessQualifier::ReadOnly);
-    YIO.enumCase(EN, "WriteOnly", AccessQualifier::WriteOnly);
-    YIO.enumCase(EN, "ReadWrite", AccessQualifier::ReadWrite);
-  }
-};
-
-template <>
-struct ScalarEnumerationTraits<AddressSpaceQualifier> {
-  static void enumeration(IO &YIO, AddressSpaceQualifier &EN) {
-    YIO.enumCase(EN, "Private", AddressSpaceQualifier::Private);
-    YIO.enumCase(EN, "Global", AddressSpaceQualifier::Global);
-    YIO.enumCase(EN, "Constant", AddressSpaceQualifier::Constant);
-    YIO.enumCase(EN, "Local", AddressSpaceQualifier::Local);
-    YIO.enumCase(EN, "Generic", AddressSpaceQualifier::Generic);
-    YIO.enumCase(EN, "Region", AddressSpaceQualifier::Region);
-  }
-};
-
-template <>
-struct ScalarEnumerationTraits<ValueKind> {
-  static void enumeration(IO &YIO, ValueKind &EN) {
-    YIO.enumCase(EN, "ByValue", ValueKind::ByValue);
-    YIO.enumCase(EN, "GlobalBuffer", ValueKind::GlobalBuffer);
-    YIO.enumCase(EN, "DynamicSharedPointer", ValueKind::DynamicSharedPointer);
-    YIO.enumCase(EN, "Sampler", ValueKind::Sampler);
-    YIO.enumCase(EN, "Image", ValueKind::Image);
-    YIO.enumCase(EN, "Pipe", ValueKind::Pipe);
-    YIO.enumCase(EN, "Queue", ValueKind::Queue);
-    YIO.enumCase(EN, "HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX);
-    YIO.enumCase(EN, "HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY);
-    YIO.enumCase(EN, "HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ);
-    YIO.enumCase(EN, "HiddenNone", ValueKind::HiddenNone);
-    YIO.enumCase(EN, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer);
-    YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue);
-    YIO.enumCase(EN, "HiddenCompletionAction",
-                 ValueKind::HiddenCompletionAction);
-  }
-};
-
-template <>
-struct ScalarEnumerationTraits<ValueType> {
-  static void enumeration(IO &YIO, ValueType &EN) {
-    YIO.enumCase(EN, "Struct", ValueType::Struct);
-    YIO.enumCase(EN, "I8", ValueType::I8);
-    YIO.enumCase(EN, "U8", ValueType::U8);
-    YIO.enumCase(EN, "I16", ValueType::I16);
-    YIO.enumCase(EN, "U16", ValueType::U16);
-    YIO.enumCase(EN, "F16", ValueType::F16);
-    YIO.enumCase(EN, "I32", ValueType::I32);
-    YIO.enumCase(EN, "U32", ValueType::U32);
-    YIO.enumCase(EN, "F32", ValueType::F32);
-    YIO.enumCase(EN, "I64", ValueType::I64);
-    YIO.enumCase(EN, "U64", ValueType::U64);
-    YIO.enumCase(EN, "F64", ValueType::F64);
-  }
-};
-
-template <>
-struct MappingTraits<Kernel::Attrs::Metadata> {
-  static void mapping(IO &YIO, Kernel::Attrs::Metadata &MD) {
-    YIO.mapOptional(Kernel::Attrs::Key::ReqdWorkGroupSize,
-                    MD.mReqdWorkGroupSize, std::vector<uint32_t>());
-    YIO.mapOptional(Kernel::Attrs::Key::WorkGroupSizeHint,
-                    MD.mWorkGroupSizeHint, std::vector<uint32_t>());
-    YIO.mapOptional(Kernel::Attrs::Key::VecTypeHint,
-                    MD.mVecTypeHint, std::string());
-  }
-};
-
-template <>
-struct MappingTraits<Kernel::Arg::Metadata> {
-  static void mapping(IO &YIO, Kernel::Arg::Metadata &MD) {
-    YIO.mapRequired(Kernel::Arg::Key::Size, MD.mSize);
-    YIO.mapRequired(Kernel::Arg::Key::Align, MD.mAlign);
-    YIO.mapRequired(Kernel::Arg::Key::ValueKind, MD.mValueKind);
-    YIO.mapRequired(Kernel::Arg::Key::ValueType, MD.mValueType);
-    YIO.mapOptional(Kernel::Arg::Key::PointeeAlign, MD.mPointeeAlign,
-                    uint32_t(0));
-    YIO.mapOptional(Kernel::Arg::Key::AccQual, MD.mAccQual,
-                    AccessQualifier::Unknown);
-    YIO.mapOptional(Kernel::Arg::Key::AddrSpaceQual, MD.mAddrSpaceQual,
-                    AddressSpaceQualifier::Unknown);
-    YIO.mapOptional(Kernel::Arg::Key::IsConst, MD.mIsConst, false);
-    YIO.mapOptional(Kernel::Arg::Key::IsPipe, MD.mIsPipe, false);
-    YIO.mapOptional(Kernel::Arg::Key::IsRestrict, MD.mIsRestrict, false);
-    YIO.mapOptional(Kernel::Arg::Key::IsVolatile, MD.mIsVolatile, false);
-    YIO.mapOptional(Kernel::Arg::Key::Name, MD.mName, std::string());
-    YIO.mapOptional(Kernel::Arg::Key::TypeName, MD.mTypeName, std::string());
-  }
-};
-
-template <>
-struct MappingTraits<Kernel::CodeProps::Metadata> {
-  static void mapping(IO &YIO, Kernel::CodeProps::Metadata &MD) {
-    YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentSize,
-                    MD.mKernargSegmentSize, uint64_t(0));
-    YIO.mapOptional(Kernel::CodeProps::Key::WorkgroupGroupSegmentSize,
-                    MD.mWorkgroupGroupSegmentSize, uint32_t(0));
-    YIO.mapOptional(Kernel::CodeProps::Key::WorkitemPrivateSegmentSize,
-                    MD.mWorkitemPrivateSegmentSize, uint32_t(0));
-    YIO.mapOptional(Kernel::CodeProps::Key::WavefrontNumSGPRs,
-                    MD.mWavefrontNumSGPRs, uint16_t(0));
-    YIO.mapOptional(Kernel::CodeProps::Key::WorkitemNumVGPRs,
-                    MD.mWorkitemNumVGPRs, uint16_t(0));
-    YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentAlign,
-                    MD.mKernargSegmentAlign, uint8_t(0));
-    YIO.mapOptional(Kernel::CodeProps::Key::GroupSegmentAlign,
-                    MD.mGroupSegmentAlign, uint8_t(0));
-    YIO.mapOptional(Kernel::CodeProps::Key::PrivateSegmentAlign,
-                    MD.mPrivateSegmentAlign, uint8_t(0));
-    YIO.mapOptional(Kernel::CodeProps::Key::WavefrontSize,
-                    MD.mWavefrontSize, uint8_t(0));
-  }
-};
-
-template <>
-struct MappingTraits<Kernel::DebugProps::Metadata> {
-  static void mapping(IO &YIO, Kernel::DebugProps::Metadata &MD) {
-    YIO.mapOptional(Kernel::DebugProps::Key::DebuggerABIVersion,
-                    MD.mDebuggerABIVersion, std::vector<uint32_t>());
-    YIO.mapOptional(Kernel::DebugProps::Key::ReservedNumVGPRs,
-                    MD.mReservedNumVGPRs, uint16_t(0));
-    YIO.mapOptional(Kernel::DebugProps::Key::ReservedFirstVGPR,
-                    MD.mReservedFirstVGPR, uint16_t(-1));
-    YIO.mapOptional(Kernel::DebugProps::Key::PrivateSegmentBufferSGPR,
-                    MD.mPrivateSegmentBufferSGPR, uint16_t(-1));
-    YIO.mapOptional(Kernel::DebugProps::Key::WavefrontPrivateSegmentOffsetSGPR,
-                    MD.mWavefrontPrivateSegmentOffsetSGPR, uint16_t(-1));
-  }
-};
-
-template <>
-struct MappingTraits<Kernel::Metadata> {
-  static void mapping(IO &YIO, Kernel::Metadata &MD) {
-    YIO.mapRequired(Kernel::Key::Name, MD.mName);
-    YIO.mapOptional(Kernel::Key::Language, MD.mLanguage, std::string());
-    YIO.mapOptional(Kernel::Key::LanguageVersion, MD.mLanguageVersion,
-                    std::vector<uint32_t>());
-    if (!MD.mAttrs.empty() || !YIO.outputting())
-      YIO.mapOptional(Kernel::Key::Attrs, MD.mAttrs);
-    if (!MD.mArgs.empty() || !YIO.outputting())
-      YIO.mapOptional(Kernel::Key::Args, MD.mArgs);
-    if (!MD.mCodeProps.empty() || !YIO.outputting())
-      YIO.mapOptional(Kernel::Key::CodeProps, MD.mCodeProps);
-    if (!MD.mDebugProps.empty() || !YIO.outputting())
-      YIO.mapOptional(Kernel::Key::DebugProps, MD.mDebugProps);
-  }
-};
-
-template <>
-struct MappingTraits<CodeObject::Metadata> {
-  static void mapping(IO &YIO, CodeObject::Metadata &MD) {
-    YIO.mapRequired(Key::Version, MD.mVersion);
-    YIO.mapOptional(Key::Printf, MD.mPrintf, std::vector<std::string>());
-    if (!MD.mKernels.empty() || !YIO.outputting())
-      YIO.mapOptional(Key::Kernels, MD.mKernels);
-  }
-};
-
-} // end namespace yaml
-
 namespace AMDGPU {
-
-/* static */
-std::error_code CodeObject::Metadata::fromYamlString(
-    std::string YamlString, CodeObject::Metadata &CodeObjectMetadata) {
-  yaml::Input YamlInput(YamlString);
-  YamlInput >> CodeObjectMetadata;
-  return YamlInput.error();
-}
-
-/* static */
-std::error_code CodeObject::Metadata::toYamlString(
-    CodeObject::Metadata CodeObjectMetadata, std::string &YamlString) {
-  raw_string_ostream YamlStream(YamlString);
-  yaml::Output YamlOutput(YamlStream, nullptr, std::numeric_limits<int>::max());
-  YamlOutput << CodeObjectMetadata;
-  return std::error_code();
-}
-
 namespace CodeObject {
 
 void MetadataStreamer::dump(StringRef YamlString) const {
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h
index 8d4c51763f63..c6681431d74d 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h
@@ -17,9 +17,9 @@
 #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
 
 #include "AMDGPU.h"
-#include "AMDGPUCodeObjectMetadata.h"
 #include "AMDKernelCodeT.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/AMDGPUCodeObjectMetadata.h"
 #include "llvm/Support/ErrorOr.h"
 
 namespace llvm {
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 073d19422e86..6abe7f3d37d5 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -8,12 +8,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPUMCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCValue.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 
 using namespace llvm;
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 8dc863f723e2..2a0032fc9adc 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -11,12 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "AMDGPU.h"
 #include "AMDGPUTargetStreamer.h"
+#include "AMDGPU.h"
 #include "SIDefines.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "Utils/AMDKernelCodeTUtils.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Metadata.h"
@@ -25,7 +26,6 @@
 #include "llvm/MC/MCELFStreamer.h"
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCSectionELF.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/FormattedStream.h"
 
 namespace llvm {
@@ -100,16 +100,6 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
   }
 }
 
-void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaModuleScopeGlobal(
-    StringRef GlobalName) {
-  OS << "\t.amdgpu_hsa_module_global " << GlobalName << '\n';
-}
-
-void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal(
-    StringRef GlobalName) {
-  OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n';
-}
-
 bool AMDGPUTargetAsmStreamer::EmitCodeObjectMetadata(StringRef YamlString) {
   auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString);
   if (!VerifiedYamlString)
@@ -214,24 +204,6 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
   Symbol->setType(ELF::STT_AMDGPU_HSA_KERNEL);
 }
 
-void AMDGPUTargetELFStreamer::EmitAMDGPUHsaModuleScopeGlobal(
-    StringRef GlobalName) {
-
-  MCSymbolELF *Symbol = cast<MCSymbolELF>(
-      getStreamer().getContext().getOrCreateSymbol(GlobalName));
-  Symbol->setType(ELF::STT_OBJECT);
-  Symbol->setBinding(ELF::STB_LOCAL);
-}
-
-void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal(
-    StringRef GlobalName) {
-
-  MCSymbolELF *Symbol = cast<MCSymbolELF>(
-      getStreamer().getContext().getOrCreateSymbol(GlobalName));
-  Symbol->setType(ELF::STT_OBJECT);
-  Symbol->setBinding(ELF::STB_GLOBAL);
-}
-
 bool AMDGPUTargetELFStreamer::EmitCodeObjectMetadata(StringRef YamlString) {
   auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString);
   if (!VerifiedYamlString)
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 5c588bbded9c..968128e94d0b 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -44,10 +44,6 @@ public:
 
   virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) = 0;
 
-  virtual void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) = 0;
-
-  virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0;
-
   virtual void EmitStartOfCodeObjectMetadata(const Module &Mod);
 
   virtual void EmitKernelCodeObjectMetadata(
@@ -74,10 +70,6 @@ public:
 
   void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
 
-  void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override;
-
-  void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
-
   /// \returns True on success, false on failure.
   bool EmitCodeObjectMetadata(StringRef YamlString) override;
 };
@@ -105,10 +97,6 @@ public:
 
   void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
 
-  void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override;
-
-  void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
-
   /// \returns True on success, false on failure.
   bool EmitCodeObjectMetadata(StringRef YamlString) override;
 };
diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index 6015ec190fd4..eab90e1d344c 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -14,10 +14,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "R600Defines.h"
 #include "MCTargetDesc/AMDGPUFixupKinds.h"
 #include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "R600Defines.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCFixup.h"
diff --git a/lib/Target/AMDGPU/Processors.td b/lib/Target/AMDGPU/Processors.td
index 0e4eda982139..f6f2582aa11b 100644
--- a/lib/Target/AMDGPU/Processors.td
+++ b/lib/Target/AMDGPU/Processors.td
@@ -80,50 +80,53 @@ def : Proc<"cayman",     R600_VLIW4_Itin,
 // Southern Islands
 //===----------------------------------------------------------------------===//
 
-def : ProcessorModel<"SI", SIFullSpeedModel,
-  [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops]
+def : ProcessorModel<"gfx600",     SIFullSpeedModel, 
+  [FeatureISAVersion6_0_0]>;
+
+def : ProcessorModel<"SI",         SIFullSpeedModel,
+  [FeatureISAVersion6_0_0]
+>;
+
+def : ProcessorModel<"tahiti",     SIFullSpeedModel,
+  [FeatureISAVersion6_0_0]
 >;
 
-def : ProcessorModel<"tahiti", SIFullSpeedModel,
-  [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops]
+def : ProcessorModel<"gfx601",     SIQuarterSpeedModel,
+  [FeatureISAVersion6_0_1]
 >;
 
-def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
+def : ProcessorModel<"pitcairn",   SIQuarterSpeedModel, 
+  [FeatureISAVersion6_0_1]>;
 
-def : ProcessorModel<"verde",    SIQuarterSpeedModel, [FeatureSouthernIslands]>;
+def : ProcessorModel<"verde",      SIQuarterSpeedModel,
+  [FeatureISAVersion6_0_1]>;
 
-def : ProcessorModel<"oland",    SIQuarterSpeedModel, [FeatureSouthernIslands]>;
+def : ProcessorModel<"oland",      SIQuarterSpeedModel,
+  [FeatureISAVersion6_0_1]>;
 
-def : ProcessorModel<"hainan",   SIQuarterSpeedModel, [FeatureSouthernIslands]>;
+def : ProcessorModel<"hainan",     SIQuarterSpeedModel, [FeatureISAVersion6_0_1]>;
 
 //===----------------------------------------------------------------------===//
 // Sea Islands
 //===----------------------------------------------------------------------===//
 
-def : ProcessorModel<"bonaire",    SIQuarterSpeedModel,
+def : ProcessorModel<"gfx700",     SIQuarterSpeedModel,
   [FeatureISAVersion7_0_0]
 >;
 
-def : ProcessorModel<"kabini",     SIQuarterSpeedModel,
-  [FeatureISAVersion7_0_2]
+def : ProcessorModel<"bonaire",    SIQuarterSpeedModel,
+  [FeatureISAVersion7_0_0]
 >;
 
 def : ProcessorModel<"kaveri",     SIQuarterSpeedModel,
   [FeatureISAVersion7_0_0]
 >;
 
-def : ProcessorModel<"hawaii",     SIFullSpeedModel,
+def : ProcessorModel<"gfx701",     SIFullSpeedModel,
   [FeatureISAVersion7_0_1]
 >;
 
-def : ProcessorModel<"mullins",    SIQuarterSpeedModel,
-  [FeatureISAVersion7_0_2]>;
-
-def : ProcessorModel<"gfx700",     SIQuarterSpeedModel,
-  [FeatureISAVersion7_0_0]
->;
-
-def : ProcessorModel<"gfx701",     SIFullSpeedModel,
+def : ProcessorModel<"hawaii",     SIFullSpeedModel,
   [FeatureISAVersion7_0_1]
 >;
 
@@ -131,6 +134,17 @@ def : ProcessorModel<"gfx702",     SIQuarterSpeedModel,
   [FeatureISAVersion7_0_2]
 >;
 
+def : ProcessorModel<"gfx703",     SIQuarterSpeedModel,
+  [FeatureISAVersion7_0_3]
+>;
+
+def : ProcessorModel<"kabini",     SIQuarterSpeedModel,
+  [FeatureISAVersion7_0_3]
+>;
+
+def : ProcessorModel<"mullins",    SIQuarterSpeedModel,
+  [FeatureISAVersion7_0_3]>;
+
 //===----------------------------------------------------------------------===//
 // Volcanic Islands
 //===----------------------------------------------------------------------===//
@@ -187,10 +201,23 @@ def : ProcessorModel<"gfx810", SIQuarterSpeedModel,
   [FeatureISAVersion8_1_0]
 >;
 
-def : ProcessorModel<"gfx900",   SIQuarterSpeedModel,
-  [FeatureGFX9, FeatureISAVersion9_0_0, FeatureLDSBankCount32]
+//===----------------------------------------------------------------------===//
+// GFX9
+//===----------------------------------------------------------------------===//
+
+def : ProcessorModel<"gfx900", SIQuarterSpeedModel,
+  [FeatureISAVersion9_0_0]
+>;
+
+def : ProcessorModel<"gfx901", SIQuarterSpeedModel,
+  [FeatureISAVersion9_0_1]
+>;
+
+def : ProcessorModel<"gfx902", SIQuarterSpeedModel,
+  [FeatureISAVersion9_0_2]
 >;
 
-def : ProcessorModel<"gfx901",   SIQuarterSpeedModel,
-  [FeatureGFX9, FeatureXNACK, FeatureISAVersion9_0_1, FeatureLDSBankCount32]
+def : ProcessorModel<"gfx903", SIQuarterSpeedModel,
+  [FeatureISAVersion9_0_3]
 >;
+
diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index 09b328765604..6993e8a62a9c 100644
--- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -12,15 +12,14 @@
 /// computing their address on the fly ; it also sets STACK_SIZE info.
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Support/Debug.h"
 #include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
 #include "R600Defines.h"
 #include "R600InstrInfo.h"
 #include "R600MachineFunctionInfo.h"
 #include "R600RegisterInfo.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -30,6 +29,7 @@
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
diff --git a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
index 03fc1aff5ec1..0d8ccd088ec4 100644
--- a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
+++ b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
@@ -15,10 +15,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
 #include "R600Defines.h"
 #include "R600InstrInfo.h"
 #include "R600RegisterInfo.h"
-#include "AMDGPUSubtarget.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
diff --git a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
index 5c30a0734f0d..66def2d29caf 100644
--- a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
+++ b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
@@ -15,11 +15,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
 #include "R600Defines.h"
 #include "R600InstrInfo.h"
 #include "R600MachineFunctionInfo.h"
 #include "R600RegisterInfo.h"
-#include "AMDGPUSubtarget.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/lib/Target/AMDGPU/R600FrameLowering.cpp b/lib/Target/AMDGPU/R600FrameLowering.cpp
index 1f01ad732e00..37787b3c5f72 100644
--- a/lib/Target/AMDGPU/R600FrameLowering.cpp
+++ b/lib/Target/AMDGPU/R600FrameLowering.cpp
@@ -10,8 +10,8 @@
 #include "R600FrameLowering.h"
 #include "AMDGPUSubtarget.h"
 #include "R600RegisterInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/Support/MathExtras.h"
 
 using namespace llvm;
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 60b913cfd39a..c55878f8bff0 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1120,7 +1120,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
     Mask = DAG.getConstant(0xff, DL, MVT::i32);
   } else if (Store->getMemoryVT() == MVT::i16) {
     assert(Store->getAlignment() >= 2);
-    Mask = DAG.getConstant(0xffff, DL, MVT::i32);;
+    Mask = DAG.getConstant(0xffff, DL, MVT::i32);
   } else {
     llvm_unreachable("Unsupported private trunc store");
   }
diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp
index 2422d57269eb..c5da5e404200 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -12,12 +12,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "R600InstrInfo.h"
 #include "AMDGPU.h"
 #include "AMDGPUInstrInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "R600Defines.h"
 #include "R600FrameLowering.h"
-#include "R600InstrInfo.h"
 #include "R600RegisterInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/BitVector.h"
@@ -35,8 +35,8 @@
 #include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
-#include <cstring>
 #include <cstdint>
+#include <cstring>
 #include <iterator>
 #include <utility>
 #include <vector>
diff --git a/lib/Target/AMDGPU/R600MachineScheduler.cpp b/lib/Target/AMDGPU/R600MachineScheduler.cpp
index db18e5bd1afa..47fda1c8fa82 100644
--- a/lib/Target/AMDGPU/R600MachineScheduler.cpp
+++ b/lib/Target/AMDGPU/R600MachineScheduler.cpp
@@ -13,11 +13,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "R600MachineScheduler.h"
-#include "R600InstrInfo.h"
 #include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Pass.h"
 #include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
diff --git a/lib/Target/AMDGPU/R600Packetizer.cpp b/lib/Target/AMDGPU/R600Packetizer.cpp
index 3e957126b497..1cb40938cee7 100644
--- a/lib/Target/AMDGPU/R600Packetizer.cpp
+++ b/lib/Target/AMDGPU/R600Packetizer.cpp
@@ -14,7 +14,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Support/Debug.h"
 #include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
 #include "R600InstrInfo.h"
@@ -24,6 +23,7 @@
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
diff --git a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp b/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
index 62ebef8e91af..b5c439b21b89 100644
--- a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
+++ b/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
@@ -19,8 +19,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SIInstrInfo.h"
 #include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 3cca815d8773..5f5f25103c02 100644
--- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -65,10 +65,10 @@
 /// ultimately led to the creation of an illegal COPY.
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/DenseSet.h"
 #include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index dfac068d1f69..e10f1ed3762e 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -730,7 +730,8 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
     // Make sure sources are identical.
     const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
     const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
-    if (!Src0->isReg() || Src0->getSubReg() != Src1->getSubReg() ||
+    if (!Src0->isReg() || !Src1->isReg() ||
+        Src0->getSubReg() != Src1->getSubReg() ||
         Src0->getSubReg() != AMDGPU::NoSubRegister)
       return nullptr;
 
diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp
index 97bb0f0c0656..b1bd14e421f0 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -8,10 +8,10 @@
 //==-----------------------------------------------------------------------===//
 
 #include "SIFrameLowering.h"
+#include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
-#include "AMDGPUSubtarget.h"
 
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index b48b23911105..599ee942d738 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -17,12 +17,12 @@
 #define _USE_MATH_DEFINES
 #endif
 
+#include "SIISelLowering.h"
 #include "AMDGPU.h"
 #include "AMDGPUIntrinsicInfo.h"
-#include "AMDGPUTargetMachine.h"
 #include "AMDGPUSubtarget.h"
+#include "AMDGPUTargetMachine.h"
 #include "SIDefines.h"
-#include "SIISelLowering.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
@@ -2604,7 +2604,7 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
 
   SDValue FpToFp16 = DAG.getNode(ISD::FP_TO_FP16, DL, MVT::i32, Src);
   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FpToFp16);
-  return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);;
+  return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);
 }
 
 SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index c10badba88f3..0f009a48754a 100644
--- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -229,7 +229,7 @@ public:
                      MachineInstr &MI);
 
   BlockWaitcntBrackets()
-      : WaitAtBeginning(false), ValidLoop(false), MixedExpTypes(false),
+      : WaitAtBeginning(false), RevisitLoop(false), ValidLoop(false), MixedExpTypes(false),
         LoopRegion(NULL), PostOrder(0), Waitcnt(NULL), VgprUB(0), SgprUB(0) {
     for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
          T = (enum InstCounterType)(T + 1)) {
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 36d29b8ecf06..58c05cf16f15 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -20,10 +20,10 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Function.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Support/Debug.h"
 
diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 35d3a93d8710..5f1c7f1fc42f 100644
--- a/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -60,8 +60,8 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Pass.h"
diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp b/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 3680e02da576..ba616ada0c9c 100644
--- a/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -21,8 +21,8 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetMachine.h"
 
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 348bb4fa0260..9fdb8caac6f2 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -15,8 +15,8 @@
 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
 
 #include "AMDGPUMachineFunction.h"
-#include "SIRegisterInfo.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp
index 9d4e677400e6..bb17dbbdfbd6 100644
--- a/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -12,9 +12,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "SIMachineScheduler.h"
 #include "AMDGPU.h"
 #include "SIInstrInfo.h"
-#include "SIMachineScheduler.h"
 #include "SIRegisterInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index fae249b04492..f4ddf1891683 100644
--- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -20,13 +20,12 @@
 ///
 //===----------------------------------------------------------------------===//
 
-
 #include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
 #include "SIDefines.h"
 #include "SIInstrInfo.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include <unordered_map>
@@ -129,7 +128,8 @@ public:
   bool getNeg() const { return Neg; }
   bool getSext() const { return Sext; }
 
-  uint64_t getSrcMods() const;
+  uint64_t getSrcMods(const SIInstrInfo *TII,
+                      const MachineOperand *SrcOp) const;
 };
 
 class SDWADstOperand : public SDWAOperand {
@@ -240,13 +240,24 @@ static bool isSubregOf(const MachineOperand &SubReg,
   return SuperMask.all();
 }
 
-uint64_t SDWASrcOperand::getSrcMods() const {
+uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
+                                    const MachineOperand *SrcOp) const {
   uint64_t Mods = 0;
+  const auto *MI = SrcOp->getParent();
+  if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) {
+    if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
+      Mods = Mod->getImm();
+    }
+  } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) {
+    if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) {
+      Mods = Mod->getImm();
+    }
+  }
   if (Abs || Neg) {
     assert(!Sext &&
            "Float and integer src modifiers can't be set simulteniously");
     Mods |= Abs ? SISrcMods::ABS : 0;
-    Mods |= Neg ? SISrcMods::NEG : 0;
+    Mods ^= Neg ? SISrcMods::NEG : 0;
   } else if (Sext) {
     Mods |= SISrcMods::SEXT;
   }
@@ -312,7 +323,7 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
   }
   copyRegOperand(*Src, *getTargetOperand());
   SrcSel->setImm(getSrcSel());
-  SrcMods->setImm(getSrcMods());
+  SrcMods->setImm(getSrcMods(TII, Src));
   getTargetOperand()->setIsKill(false);
   return true;
 }
@@ -409,7 +420,10 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
       switch (Opcode) {
       case AMDGPU::V_LSHRREV_B32_e32:
       case AMDGPU::V_ASHRREV_I32_e32:
-      case AMDGPU::V_LSHLREV_B32_e32: {
+      case AMDGPU::V_LSHLREV_B32_e32:
+      case AMDGPU::V_LSHRREV_B32_e64:
+      case AMDGPU::V_ASHRREV_I32_e64:
+      case AMDGPU::V_LSHLREV_B32_e64: {
         // from: v_lshrrev_b32_e32 v1, 16/24, v0
         // to SDWA src:v0 src_sel:WORD_1/BYTE_3
 
@@ -432,7 +446,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
             TRI->isPhysicalRegister(Dst->getReg()))
           break;
 
-        if (Opcode == AMDGPU::V_LSHLREV_B32_e32) {
+        if (Opcode == AMDGPU::V_LSHLREV_B32_e32 ||
+            Opcode == AMDGPU::V_LSHLREV_B32_e64) {
           auto SDWADst = make_unique<SDWADstOperand>(
               Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
           DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
@@ -441,7 +456,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
         } else {
           auto SDWASrc = make_unique<SDWASrcOperand>(
               Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,
-              Opcode == AMDGPU::V_LSHRREV_B32_e32 ? false : true);
+              Opcode != AMDGPU::V_LSHRREV_B32_e32 &&
+              Opcode != AMDGPU::V_LSHRREV_B32_e64);
           DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
           SDWAOperands[&MI] = std::move(SDWASrc);
           ++NumSDWAPatternsFound;
@@ -451,7 +467,10 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
 
       case AMDGPU::V_LSHRREV_B16_e32:
       case AMDGPU::V_ASHRREV_I16_e32:
-      case AMDGPU::V_LSHLREV_B16_e32: {
+      case AMDGPU::V_LSHLREV_B16_e32:
+      case AMDGPU::V_LSHRREV_B16_e64:
+      case AMDGPU::V_ASHRREV_I16_e64:
+      case AMDGPU::V_LSHLREV_B16_e64: {
         // from: v_lshrrev_b16_e32 v1, 8, v0
         // to SDWA src:v0 src_sel:BYTE_1
 
@@ -472,7 +491,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
             TRI->isPhysicalRegister(Dst->getReg()))
           break;
 
-        if (Opcode == AMDGPU::V_LSHLREV_B16_e32) {
+        if (Opcode == AMDGPU::V_LSHLREV_B16_e32 ||
+            Opcode == AMDGPU::V_LSHLREV_B16_e64) {
           auto SDWADst =
             make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
           DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
@@ -481,7 +501,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
         } else {
           auto SDWASrc = make_unique<SDWASrcOperand>(
               Src1, Dst, BYTE_1, false, false,
-              Opcode == AMDGPU::V_LSHRREV_B16_e32 ? false : true);
+              Opcode != AMDGPU::V_LSHRREV_B16_e32 &&
+              Opcode != AMDGPU::V_LSHRREV_B16_e64);
           DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
           SDWAOperands[&MI] = std::move(SDWASrc);
           ++NumSDWAPatternsFound;
@@ -549,20 +570,25 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
         ++NumSDWAPatternsFound;
         break;
       }
-      case AMDGPU::V_AND_B32_e32: {
+      case AMDGPU::V_AND_B32_e32:
+      case AMDGPU::V_AND_B32_e64: {
         // e.g.:
         // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0
         // to SDWA src:v0 src_sel:WORD_0/BYTE_0
 
         MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+        MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+        auto ValSrc = Src1;
         auto Imm = foldToImm(*Src0);
-        if (!Imm)
-          break;
 
-        if (*Imm != 0x0000ffff && *Imm != 0x000000ff)
+        if (!Imm) {
+          Imm = foldToImm(*Src1);
+          ValSrc = Src0;
+        }
+
+        if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff))
           break;
 
-        MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
         MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
       
         if (TRI->isPhysicalRegister(Src1->getReg()) ||
@@ -570,7 +596,7 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
           break;
 
         auto SDWASrc = make_unique<SDWASrcOperand>(
-            Src1, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
+            ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
         DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
         SDWAOperands[&MI] = std::move(SDWASrc);
         ++NumSDWAPatternsFound;
@@ -583,28 +609,38 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
 
 bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI) const {
   // Check if this instruction has opcode that supports SDWA
-  return AMDGPU::getSDWAOp(MI.getOpcode()) != -1;
+  unsigned Opc = MI.getOpcode();
+  if (AMDGPU::getSDWAOp(Opc) != -1)
+    return true;
+  int Opc32 = AMDGPU::getVOPe32(Opc);
+  if (Opc32 != -1 && AMDGPU::getSDWAOp(Opc32) != -1)
+    return !TII->hasModifiersSet(MI, AMDGPU::OpName::omod) &&
+           !TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+  return false;
 }
 
 bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
                                    const SDWAOperandsVector &SDWAOperands) {
   // Convert to sdwa
   int SDWAOpcode = AMDGPU::getSDWAOp(MI.getOpcode());
+  if (SDWAOpcode == -1)
+    SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(MI.getOpcode()));
   assert(SDWAOpcode != -1);
 
+  // Copy dst, if it is present in original then should also be present in SDWA
+  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+  if (!Dst && !TII->isVOPC(MI))
+    return false;
+
   const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);
 
   // Create SDWA version of instruction MI and initialize its operands
   MachineInstrBuilder SDWAInst =
     BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc);
 
-  // Copy dst, if it is present in original then should also be present in SDWA
-  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
   if (Dst) {
     assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
     SDWAInst.add(*Dst);
-  } else {
-    assert(TII->isVOPC(MI));
   }
 
   // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
@@ -614,7 +650,10 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
     Src0 &&
     AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 &&
     AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1);
-  SDWAInst.addImm(0);
+  if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers))
+    SDWAInst.addImm(Mod->getImm());
+  else
+    SDWAInst.addImm(0);
   SDWAInst.add(*Src0);
 
   // Copy src1 if present, initialize src1_modifiers.
@@ -623,10 +662,11 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
     assert(
       AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 &&
       AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1);
-    SDWAInst.addImm(0);
+    if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers))
+      SDWAInst.addImm(Mod->getImm());
+    else
+      SDWAInst.addImm(0);
     SDWAInst.add(*Src1);
-  } else {
-    assert(TII->isVOP1(MI));
   }
 
   if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
@@ -746,8 +786,9 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
   PotentialMatches.clear();
   SDWAOperands.clear();
 
+  bool Ret = !ConvertedInstructions.empty();
   while (!ConvertedInstructions.empty())
     legalizeScalarOperands(*ConvertedInstructions.pop_back_val());
 
-  return false;
+  return Ret;
 }
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 6fb01a09fe13..b611f28fcabd 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -13,9 +13,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "SIRegisterInfo.h"
+#include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
-#include "AMDGPUSubtarget.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
@@ -1104,6 +1104,66 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
   }
 }
 
+StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
+  #define AMDGPU_REG_ASM_NAMES
+  #include "AMDGPURegAsmNames.inc.cpp"
+
+  #define REG_RANGE(BeginReg, EndReg, RegTable)            \
+    if (Reg >= BeginReg && Reg <= EndReg) {                \
+      unsigned Index = Reg - BeginReg;                     \
+      assert(Index < array_lengthof(RegTable));            \
+      return RegTable[Index];                              \
+    }
+
+  REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
+  REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
+  REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
+  REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
+  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
+            VGPR96RegNames);
+
+  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
+            AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
+            VGPR128RegNames);
+  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
+            AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
+            SGPR128RegNames);
+
+  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
+            AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
+            VGPR256RegNames);
+
+  REG_RANGE(
+    AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
+    AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
+    VGPR512RegNames);
+
+  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
+            AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
+            SGPR256RegNames);
+
+  REG_RANGE(
+    AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
+    AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
+    SGPR512RegNames
+  );
+
+#undef REG_RANGE
+
+  // FIXME: Rename flat_scr so we don't need to special case this.
+  switch (Reg) {
+  case AMDGPU::FLAT_SCR:
+    return "flat_scratch";
+  case AMDGPU::FLAT_SCR_LO:
+    return "flat_scratch_lo";
+  case AMDGPU::FLAT_SCR_HI:
+    return "flat_scratch_hi";
+  default:
+    // For the special named registers the default is fine.
+    return TargetRegisterInfo::getRegAsmName(Reg);
+  }
+}
+
 // FIXME: This is very slow. It might be worth creating a map from physreg to
 // register class.
 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h
index a648c178101a..8fed6d5f9710 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -16,8 +16,8 @@
 #define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
 
 #include "AMDGPURegisterInfo.h"
-#include "SIDefines.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 
 namespace llvm {
@@ -118,6 +118,8 @@ public:
   bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI,
                                           int FI, RegScavenger *RS) const;
 
+  StringRef getRegAsmName(unsigned Reg) const override;
+
   unsigned getHWRegIndex(unsigned Reg) const {
     return getEncodingValue(Reg) & 0xff;
   }
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 630f469eabf0..f581e69980c7 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -7,11 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "AMDGPU.h"
 #include "AMDGPUBaseInfo.h"
+#include "AMDGPU.h"
 #include "SIDefines.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Constants.h"
@@ -27,7 +28,6 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include <algorithm>
@@ -38,7 +38,6 @@
 
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 
-
 #define GET_INSTRINFO_NAMED_OPS
 #include "AMDGPUGenInstrInfo.inc"
 #undef GET_INSTRINFO_NAMED_OPS
@@ -104,6 +103,11 @@ namespace AMDGPU {
 namespace IsaInfo {
 
 IsaVersion getIsaVersion(const FeatureBitset &Features) {
+  // SI.
+  if (Features.test(FeatureISAVersion6_0_0))
+    return {6, 0, 0};
+  if (Features.test(FeatureISAVersion6_0_1))
+    return {6, 0, 1};
   // CI.
   if (Features.test(FeatureISAVersion7_0_0))
     return {7, 0, 0};
@@ -111,6 +115,8 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) {
     return {7, 0, 1};
   if (Features.test(FeatureISAVersion7_0_2))
     return {7, 0, 2};
+  if (Features.test(FeatureISAVersion7_0_3))
+    return {7, 0, 3};
 
   // VI.
   if (Features.test(FeatureISAVersion8_0_0))
@@ -131,6 +137,10 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) {
     return {9, 0, 0};
   if (Features.test(FeatureISAVersion9_0_1))
     return {9, 0, 1};
+  if (Features.test(FeatureISAVersion9_0_2))
+    return {9, 0, 2};
+  if (Features.test(FeatureISAVersion9_0_3))
+    return {9, 0, 3};
 
   if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
     return {0, 0, 0};
@@ -327,33 +337,6 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
   Header.private_segment_alignment = 4;
 }
 
-MCSection *getHSATextSection(MCContext &Ctx) {
-  return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS,
-                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
-                           ELF::SHF_EXECINSTR |
-                           ELF::SHF_AMDGPU_HSA_AGENT |
-                           ELF::SHF_AMDGPU_HSA_CODE);
-}
-
-MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) {
-  return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS,
-                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
-                           ELF::SHF_AMDGPU_HSA_GLOBAL |
-                           ELF::SHF_AMDGPU_HSA_AGENT);
-}
-
-MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) {
-  return  Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS,
-                            ELF::SHF_ALLOC | ELF::SHF_WRITE |
-                            ELF::SHF_AMDGPU_HSA_GLOBAL);
-}
-
-MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) {
-  return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS,
-                           ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY |
-                           ELF::SHF_AMDGPU_HSA_AGENT);
-}
-
 bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
   return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
 }
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 19888ad7556a..eff0230d21f5 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -149,13 +149,6 @@ int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
 
 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                                const FeatureBitset &Features);
-MCSection *getHSATextSection(MCContext &Ctx);
-
-MCSection *getHSADataGlobalAgentSection(MCContext &Ctx);
-
-MCSection *getHSADataGlobalProgramSection(MCContext &Ctx);
-
-MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx);
 
 bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS);
 bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS);
diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td
index 77fc9551cff9..a8ca593f14ed 100644
--- a/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/lib/Target/AMDGPU/VOP3Instructions.td
@@ -172,8 +172,8 @@ def V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>,
 def V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>;
 def V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
 def V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfi>;
-def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
-def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbit>;
+def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>;
 def V_MIN3_F32 : VOP3Inst <"v_min3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmin3>;
 def V_MIN3_I32 : VOP3Inst <"v_min3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmin3>;
 def V_MIN3_U32 : VOP3Inst <"v_min3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumin3>;
@@ -209,7 +209,10 @@ def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64,
 }
 
 def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_msad_u8>;
+
+let Constraints = "@earlyclobber $vdst" in {
 def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64>, int_amdgcn_mqsad_pk_u16_u8>;
+} // End Constraints = "@earlyclobber $vdst"
 
 def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUtrig_preop> {
   let SchedRW = [WriteDouble];
@@ -232,8 +235,10 @@ def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>>;
 
 let SubtargetPredicate = isCIVI in {
 
+let Constraints = "@earlyclobber $vdst" in {
 def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64>, int_amdgcn_qsad_pk_u16_u8>;
 def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile<VOP_V4I32_I64_I32_V4I32>, int_amdgcn_mqsad_u32_u8>;
+} // End Constraints = "@earlyclobber $vdst"
 
 let isCommutable = 1 in {
 def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
author	Dimitry Andric <dim@FreeBSD.org>	2017-06-10 13:44:06 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2017-06-10 13:44:06 +0000
commit	7ab83427af0f77b59941ceba41d509d7d097b065 (patch)
tree	cc41c05b1db454e3d802f34df75e636ee922ad87 /lib/Target/AMDGPU
parent	d288ef4c1788d3a951a7558c68312c2d320612b1 (diff)