summaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2017-06-10 13:44:06 +0000
committerDimitry Andric <dim@FreeBSD.org>2017-06-10 13:44:06 +0000
commit7ab83427af0f77b59941ceba41d509d7d097b065 (patch)
treecc41c05b1db454e3d802f34df75e636ee922ad87 /lib/Target/AMDGPU
parentd288ef4c1788d3a951a7558c68312c2d320612b1 (diff)
Notes
Diffstat (limited to 'lib/Target/AMDGPU')
-rw-r--r--lib/Target/AMDGPU/AMDGPU.td33
-rw-r--r--lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp6
-rw-r--r--lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp2
-rw-r--r--lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp12
-rw-r--r--lib/Target/AMDGPU/AMDGPUAsmPrinter.h2
-rw-r--r--lib/Target/AMDGPU/AMDGPUCallLowering.cpp2
-rw-r--r--lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp4
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp6
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelLowering.cpp2
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstrInfo.h2
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstructionSelector.h2
-rw-r--r--lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp10
-rw-r--r--lib/Target/AMDGPU/AMDGPUMCInstLower.cpp1
-rw-r--r--lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp2
-rw-r--r--lib/Target/AMDGPU/AMDGPUMachineFunction.h2
-rw-r--r--lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp20
-rw-r--r--lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp353
-rw-r--r--lib/Target/AMDGPU/AMDGPURegisterBankInfo.h1
-rw-r--r--lib/Target/AMDGPU/AMDGPURegisterInfo.cpp1
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.h17
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetMachine.cpp18
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp6
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp2
-rw-r--r--lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp106
-rw-r--r--lib/Target/AMDGPU/CMakeLists.txt1
-rw-r--r--lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp7
-rw-r--r--lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h2
-rw-r--r--lib/Target/AMDGPU/GCNHazardRecognizer.cpp2
-rw-r--r--lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp2
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp2
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h422
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp197
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h2
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp2
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp32
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h12
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp2
-rw-r--r--lib/Target/AMDGPU/Processors.td75
-rw-r--r--lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp4
-rw-r--r--lib/Target/AMDGPU/R600EmitClauseMarkers.cpp2
-rw-r--r--lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp2
-rw-r--r--lib/Target/AMDGPU/R600FrameLowering.cpp2
-rw-r--r--lib/Target/AMDGPU/R600ISelLowering.cpp2
-rw-r--r--lib/Target/AMDGPU/R600InstrInfo.cpp4
-rw-r--r--lib/Target/AMDGPU/R600MachineScheduler.cpp4
-rw-r--r--lib/Target/AMDGPU/R600Packetizer.cpp2
-rw-r--r--lib/Target/AMDGPU/SIDebuggerInsertNops.cpp2
-rw-r--r--lib/Target/AMDGPU/SIFixSGPRCopies.cpp2
-rw-r--r--lib/Target/AMDGPU/SIFoldOperands.cpp3
-rw-r--r--lib/Target/AMDGPU/SIFrameLowering.cpp2
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.cpp6
-rw-r--r--lib/Target/AMDGPU/SIInsertWaitcnts.cpp2
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.cpp2
-rw-r--r--lib/Target/AMDGPU/SILowerControlFlow.cpp2
-rw-r--r--lib/Target/AMDGPU/SILowerI1Copies.cpp2
-rw-r--r--lib/Target/AMDGPU/SIMachineFunctionInfo.h2
-rw-r--r--lib/Target/AMDGPU/SIMachineScheduler.cpp2
-rw-r--r--lib/Target/AMDGPU/SIPeepholeSDWA.cpp97
-rw-r--r--lib/Target/AMDGPU/SIRegisterInfo.cpp62
-rw-r--r--lib/Target/AMDGPU/SIRegisterInfo.h4
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp43
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h7
-rw-r--r--lib/Target/AMDGPU/VOP3Instructions.td9
63 files changed, 715 insertions, 928 deletions
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index b50e8d1d659e..6ab2b9ef0459 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -447,6 +447,16 @@ class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping,
Implies
>;
+def FeatureISAVersion6_0_0 : SubtargetFeatureISAVersion <6,0,0,
+ [FeatureSouthernIslands,
+ FeatureFastFMAF32,
+ HalfRate64Ops,
+ FeatureLDSBankCount32]>;
+
+def FeatureISAVersion6_0_1 : SubtargetFeatureISAVersion <6,0,1,
+ [FeatureSouthernIslands,
+ FeatureLDSBankCount32]>;
+
def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0,
[FeatureSeaIslands,
FeatureLDSBankCount32]>;
@@ -461,6 +471,10 @@ def FeatureISAVersion7_0_2 : SubtargetFeatureISAVersion <7,0,2,
[FeatureSeaIslands,
FeatureLDSBankCount16]>;
+def FeatureISAVersion7_0_3 : SubtargetFeatureISAVersion <7,0,3,
+ [FeatureSeaIslands,
+ FeatureLDSBankCount16]>;
+
def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0,
[FeatureVolcanicIslands,
FeatureLDSBankCount32,
@@ -489,8 +503,23 @@ def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
FeatureLDSBankCount16,
FeatureXNACK]>;
-def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,[]>;
-def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1,[]>;
+def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,
+ [FeatureGFX9,
+ FeatureLDSBankCount32]>;
+
+def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1,
+ [FeatureGFX9,
+ FeatureLDSBankCount32,
+ FeatureXNACK]>;
+
+def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2,
+ [FeatureGFX9,
+ FeatureLDSBankCount32]>;
+
+def FeatureISAVersion9_0_3 : SubtargetFeatureISAVersion <9,0,3,
+ [FeatureGFX9,
+ FeatureLDSBankCount32,
+ FeatureXNACK]>;
//===----------------------------------------------------------------------===//
// Debugger related subtarget features.
diff --git a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index 3c99f48e818a..faa424eb0a64 100644
--- a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -10,15 +10,15 @@
/// This is the AMGPU address space based alias analysis pass.
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
#include "AMDGPUAliasAnalysis.h"
+#include "AMDGPU.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/Passes.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index 91b3649f5c39..3c788fa1dcea 100644
--- a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -19,8 +19,8 @@
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 0959014812d8..83ad1a5c6ee3 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -17,25 +17,25 @@
//
#include "AMDGPUAsmPrinter.h"
-#include "AMDGPUTargetMachine.h"
-#include "MCTargetDesc/AMDGPUTargetStreamer.h"
-#include "InstPrinter/AMDGPUInstPrinter.h"
-#include "Utils/AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
+#include "AMDGPUTargetMachine.h"
+#include "InstPrinter/AMDGPUInstPrinter.h"
+#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "SIDefines.h"
-#include "SIMachineFunctionInfo.h"
#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index e5adeeb465e1..0a58ce06704d 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -15,8 +15,8 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
-#include "AMDKernelCodeT.h"
#include "AMDGPU.h"
+#include "AMDKernelCodeT.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include <cstddef>
diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index e67ae092fdda..515cc07dd449 100644
--- a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -18,8 +18,8 @@
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
-#include "SIRegisterInfo.h"
#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index d923cb117c12..b312dbc8d14d 100644
--- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -25,13 +25,13 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 19fce064783d..251c2f9bb25a 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -13,15 +13,15 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
-#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
-#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
#include "SIISelLowering.h"
+#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 723e8a7b54e2..5586b513b5fc 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -21,6 +21,7 @@
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600MachineFunctionInfo.h"
+#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -30,7 +31,6 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Support/KnownBits.h"
-#include "SIInstrInfo.h"
using namespace llvm;
static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT,
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 12caa5118342..41cc7d7093ec 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -17,8 +17,8 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
#include "AMDGPU.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "AMDGPUGenInstrInfo.inc"
diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index c87102e55dfb..ef845f44d365 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -15,9 +15,9 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
#include "AMDGPU.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
namespace llvm {
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 267f4807a788..b889788c3426 100644
--- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -14,10 +14,10 @@
#include "AMDGPULegalizerInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/IR/Type.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/Target/TargetOpcodes.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetOpcodes.h"
using namespace llvm;
@@ -47,12 +47,18 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo() {
setAction({G_GEP, P2}, Legal);
setAction({G_GEP, 1, S64}, Legal);
+ setAction({G_ICMP, S1}, Legal);
+ setAction({G_ICMP, 1, S32}, Legal);
+
setAction({G_LOAD, P1}, Legal);
setAction({G_LOAD, P2}, Legal);
setAction({G_LOAD, S32}, Legal);
setAction({G_LOAD, 1, P1}, Legal);
setAction({G_LOAD, 1, P2}, Legal);
+ setAction({G_SELECT, S32}, Legal);
+ setAction({G_SELECT, 1, S1}, Legal);
+
setAction({G_STORE, S32}, Legal);
setAction({G_STORE, 1, P1}, Legal);
diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index f1ef6281c90f..63dd0d726d91 100644
--- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -38,7 +38,6 @@ using namespace llvm;
#include "AMDGPUGenMCPseudoLowering.inc"
-
AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st,
const AsmPrinter &ap):
Ctx(ctx), ST(st), AP(ap) { }
diff --git a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
index 6d2785ba1c60..2071b6f157cd 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "SIInstrInfo.h"
#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index 8bfeb67ad4ec..99bb61b21db0 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -10,8 +10,8 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 07f92918a43f..625c9b77e2de 100644
--- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -33,11 +33,11 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
@@ -319,15 +319,17 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
switch (Inst->getOpcode()) {
case Instruction::Load: {
LoadInst *LI = cast<LoadInst>(Inst);
- return !LI->isVolatile();
+ // Currently only handle the case where the Pointer Operand is a GEP so check for that case.
+ return isa<GetElementPtrInst>(LI->getPointerOperand()) && !LI->isVolatile();
}
case Instruction::BitCast:
case Instruction::AddrSpaceCast:
return true;
case Instruction::Store: {
- // Must be the stored pointer operand, not a stored value.
+ // Must be the stored pointer operand, not a stored value, plus
+ // since it should be canonical form, the User should be a GEP.
StoreInst *SI = cast<StoreInst>(Inst);
- return (SI->getPointerOperand() == User) && !SI->isVolatile();
+ return (SI->getPointerOperand() == User) && isa<GetElementPtrInst>(User) && !SI->isVolatile();
}
default:
return false;
@@ -341,8 +343,11 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
// FIXME: There is no reason why we can't support larger arrays, we
// are just being conservative for now.
+ // FIXME: We also reject alloca's of the form [ 2 x [ 2 x i32 ]] or equivalent. Potentially these
+ // could also be promoted but we don't currently handle this case
if (!AllocaTy ||
AllocaTy->getElementType()->isVectorTy() ||
+ AllocaTy->getElementType()->isArrayTy() ||
AllocaTy->getNumElements() > 4 ||
AllocaTy->getNumElements() < 2) {
DEBUG(dbgs() << " Cannot convert type to vector\n");
@@ -390,7 +395,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
switch (Inst->getOpcode()) {
case Instruction::Load: {
Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
- Value *Ptr = Inst->getOperand(0);
+ Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
@@ -403,12 +408,13 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
case Instruction::Store: {
Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
- Value *Ptr = Inst->getOperand(1);
+ StoreInst *SI = cast<StoreInst>(Inst);
+ Value *Ptr = SI->getPointerOperand();
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
Value *VecValue = Builder.CreateLoad(BitCast);
Value *NewVecValue = Builder.CreateInsertElement(VecValue,
- Inst->getOperand(0),
+ SI->getValueOperand(),
Index);
Builder.CreateStore(NewVecValue, BitCast);
Inst->eraseFromParent();
diff --git a/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp b/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp
new file mode 100644
index 000000000000..36d88f52910d
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp
@@ -0,0 +1,353 @@
+//===-- AMDGPURegAsmNames.inc - Register asm names ----------*- C++ -*-----===//
+
+#ifdef AMDGPU_REG_ASM_NAMES
+
+static const char *const VGPR32RegNames[] = {
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
+ "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
+ "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
+ "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
+ "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
+ "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
+ "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
+ "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
+ "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
+ "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
+ "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
+ "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
+ "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
+ "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
+ "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
+ "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
+ "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
+ "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
+ "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
+ "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
+ "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
+ "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
+ "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
+ "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
+ "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
+ "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
+ "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
+ "v252", "v253", "v254", "v255"
+};
+
+static const char *const SGPR32RegNames[] = {
+ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9",
+ "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19",
+ "s20", "s21", "s22", "s23", "s24", "s25", "s26", "s27", "s28", "s29",
+ "s30", "s31", "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39",
+ "s40", "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
+ "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", "s59",
+ "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", "s68", "s69",
+ "s70", "s71", "s72", "s73", "s74", "s75", "s76", "s77", "s78", "s79",
+ "s80", "s81", "s82", "s83", "s84", "s85", "s86", "s87", "s88", "s89",
+ "s90", "s91", "s92", "s93", "s94", "s95", "s96", "s97", "s98", "s99",
+ "s100", "s101", "s102", "s103"
+};
+
+static const char *const VGPR64RegNames[] = {
+ "v[0:1]", "v[1:2]", "v[2:3]", "v[3:4]", "v[4:5]",
+ "v[5:6]", "v[6:7]", "v[7:8]", "v[8:9]", "v[9:10]",
+ "v[10:11]", "v[11:12]", "v[12:13]", "v[13:14]", "v[14:15]",
+ "v[15:16]", "v[16:17]", "v[17:18]", "v[18:19]", "v[19:20]",
+ "v[20:21]", "v[21:22]", "v[22:23]", "v[23:24]", "v[24:25]",
+ "v[25:26]", "v[26:27]", "v[27:28]", "v[28:29]", "v[29:30]",
+ "v[30:31]", "v[31:32]", "v[32:33]", "v[33:34]", "v[34:35]",
+ "v[35:36]", "v[36:37]", "v[37:38]", "v[38:39]", "v[39:40]",
+ "v[40:41]", "v[41:42]", "v[42:43]", "v[43:44]", "v[44:45]",
+ "v[45:46]", "v[46:47]", "v[47:48]", "v[48:49]", "v[49:50]",
+ "v[50:51]", "v[51:52]", "v[52:53]", "v[53:54]", "v[54:55]",
+ "v[55:56]", "v[56:57]", "v[57:58]", "v[58:59]", "v[59:60]",
+ "v[60:61]", "v[61:62]", "v[62:63]", "v[63:64]", "v[64:65]",
+ "v[65:66]", "v[66:67]", "v[67:68]", "v[68:69]", "v[69:70]",
+ "v[70:71]", "v[71:72]", "v[72:73]", "v[73:74]", "v[74:75]",
+ "v[75:76]", "v[76:77]", "v[77:78]", "v[78:79]", "v[79:80]",
+ "v[80:81]", "v[81:82]", "v[82:83]", "v[83:84]", "v[84:85]",
+ "v[85:86]", "v[86:87]", "v[87:88]", "v[88:89]", "v[89:90]",
+ "v[90:91]", "v[91:92]", "v[92:93]", "v[93:94]", "v[94:95]",
+ "v[95:96]", "v[96:97]", "v[97:98]", "v[98:99]", "v[99:100]",
+ "v[100:101]", "v[101:102]", "v[102:103]", "v[103:104]", "v[104:105]",
+ "v[105:106]", "v[106:107]", "v[107:108]", "v[108:109]", "v[109:110]",
+ "v[110:111]", "v[111:112]", "v[112:113]", "v[113:114]", "v[114:115]",
+ "v[115:116]", "v[116:117]", "v[117:118]", "v[118:119]", "v[119:120]",
+ "v[120:121]", "v[121:122]", "v[122:123]", "v[123:124]", "v[124:125]",
+ "v[125:126]", "v[126:127]", "v[127:128]", "v[128:129]", "v[129:130]",
+ "v[130:131]", "v[131:132]", "v[132:133]", "v[133:134]", "v[134:135]",
+ "v[135:136]", "v[136:137]", "v[137:138]", "v[138:139]", "v[139:140]",
+ "v[140:141]", "v[141:142]", "v[142:143]", "v[143:144]", "v[144:145]",
+ "v[145:146]", "v[146:147]", "v[147:148]", "v[148:149]", "v[149:150]",
+ "v[150:151]", "v[151:152]", "v[152:153]", "v[153:154]", "v[154:155]",
+ "v[155:156]", "v[156:157]", "v[157:158]", "v[158:159]", "v[159:160]",
+ "v[160:161]", "v[161:162]", "v[162:163]", "v[163:164]", "v[164:165]",
+ "v[165:166]", "v[166:167]", "v[167:168]", "v[168:169]", "v[169:170]",
+ "v[170:171]", "v[171:172]", "v[172:173]", "v[173:174]", "v[174:175]",
+ "v[175:176]", "v[176:177]", "v[177:178]", "v[178:179]", "v[179:180]",
+ "v[180:181]", "v[181:182]", "v[182:183]", "v[183:184]", "v[184:185]",
+ "v[185:186]", "v[186:187]", "v[187:188]", "v[188:189]", "v[189:190]",
+ "v[190:191]", "v[191:192]", "v[192:193]", "v[193:194]", "v[194:195]",
+ "v[195:196]", "v[196:197]", "v[197:198]", "v[198:199]", "v[199:200]",
+ "v[200:201]", "v[201:202]", "v[202:203]", "v[203:204]", "v[204:205]",
+ "v[205:206]", "v[206:207]", "v[207:208]", "v[208:209]", "v[209:210]",
+ "v[210:211]", "v[211:212]", "v[212:213]", "v[213:214]", "v[214:215]",
+ "v[215:216]", "v[216:217]", "v[217:218]", "v[218:219]", "v[219:220]",
+ "v[220:221]", "v[221:222]", "v[222:223]", "v[223:224]", "v[224:225]",
+ "v[225:226]", "v[226:227]", "v[227:228]", "v[228:229]", "v[229:230]",
+ "v[230:231]", "v[231:232]", "v[232:233]", "v[233:234]", "v[234:235]",
+ "v[235:236]", "v[236:237]", "v[237:238]", "v[238:239]", "v[239:240]",
+ "v[240:241]", "v[241:242]", "v[242:243]", "v[243:244]", "v[244:245]",
+ "v[245:246]", "v[246:247]", "v[247:248]", "v[248:249]", "v[249:250]",
+ "v[250:251]", "v[251:252]", "v[252:253]", "v[253:254]", "v[254:255]"
+};
+
+static const char *const VGPR96RegNames[] = {
+ "v[0:2]", "v[1:3]", "v[2:4]", "v[3:5]", "v[4:6]",
+ "v[5:7]", "v[6:8]", "v[7:9]", "v[8:10]", "v[9:11]",
+ "v[10:12]", "v[11:13]", "v[12:14]", "v[13:15]", "v[14:16]",
+ "v[15:17]", "v[16:18]", "v[17:19]", "v[18:20]", "v[19:21]",
+ "v[20:22]", "v[21:23]", "v[22:24]", "v[23:25]", "v[24:26]",
+ "v[25:27]", "v[26:28]", "v[27:29]", "v[28:30]", "v[29:31]",
+ "v[30:32]", "v[31:33]", "v[32:34]", "v[33:35]", "v[34:36]",
+ "v[35:37]", "v[36:38]", "v[37:39]", "v[38:40]", "v[39:41]",
+ "v[40:42]", "v[41:43]", "v[42:44]", "v[43:45]", "v[44:46]",
+ "v[45:47]", "v[46:48]", "v[47:49]", "v[48:50]", "v[49:51]",
+ "v[50:52]", "v[51:53]", "v[52:54]", "v[53:55]", "v[54:56]",
+ "v[55:57]", "v[56:58]", "v[57:59]", "v[58:60]", "v[59:61]",
+ "v[60:62]", "v[61:63]", "v[62:64]", "v[63:65]", "v[64:66]",
+ "v[65:67]", "v[66:68]", "v[67:69]", "v[68:70]", "v[69:71]",
+ "v[70:72]", "v[71:73]", "v[72:74]", "v[73:75]", "v[74:76]",
+ "v[75:77]", "v[76:78]", "v[77:79]", "v[78:80]", "v[79:81]",
+ "v[80:82]", "v[81:83]", "v[82:84]", "v[83:85]", "v[84:86]",
+ "v[85:87]", "v[86:88]", "v[87:89]", "v[88:90]", "v[89:91]",
+ "v[90:92]", "v[91:93]", "v[92:94]", "v[93:95]", "v[94:96]",
+ "v[95:97]", "v[96:98]", "v[97:99]", "v[98:100]", "v[99:101]",
+ "v[100:102]", "v[101:103]", "v[102:104]", "v[103:105]", "v[104:106]",
+ "v[105:107]", "v[106:108]", "v[107:109]", "v[108:110]", "v[109:111]",
+ "v[110:112]", "v[111:113]", "v[112:114]", "v[113:115]", "v[114:116]",
+ "v[115:117]", "v[116:118]", "v[117:119]", "v[118:120]", "v[119:121]",
+ "v[120:122]", "v[121:123]", "v[122:124]", "v[123:125]", "v[124:126]",
+ "v[125:127]", "v[126:128]", "v[127:129]", "v[128:130]", "v[129:131]",
+ "v[130:132]", "v[131:133]", "v[132:134]", "v[133:135]", "v[134:136]",
+ "v[135:137]", "v[136:138]", "v[137:139]", "v[138:140]", "v[139:141]",
+ "v[140:142]", "v[141:143]", "v[142:144]", "v[143:145]", "v[144:146]",
+ "v[145:147]", "v[146:148]", "v[147:149]", "v[148:150]", "v[149:151]",
+ "v[150:152]", "v[151:153]", "v[152:154]", "v[153:155]", "v[154:156]",
+ "v[155:157]", "v[156:158]", "v[157:159]", "v[158:160]", "v[159:161]",
+ "v[160:162]", "v[161:163]", "v[162:164]", "v[163:165]", "v[164:166]",
+ "v[165:167]", "v[166:168]", "v[167:169]", "v[168:170]", "v[169:171]",
+ "v[170:172]", "v[171:173]", "v[172:174]", "v[173:175]", "v[174:176]",
+ "v[175:177]", "v[176:178]", "v[177:179]", "v[178:180]", "v[179:181]",
+ "v[180:182]", "v[181:183]", "v[182:184]", "v[183:185]", "v[184:186]",
+ "v[185:187]", "v[186:188]", "v[187:189]", "v[188:190]", "v[189:191]",
+ "v[190:192]", "v[191:193]", "v[192:194]", "v[193:195]", "v[194:196]",
+ "v[195:197]", "v[196:198]", "v[197:199]", "v[198:200]", "v[199:201]",
+ "v[200:202]", "v[201:203]", "v[202:204]", "v[203:205]", "v[204:206]",
+ "v[205:207]", "v[206:208]", "v[207:209]", "v[208:210]", "v[209:211]",
+ "v[210:212]", "v[211:213]", "v[212:214]", "v[213:215]", "v[214:216]",
+ "v[215:217]", "v[216:218]", "v[217:219]", "v[218:220]", "v[219:221]",
+ "v[220:222]", "v[221:223]", "v[222:224]", "v[223:225]", "v[224:226]",
+ "v[225:227]", "v[226:228]", "v[227:229]", "v[228:230]", "v[229:231]",
+ "v[230:232]", "v[231:233]", "v[232:234]", "v[233:235]", "v[234:236]",
+ "v[235:237]", "v[236:238]", "v[237:239]", "v[238:240]", "v[239:241]",
+ "v[240:242]", "v[241:243]", "v[242:244]", "v[243:245]", "v[244:246]",
+ "v[245:247]", "v[246:248]", "v[247:249]", "v[248:250]", "v[249:251]",
+ "v[250:252]", "v[251:253]", "v[252:254]", "v[253:255]"
+};
+
+static const char *const VGPR128RegNames[] = {
+ "v[0:3]", "v[1:4]", "v[2:5]", "v[3:6]", "v[4:7]",
+ "v[5:8]", "v[6:9]", "v[7:10]", "v[8:11]", "v[9:12]",
+ "v[10:13]", "v[11:14]", "v[12:15]", "v[13:16]", "v[14:17]",
+ "v[15:18]", "v[16:19]", "v[17:20]", "v[18:21]", "v[19:22]",
+ "v[20:23]", "v[21:24]", "v[22:25]", "v[23:26]", "v[24:27]",
+ "v[25:28]", "v[26:29]", "v[27:30]", "v[28:31]", "v[29:32]",
+ "v[30:33]", "v[31:34]", "v[32:35]", "v[33:36]", "v[34:37]",
+ "v[35:38]", "v[36:39]", "v[37:40]", "v[38:41]", "v[39:42]",
+ "v[40:43]", "v[41:44]", "v[42:45]", "v[43:46]", "v[44:47]",
+ "v[45:48]", "v[46:49]", "v[47:50]", "v[48:51]", "v[49:52]",
+ "v[50:53]", "v[51:54]", "v[52:55]", "v[53:56]", "v[54:57]",
+ "v[55:58]", "v[56:59]", "v[57:60]", "v[58:61]", "v[59:62]",
+ "v[60:63]", "v[61:64]", "v[62:65]", "v[63:66]", "v[64:67]",
+ "v[65:68]", "v[66:69]", "v[67:70]", "v[68:71]", "v[69:72]",
+ "v[70:73]", "v[71:74]", "v[72:75]", "v[73:76]", "v[74:77]",
+ "v[75:78]", "v[76:79]", "v[77:80]", "v[78:81]", "v[79:82]",
+ "v[80:83]", "v[81:84]", "v[82:85]", "v[83:86]", "v[84:87]",
+ "v[85:88]", "v[86:89]", "v[87:90]", "v[88:91]", "v[89:92]",
+ "v[90:93]", "v[91:94]", "v[92:95]", "v[93:96]", "v[94:97]",
+ "v[95:98]", "v[96:99]", "v[97:100]", "v[98:101]", "v[99:102]",
+ "v[100:103]", "v[101:104]", "v[102:105]", "v[103:106]", "v[104:107]",
+ "v[105:108]", "v[106:109]", "v[107:110]", "v[108:111]", "v[109:112]",
+ "v[110:113]", "v[111:114]", "v[112:115]", "v[113:116]", "v[114:117]",
+ "v[115:118]", "v[116:119]", "v[117:120]", "v[118:121]", "v[119:122]",
+ "v[120:123]", "v[121:124]", "v[122:125]", "v[123:126]", "v[124:127]",
+ "v[125:128]", "v[126:129]", "v[127:130]", "v[128:131]", "v[129:132]",
+ "v[130:133]", "v[131:134]", "v[132:135]", "v[133:136]", "v[134:137]",
+ "v[135:138]", "v[136:139]", "v[137:140]", "v[138:141]", "v[139:142]",
+ "v[140:143]", "v[141:144]", "v[142:145]", "v[143:146]", "v[144:147]",
+ "v[145:148]", "v[146:149]", "v[147:150]", "v[148:151]", "v[149:152]",
+ "v[150:153]", "v[151:154]", "v[152:155]", "v[153:156]", "v[154:157]",
+ "v[155:158]", "v[156:159]", "v[157:160]", "v[158:161]", "v[159:162]",
+ "v[160:163]", "v[161:164]", "v[162:165]", "v[163:166]", "v[164:167]",
+ "v[165:168]", "v[166:169]", "v[167:170]", "v[168:171]", "v[169:172]",
+ "v[170:173]", "v[171:174]", "v[172:175]", "v[173:176]", "v[174:177]",
+ "v[175:178]", "v[176:179]", "v[177:180]", "v[178:181]", "v[179:182]",
+ "v[180:183]", "v[181:184]", "v[182:185]", "v[183:186]", "v[184:187]",
+ "v[185:188]", "v[186:189]", "v[187:190]", "v[188:191]", "v[189:192]",
+ "v[190:193]", "v[191:194]", "v[192:195]", "v[193:196]", "v[194:197]",
+ "v[195:198]", "v[196:199]", "v[197:200]", "v[198:201]", "v[199:202]",
+ "v[200:203]", "v[201:204]", "v[202:205]", "v[203:206]", "v[204:207]",
+ "v[205:208]", "v[206:209]", "v[207:210]", "v[208:211]", "v[209:212]",
+ "v[210:213]", "v[211:214]", "v[212:215]", "v[213:216]", "v[214:217]",
+ "v[215:218]", "v[216:219]", "v[217:220]", "v[218:221]", "v[219:222]",
+ "v[220:223]", "v[221:224]", "v[222:225]", "v[223:226]", "v[224:227]",
+ "v[225:228]", "v[226:229]", "v[227:230]", "v[228:231]", "v[229:232]",
+ "v[230:233]", "v[231:234]", "v[232:235]", "v[233:236]", "v[234:237]",
+ "v[235:238]", "v[236:239]", "v[237:240]", "v[238:241]", "v[239:242]",
+ "v[240:243]", "v[241:244]", "v[242:245]", "v[243:246]", "v[244:247]",
+ "v[245:248]", "v[246:249]", "v[247:250]", "v[248:251]", "v[249:252]",
+ "v[250:253]", "v[251:254]", "v[252:255]"
+};
+
+static const char *const VGPR256RegNames[] = {
+ "v[0:7]", "v[1:8]", "v[2:9]", "v[3:10]", "v[4:11]",
+ "v[5:12]", "v[6:13]", "v[7:14]", "v[8:15]", "v[9:16]",
+ "v[10:17]", "v[11:18]", "v[12:19]", "v[13:20]", "v[14:21]",
+ "v[15:22]", "v[16:23]", "v[17:24]", "v[18:25]", "v[19:26]",
+ "v[20:27]", "v[21:28]", "v[22:29]", "v[23:30]", "v[24:31]",
+ "v[25:32]", "v[26:33]", "v[27:34]", "v[28:35]", "v[29:36]",
+ "v[30:37]", "v[31:38]", "v[32:39]", "v[33:40]", "v[34:41]",
+ "v[35:42]", "v[36:43]", "v[37:44]", "v[38:45]", "v[39:46]",
+ "v[40:47]", "v[41:48]", "v[42:49]", "v[43:50]", "v[44:51]",
+ "v[45:52]", "v[46:53]", "v[47:54]", "v[48:55]", "v[49:56]",
+ "v[50:57]", "v[51:58]", "v[52:59]", "v[53:60]", "v[54:61]",
+ "v[55:62]", "v[56:63]", "v[57:64]", "v[58:65]", "v[59:66]",
+ "v[60:67]", "v[61:68]", "v[62:69]", "v[63:70]", "v[64:71]",
+ "v[65:72]", "v[66:73]", "v[67:74]", "v[68:75]", "v[69:76]",
+ "v[70:77]", "v[71:78]", "v[72:79]", "v[73:80]", "v[74:81]",
+ "v[75:82]", "v[76:83]", "v[77:84]", "v[78:85]", "v[79:86]",
+ "v[80:87]", "v[81:88]", "v[82:89]", "v[83:90]", "v[84:91]",
+ "v[85:92]", "v[86:93]", "v[87:94]", "v[88:95]", "v[89:96]",
+ "v[90:97]", "v[91:98]", "v[92:99]", "v[93:100]", "v[94:101]",
+ "v[95:102]", "v[96:103]", "v[97:104]", "v[98:105]", "v[99:106]",
+ "v[100:107]", "v[101:108]", "v[102:109]", "v[103:110]", "v[104:111]",
+ "v[105:112]", "v[106:113]", "v[107:114]", "v[108:115]", "v[109:116]",
+ "v[110:117]", "v[111:118]", "v[112:119]", "v[113:120]", "v[114:121]",
+ "v[115:122]", "v[116:123]", "v[117:124]", "v[118:125]", "v[119:126]",
+ "v[120:127]", "v[121:128]", "v[122:129]", "v[123:130]", "v[124:131]",
+ "v[125:132]", "v[126:133]", "v[127:134]", "v[128:135]", "v[129:136]",
+ "v[130:137]", "v[131:138]", "v[132:139]", "v[133:140]", "v[134:141]",
+ "v[135:142]", "v[136:143]", "v[137:144]", "v[138:145]", "v[139:146]",
+ "v[140:147]", "v[141:148]", "v[142:149]", "v[143:150]", "v[144:151]",
+ "v[145:152]", "v[146:153]", "v[147:154]", "v[148:155]", "v[149:156]",
+ "v[150:157]", "v[151:158]", "v[152:159]", "v[153:160]", "v[154:161]",
+ "v[155:162]", "v[156:163]", "v[157:164]", "v[158:165]", "v[159:166]",
+ "v[160:167]", "v[161:168]", "v[162:169]", "v[163:170]", "v[164:171]",
+ "v[165:172]", "v[166:173]", "v[167:174]", "v[168:175]", "v[169:176]",
+ "v[170:177]", "v[171:178]", "v[172:179]", "v[173:180]", "v[174:181]",
+ "v[175:182]", "v[176:183]", "v[177:184]", "v[178:185]", "v[179:186]",
+ "v[180:187]", "v[181:188]", "v[182:189]", "v[183:190]", "v[184:191]",
+ "v[185:192]", "v[186:193]", "v[187:194]", "v[188:195]", "v[189:196]",
+ "v[190:197]", "v[191:198]", "v[192:199]", "v[193:200]", "v[194:201]",
+ "v[195:202]", "v[196:203]", "v[197:204]", "v[198:205]", "v[199:206]",
+ "v[200:207]", "v[201:208]", "v[202:209]", "v[203:210]", "v[204:211]",
+ "v[205:212]", "v[206:213]", "v[207:214]", "v[208:215]", "v[209:216]",
+ "v[210:217]", "v[211:218]", "v[212:219]", "v[213:220]", "v[214:221]",
+ "v[215:222]", "v[216:223]", "v[217:224]", "v[218:225]", "v[219:226]",
+ "v[220:227]", "v[221:228]", "v[222:229]", "v[223:230]", "v[224:231]",
+ "v[225:232]", "v[226:233]", "v[227:234]", "v[228:235]", "v[229:236]",
+ "v[230:237]", "v[231:238]", "v[232:239]", "v[233:240]", "v[234:241]",
+ "v[235:242]", "v[236:243]", "v[237:244]", "v[238:245]", "v[239:246]",
+ "v[240:247]", "v[241:248]", "v[242:249]", "v[243:250]", "v[244:251]",
+ "v[245:252]", "v[246:253]", "v[247:254]", "v[248:255]"
+};
+
+static const char *const VGPR512RegNames[] = {
+ "v[0:15]", "v[1:16]", "v[2:17]", "v[3:18]", "v[4:19]",
+ "v[5:20]", "v[6:21]", "v[7:22]", "v[8:23]", "v[9:24]",
+ "v[10:25]", "v[11:26]", "v[12:27]", "v[13:28]", "v[14:29]",
+ "v[15:30]", "v[16:31]", "v[17:32]", "v[18:33]", "v[19:34]",
+ "v[20:35]", "v[21:36]", "v[22:37]", "v[23:38]", "v[24:39]",
+ "v[25:40]", "v[26:41]", "v[27:42]", "v[28:43]", "v[29:44]",
+ "v[30:45]", "v[31:46]", "v[32:47]", "v[33:48]", "v[34:49]",
+ "v[35:50]", "v[36:51]", "v[37:52]", "v[38:53]", "v[39:54]",
+ "v[40:55]", "v[41:56]", "v[42:57]", "v[43:58]", "v[44:59]",
+ "v[45:60]", "v[46:61]", "v[47:62]", "v[48:63]", "v[49:64]",
+ "v[50:65]", "v[51:66]", "v[52:67]", "v[53:68]", "v[54:69]",
+ "v[55:70]", "v[56:71]", "v[57:72]", "v[58:73]", "v[59:74]",
+ "v[60:75]", "v[61:76]", "v[62:77]", "v[63:78]", "v[64:79]",
+ "v[65:80]", "v[66:81]", "v[67:82]", "v[68:83]", "v[69:84]",
+ "v[70:85]", "v[71:86]", "v[72:87]", "v[73:88]", "v[74:89]",
+ "v[75:90]", "v[76:91]", "v[77:92]", "v[78:93]", "v[79:94]",
+ "v[80:95]", "v[81:96]", "v[82:97]", "v[83:98]", "v[84:99]",
+ "v[85:100]", "v[86:101]", "v[87:102]", "v[88:103]", "v[89:104]",
+ "v[90:105]", "v[91:106]", "v[92:107]", "v[93:108]", "v[94:109]",
+ "v[95:110]", "v[96:111]", "v[97:112]", "v[98:113]", "v[99:114]",
+ "v[100:115]", "v[101:116]", "v[102:117]", "v[103:118]", "v[104:119]",
+ "v[105:120]", "v[106:121]", "v[107:122]", "v[108:123]", "v[109:124]",
+ "v[110:125]", "v[111:126]", "v[112:127]", "v[113:128]", "v[114:129]",
+ "v[115:130]", "v[116:131]", "v[117:132]", "v[118:133]", "v[119:134]",
+ "v[120:135]", "v[121:136]", "v[122:137]", "v[123:138]", "v[124:139]",
+ "v[125:140]", "v[126:141]", "v[127:142]", "v[128:143]", "v[129:144]",
+ "v[130:145]", "v[131:146]", "v[132:147]", "v[133:148]", "v[134:149]",
+ "v[135:150]", "v[136:151]", "v[137:152]", "v[138:153]", "v[139:154]",
+ "v[140:155]", "v[141:156]", "v[142:157]", "v[143:158]", "v[144:159]",
+ "v[145:160]", "v[146:161]", "v[147:162]", "v[148:163]", "v[149:164]",
+ "v[150:165]", "v[151:166]", "v[152:167]", "v[153:168]", "v[154:169]",
+ "v[155:170]", "v[156:171]", "v[157:172]", "v[158:173]", "v[159:174]",
+ "v[160:175]", "v[161:176]", "v[162:177]", "v[163:178]", "v[164:179]",
+ "v[165:180]", "v[166:181]", "v[167:182]", "v[168:183]", "v[169:184]",
+ "v[170:185]", "v[171:186]", "v[172:187]", "v[173:188]", "v[174:189]",
+ "v[175:190]", "v[176:191]", "v[177:192]", "v[178:193]", "v[179:194]",
+ "v[180:195]", "v[181:196]", "v[182:197]", "v[183:198]", "v[184:199]",
+ "v[185:200]", "v[186:201]", "v[187:202]", "v[188:203]", "v[189:204]",
+ "v[190:205]", "v[191:206]", "v[192:207]", "v[193:208]", "v[194:209]",
+ "v[195:210]", "v[196:211]", "v[197:212]", "v[198:213]", "v[199:214]",
+ "v[200:215]", "v[201:216]", "v[202:217]", "v[203:218]", "v[204:219]",
+ "v[205:220]", "v[206:221]", "v[207:222]", "v[208:223]", "v[209:224]",
+ "v[210:225]", "v[211:226]", "v[212:227]", "v[213:228]", "v[214:229]",
+ "v[215:230]", "v[216:231]", "v[217:232]", "v[218:233]", "v[219:234]",
+ "v[220:235]", "v[221:236]", "v[222:237]", "v[223:238]", "v[224:239]",
+ "v[225:240]", "v[226:241]", "v[227:242]", "v[228:243]", "v[229:244]",
+ "v[230:245]", "v[231:246]", "v[232:247]", "v[233:248]", "v[234:249]",
+ "v[235:250]", "v[236:251]", "v[237:252]", "v[238:253]", "v[239:254]",
+ "v[240:255]"
+};
+
+static const char *const SGPR64RegNames[] = {
+ "s[0:1]", "s[2:3]", "s[4:5]", "s[6:7]", "s[8:9]", "s[10:11]",
+ "s[12:13]", "s[14:15]", "s[16:17]", "s[18:19]", "s[20:21]", "s[22:23]",
+ "s[24:25]", "s[26:27]", "s[28:29]", "s[30:31]", "s[32:33]", "s[34:35]",
+ "s[36:37]", "s[38:39]", "s[40:41]", "s[42:43]", "s[44:45]", "s[46:47]",
+ "s[48:49]", "s[50:51]", "s[52:53]", "s[54:55]", "s[56:57]", "s[58:59]",
+ "s[60:61]", "s[62:63]", "s[64:65]", "s[66:67]", "s[68:69]", "s[70:71]",
+ "s[72:73]", "s[74:75]", "s[76:77]", "s[78:79]", "s[80:81]", "s[82:83]",
+ "s[84:85]", "s[86:87]", "s[88:89]", "s[90:91]", "s[92:93]", "s[94:95]",
+ "s[96:97]", "s[98:99]", "s[100:101]", "s[102:103]"
+};
+
+static const char *const SGPR128RegNames[] = {
+ "s[0:3]", "s[4:7]", "s[8:11]", "s[12:15]", "s[16:19]", "s[20:23]",
+ "s[24:27]", "s[28:31]", "s[32:35]", "s[36:39]", "s[40:43]", "s[44:47]",
+ "s[48:51]", "s[52:55]", "s[56:59]", "s[60:63]", "s[64:67]", "s[68:71]",
+ "s[72:75]", "s[76:79]", "s[80:83]", "s[84:87]", "s[88:91]", "s[92:95]",
+ "s[96:99]", "s[100:103]"
+};
+
+static const char *const SGPR256RegNames[] = {
+ "s[0:7]", "s[4:11]", "s[8:15]", "s[12:19]", "s[16:23]",
+ "s[20:27]", "s[24:31]", "s[28:35]", "s[32:39]", "s[36:43]",
+ "s[40:47]", "s[44:51]", "s[48:55]", "s[52:59]", "s[56:63]",
+ "s[60:67]", "s[64:71]", "s[68:75]", "s[72:79]", "s[76:83]",
+ "s[80:87]", "s[84:91]", "s[88:95]", "s[92:99]", "s[96:103]"
+};
+
+static const char *const SGPR512RegNames[] = {
+ "s[0:15]", "s[4:19]", "s[8:23]", "s[12:27]", "s[16:31]", "s[20:35]",
+ "s[24:39]", "s[28:43]", "s[32:47]", "s[36:51]", "s[40:55]", "s[44:59]",
+ "s[48:63]", "s[52:67]", "s[56:71]", "s[60:75]", "s[64:79]", "s[68:83]",
+ "s[72:87]", "s[76:91]", "s[80:95]", "s[84:99]", "s[88:103]"
+};
+
+#endif
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index 7c198a1b8a3f..201fdc1974c6 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -36,7 +36,6 @@ protected:
#define GET_TARGET_REGBANK_CLASS
#include "AMDGPUGenRegisterBank.inc"
-
};
class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
const SIRegisterInfo *TRI;
diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
index b2867fcc49f9..ff58aa5741a1 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
@@ -40,7 +40,6 @@ unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const {
#define GET_REGINFO_TARGET_DESC
#include "AMDGPUGenRegisterInfo.inc"
-
// Forced to be here by one .inc
const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
const MachineFunction *MF) const {
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index ed9cbb994fad..5f4f20316a6b 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -16,12 +16,12 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
#include "AMDGPU.h"
-#include "R600InstrInfo.h"
-#include "R600ISelLowering.h"
#include "R600FrameLowering.h"
-#include "SIInstrInfo.h"
-#include "SIISelLowering.h"
+#include "R600ISelLowering.h"
+#include "R600InstrInfo.h"
#include "SIFrameLowering.h"
+#include "SIISelLowering.h"
+#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Triple.h"
@@ -57,9 +57,12 @@ public:
enum {
ISAVersion0_0_0,
+ ISAVersion6_0_0,
+ ISAVersion6_0_1,
ISAVersion7_0_0,
ISAVersion7_0_1,
ISAVersion7_0_2,
+ ISAVersion7_0_3,
ISAVersion8_0_0,
ISAVersion8_0_1,
ISAVersion8_0_2,
@@ -67,7 +70,9 @@ public:
ISAVersion8_0_4,
ISAVersion8_1_0,
ISAVersion9_0_0,
- ISAVersion9_0_1
+ ISAVersion9_0_1,
+ ISAVersion9_0_2,
+ ISAVersion9_0_3
};
enum TrapHandlerAbi {
@@ -787,7 +792,7 @@ public:
/// \returns VGPR allocation granularity supported by the subtarget.
unsigned getVGPRAllocGranule() const {
- return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());;
+ return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());
}
/// \returns VGPR encoding granularity supported by the subtarget.
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 404598ff4738..b644eba536fa 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -28,26 +28,26 @@
#include "GCNSchedStrategy.h"
#include "R600MachineScheduler.h"
#include "SIMachineScheduler.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/AlwaysInliner.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/GVN.h"
-#include "llvm/Transforms/Vectorize.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Vectorize.h"
#include <memory>
using namespace llvm;
@@ -734,7 +734,6 @@ void GCNPassConfig::addMachineSSAOptimization() {
addPass(&SIFoldOperandsID);
addPass(&DeadMachineInstructionElimID);
addPass(&SILoadStoreOptimizerID);
- addPass(createSIShrinkInstructionsPass());
if (EnableSDWAPeephole) {
addPass(&SIPeepholeSDWAID);
addPass(&MachineLICMID);
@@ -742,6 +741,7 @@ void GCNPassConfig::addMachineSSAOptimization() {
addPass(&SIFoldOperandsID);
addPass(&DeadMachineInstructionElimID);
}
+ addPass(createSIShrinkInstructionsPass());
}
bool GCNPassConfig::addILPOpts() {
diff --git a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
index c96761c0b04e..6c1885e67fcb 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
@@ -7,13 +7,13 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPUTargetMachine.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/Support/ELF.h"
-#include "Utils/AMDGPUBaseInfo.h"
using namespace llvm;
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index beafebc1284a..dee3d2856701 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -20,8 +20,8 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index cc68c971b249..16e3b7b4ebee 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -11,18 +11,19 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
+#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
-#include "Utils/AMDGPUAsmUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -40,12 +41,11 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -814,14 +814,8 @@ private:
bool ParseDirectiveCodeObjectMetadata();
bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
bool ParseDirectiveAMDKernelCodeT();
- bool ParseSectionDirectiveHSAText();
bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
bool ParseDirectiveAMDGPUHsaKernel();
- bool ParseDirectiveAMDGPUHsaModuleGlobal();
- bool ParseDirectiveAMDGPUHsaProgramGlobal();
- bool ParseSectionDirectiveHSADataGlobalAgent();
- bool ParseSectionDirectiveHSADataGlobalProgram();
- bool ParseSectionDirectiveHSARodataReadonlyAgent();
bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
RegisterKind RegKind, unsigned Reg1,
unsigned RegNum);
@@ -2365,12 +2359,6 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
return false;
}
-bool AMDGPUAsmParser::ParseSectionDirectiveHSAText() {
- getParser().getStreamer().SwitchSection(
- AMDGPU::getHSATextSection(getContext()));
- return false;
-}
-
bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
if (getLexer().isNot(AsmToken::Identifier))
return TokError("expected symbol name");
@@ -2384,46 +2372,6 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
return false;
}
-bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaModuleGlobal() {
- if (getLexer().isNot(AsmToken::Identifier))
- return TokError("expected symbol name");
-
- StringRef GlobalName = Parser.getTok().getIdentifier();
-
- getTargetStreamer().EmitAMDGPUHsaModuleScopeGlobal(GlobalName);
- Lex();
- return false;
-}
-
-bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaProgramGlobal() {
- if (getLexer().isNot(AsmToken::Identifier))
- return TokError("expected symbol name");
-
- StringRef GlobalName = Parser.getTok().getIdentifier();
-
- getTargetStreamer().EmitAMDGPUHsaProgramScopeGlobal(GlobalName);
- Lex();
- return false;
-}
-
-bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalAgent() {
- getParser().getStreamer().SwitchSection(
- AMDGPU::getHSADataGlobalAgentSection(getContext()));
- return false;
-}
-
-bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalProgram() {
- getParser().getStreamer().SwitchSection(
- AMDGPU::getHSADataGlobalProgramSection(getContext()));
- return false;
-}
-
-bool AMDGPUAsmParser::ParseSectionDirectiveHSARodataReadonlyAgent() {
- getParser().getStreamer().SwitchSection(
- AMDGPU::getHSARodataReadonlyAgentSection(getContext()));
- return false;
-}
-
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
@@ -2439,27 +2387,9 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".amd_kernel_code_t")
return ParseDirectiveAMDKernelCodeT();
- if (IDVal == ".hsatext")
- return ParseSectionDirectiveHSAText();
-
if (IDVal == ".amdgpu_hsa_kernel")
return ParseDirectiveAMDGPUHsaKernel();
- if (IDVal == ".amdgpu_hsa_module_global")
- return ParseDirectiveAMDGPUHsaModuleGlobal();
-
- if (IDVal == ".amdgpu_hsa_program_global")
- return ParseDirectiveAMDGPUHsaProgramGlobal();
-
- if (IDVal == ".hsadata_global_agent")
- return ParseSectionDirectiveHSADataGlobalAgent();
-
- if (IDVal == ".hsadata_global_program")
- return ParseSectionDirectiveHSADataGlobalProgram();
-
- if (IDVal == ".hsarodata_readonly_agent")
- return ParseSectionDirectiveHSARodataReadonlyAgent();
-
return true;
}
@@ -2919,6 +2849,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
if (getLexer().isNot(AsmToken::Integer))
return true;
+ SMLoc ValLoc = Parser.getTok().getLoc();
if (getParser().parseAbsoluteExpression(CntVal))
return true;
@@ -2936,21 +2867,24 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
}
- // To improve diagnostics, do not skip delimiters on errors
- if (!Failed) {
- if (getLexer().isNot(AsmToken::RParen)) {
- return true;
- }
- Parser.Lex();
- if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
- const AsmToken NextToken = getLexer().peekTok();
- if (NextToken.is(AsmToken::Identifier)) {
- Parser.Lex();
- }
+ if (Failed) {
+ Error(ValLoc, "too large value for " + CntName);
+ return true;
+ }
+
+ if (getLexer().isNot(AsmToken::RParen)) {
+ return true;
+ }
+
+ Parser.Lex();
+ if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
+ const AsmToken NextToken = getLexer().peekTok();
+ if (NextToken.is(AsmToken::Identifier)) {
+ Parser.Lex();
}
}
- return Failed;
+ return false;
}
OperandMatchResultTy
diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt
index cafce0164fa9..e30844f082cd 100644
--- a/lib/Target/AMDGPU/CMakeLists.txt
+++ b/lib/Target/AMDGPU/CMakeLists.txt
@@ -58,6 +58,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUISelLowering.cpp
AMDGPUInstrInfo.cpp
AMDGPUPromoteAlloca.cpp
+ AMDGPURegAsmNames.inc.cpp
AMDGPURegisterInfo.cpp
AMDGPUUnifyDivergentExitNodes.cpp
GCNHazardRecognizer.cpp
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 9b3cde7c4df6..88c92b9582fd 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -20,21 +20,20 @@
#include "AMDGPUDisassembler.h"
#include "AMDGPU.h"
#include "AMDGPURegisterInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/ELF.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/TargetRegistry.h"
-
using namespace llvm;
#define DEBUG_TYPE "amdgpu-disassembler"
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 0ff405a71e9b..5fa3cf1a223f 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -20,8 +20,8 @@
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
-#include <cstdint>
#include <algorithm>
+#include <cstdint>
#include <memory>
namespace llvm {
diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 80fc4ac9d2a3..cd9e7fb04f16 100644
--- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPUSubtarget.h"
#include "GCNHazardRecognizer.h"
+#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index 523eea41897e..b84640230eee 100644
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -9,8 +9,8 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUInstPrinter.h"
-#include "SIDefines.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/MC/MCExpr.h"
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index f3266fe82955..0a9c2b94c1ee 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -8,8 +8,8 @@
/// \file
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUFixupKinds.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h
deleted file mode 100644
index 816e8c744b27..000000000000
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h
+++ /dev/null
@@ -1,422 +0,0 @@
-//===--- AMDGPUCodeObjectMetadata.h -----------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief AMDGPU Code Object Metadata definitions and in-memory
-/// representations.
-///
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
-#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
-
-#include <cstdint>
-#include <string>
-#include <system_error>
-#include <vector>
-
-namespace llvm {
-namespace AMDGPU {
-
-//===----------------------------------------------------------------------===//
-// Code Object Metadata.
-//===----------------------------------------------------------------------===//
-namespace CodeObject {
-
-/// \brief Code object metadata major version.
-constexpr uint32_t MetadataVersionMajor = 1;
-/// \brief Code object metadata minor version.
-constexpr uint32_t MetadataVersionMinor = 0;
-
-/// \brief Code object metadata beginning assembler directive.
-constexpr char MetadataAssemblerDirectiveBegin[] =
- ".amdgpu_code_object_metadata";
-/// \brief Code object metadata ending assembler directive.
-constexpr char MetadataAssemblerDirectiveEnd[] =
- ".end_amdgpu_code_object_metadata";
-
-/// \brief Access qualifiers.
-enum class AccessQualifier : uint8_t {
- Default = 0,
- ReadOnly = 1,
- WriteOnly = 2,
- ReadWrite = 3,
- Unknown = 0xff
-};
-
-/// \brief Address space qualifiers.
-enum class AddressSpaceQualifier : uint8_t {
- Private = 0,
- Global = 1,
- Constant = 2,
- Local = 3,
- Generic = 4,
- Region = 5,
- Unknown = 0xff
-};
-
-/// \brief Value kinds.
-enum class ValueKind : uint8_t {
- ByValue = 0,
- GlobalBuffer = 1,
- DynamicSharedPointer = 2,
- Sampler = 3,
- Image = 4,
- Pipe = 5,
- Queue = 6,
- HiddenGlobalOffsetX = 7,
- HiddenGlobalOffsetY = 8,
- HiddenGlobalOffsetZ = 9,
- HiddenNone = 10,
- HiddenPrintfBuffer = 11,
- HiddenDefaultQueue = 12,
- HiddenCompletionAction = 13,
- Unknown = 0xff
-};
-
-/// \brief Value types.
-enum class ValueType : uint8_t {
- Struct = 0,
- I8 = 1,
- U8 = 2,
- I16 = 3,
- U16 = 4,
- F16 = 5,
- I32 = 6,
- U32 = 7,
- F32 = 8,
- I64 = 9,
- U64 = 10,
- F64 = 11,
- Unknown = 0xff
-};
-
-//===----------------------------------------------------------------------===//
-// Kernel Metadata.
-//===----------------------------------------------------------------------===//
-namespace Kernel {
-
-//===----------------------------------------------------------------------===//
-// Kernel Attributes Metadata.
-//===----------------------------------------------------------------------===//
-namespace Attrs {
-
-namespace Key {
-/// \brief Key for Kernel::Attr::Metadata::mReqdWorkGroupSize.
-constexpr char ReqdWorkGroupSize[] = "ReqdWorkGroupSize";
-/// \brief Key for Kernel::Attr::Metadata::mWorkGroupSizeHint.
-constexpr char WorkGroupSizeHint[] = "WorkGroupSizeHint";
-/// \brief Key for Kernel::Attr::Metadata::mVecTypeHint.
-constexpr char VecTypeHint[] = "VecTypeHint";
-} // end namespace Key
-
-/// \brief In-memory representation of kernel attributes metadata.
-struct Metadata final {
- /// \brief 'reqd_work_group_size' attribute. Optional.
- std::vector<uint32_t> mReqdWorkGroupSize = std::vector<uint32_t>();
- /// \brief 'work_group_size_hint' attribute. Optional.
- std::vector<uint32_t> mWorkGroupSizeHint = std::vector<uint32_t>();
- /// \brief 'vec_type_hint' attribute. Optional.
- std::string mVecTypeHint = std::string();
-
- /// \brief Default constructor.
- Metadata() = default;
-
- /// \returns True if kernel attributes metadata is empty, false otherwise.
- bool empty() const {
- return mReqdWorkGroupSize.empty() &&
- mWorkGroupSizeHint.empty() &&
- mVecTypeHint.empty();
- }
-
- /// \returns True if kernel attributes metadata is not empty, false otherwise.
- bool notEmpty() const {
- return !empty();
- }
-};
-
-} // end namespace Attrs
-
-//===----------------------------------------------------------------------===//
-// Kernel Argument Metadata.
-//===----------------------------------------------------------------------===//
-namespace Arg {
-
-namespace Key {
-/// \brief Key for Kernel::Arg::Metadata::mSize.
-constexpr char Size[] = "Size";
-/// \brief Key for Kernel::Arg::Metadata::mAlign.
-constexpr char Align[] = "Align";
-/// \brief Key for Kernel::Arg::Metadata::mValueKind.
-constexpr char ValueKind[] = "ValueKind";
-/// \brief Key for Kernel::Arg::Metadata::mValueType.
-constexpr char ValueType[] = "ValueType";
-/// \brief Key for Kernel::Arg::Metadata::mPointeeAlign.
-constexpr char PointeeAlign[] = "PointeeAlign";
-/// \brief Key for Kernel::Arg::Metadata::mAccQual.
-constexpr char AccQual[] = "AccQual";
-/// \brief Key for Kernel::Arg::Metadata::mAddrSpaceQual.
-constexpr char AddrSpaceQual[] = "AddrSpaceQual";
-/// \brief Key for Kernel::Arg::Metadata::mIsConst.
-constexpr char IsConst[] = "IsConst";
-/// \brief Key for Kernel::Arg::Metadata::mIsPipe.
-constexpr char IsPipe[] = "IsPipe";
-/// \brief Key for Kernel::Arg::Metadata::mIsRestrict.
-constexpr char IsRestrict[] = "IsRestrict";
-/// \brief Key for Kernel::Arg::Metadata::mIsVolatile.
-constexpr char IsVolatile[] = "IsVolatile";
-/// \brief Key for Kernel::Arg::Metadata::mName.
-constexpr char Name[] = "Name";
-/// \brief Key for Kernel::Arg::Metadata::mTypeName.
-constexpr char TypeName[] = "TypeName";
-} // end namespace Key
-
-/// \brief In-memory representation of kernel argument metadata.
-struct Metadata final {
- /// \brief Size in bytes. Required.
- uint32_t mSize = 0;
- /// \brief Alignment in bytes. Required.
- uint32_t mAlign = 0;
- /// \brief Value kind. Required.
- ValueKind mValueKind = ValueKind::Unknown;
- /// \brief Value type. Required.
- ValueType mValueType = ValueType::Unknown;
- /// \brief Pointee alignment in bytes. Optional.
- uint32_t mPointeeAlign = 0;
- /// \brief Access qualifier. Optional.
- AccessQualifier mAccQual = AccessQualifier::Unknown;
- /// \brief Address space qualifier. Optional.
- AddressSpaceQualifier mAddrSpaceQual = AddressSpaceQualifier::Unknown;
- /// \brief True if 'const' qualifier is specified. Optional.
- bool mIsConst = false;
- /// \brief True if 'pipe' qualifier is specified. Optional.
- bool mIsPipe = false;
- /// \brief True if 'restrict' qualifier is specified. Optional.
- bool mIsRestrict = false;
- /// \brief True if 'volatile' qualifier is specified. Optional.
- bool mIsVolatile = false;
- /// \brief Name. Optional.
- std::string mName = std::string();
- /// \brief Type name. Optional.
- std::string mTypeName = std::string();
-
- /// \brief Default constructor.
- Metadata() = default;
-};
-
-} // end namespace Arg
-
-//===----------------------------------------------------------------------===//
-// Kernel Code Properties Metadata.
-//===----------------------------------------------------------------------===//
-namespace CodeProps {
-
-namespace Key {
-/// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentSize.
-constexpr char KernargSegmentSize[] = "KernargSegmentSize";
-/// \brief Key for Kernel::CodeProps::Metadata::mWorkgroupGroupSegmentSize.
-constexpr char WorkgroupGroupSegmentSize[] = "WorkgroupGroupSegmentSize";
-/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemPrivateSegmentSize.
-constexpr char WorkitemPrivateSegmentSize[] = "WorkitemPrivateSegmentSize";
-/// \brief Key for Kernel::CodeProps::Metadata::mWavefrontNumSGPRs.
-constexpr char WavefrontNumSGPRs[] = "WavefrontNumSGPRs";
-/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemNumVGPRs.
-constexpr char WorkitemNumVGPRs[] = "WorkitemNumVGPRs";
-/// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentAlign.
-constexpr char KernargSegmentAlign[] = "KernargSegmentAlign";
-/// \brief Key for Kernel::CodeProps::Metadata::mGroupSegmentAlign.
-constexpr char GroupSegmentAlign[] = "GroupSegmentAlign";
-/// \brief Key for Kernel::CodeProps::Metadata::mPrivateSegmentAlign.
-constexpr char PrivateSegmentAlign[] = "PrivateSegmentAlign";
-/// \brief Key for Kernel::CodeProps::Metadata::mWavefrontSize.
-constexpr char WavefrontSize[] = "WavefrontSize";
-} // end namespace Key
-
-/// \brief In-memory representation of kernel code properties metadata.
-struct Metadata final {
- /// \brief Size in bytes of the kernarg segment memory. Kernarg segment memory
- /// holds the values of the arguments to the kernel. Optional.
- uint64_t mKernargSegmentSize = 0;
- /// \brief Size in bytes of the group segment memory required by a workgroup.
- /// This value does not include any dynamically allocated group segment memory
- /// that may be added when the kernel is dispatched. Optional.
- uint32_t mWorkgroupGroupSegmentSize = 0;
- /// \brief Size in bytes of the private segment memory required by a workitem.
- /// Private segment memory includes arg, spill and private segments. Optional.
- uint32_t mWorkitemPrivateSegmentSize = 0;
- /// \brief Total number of SGPRs used by a wavefront. Optional.
- uint16_t mWavefrontNumSGPRs = 0;
- /// \brief Total number of VGPRs used by a workitem. Optional.
- uint16_t mWorkitemNumVGPRs = 0;
- /// \brief Maximum byte alignment of variables used by the kernel in the
- /// kernarg memory segment. Expressed as a power of two. Optional.
- uint8_t mKernargSegmentAlign = 0;
- /// \brief Maximum byte alignment of variables used by the kernel in the
- /// group memory segment. Expressed as a power of two. Optional.
- uint8_t mGroupSegmentAlign = 0;
- /// \brief Maximum byte alignment of variables used by the kernel in the
- /// private memory segment. Expressed as a power of two. Optional.
- uint8_t mPrivateSegmentAlign = 0;
- /// \brief Wavefront size. Expressed as a power of two. Optional.
- uint8_t mWavefrontSize = 0;
-
- /// \brief Default constructor.
- Metadata() = default;
-
- /// \returns True if kernel code properties metadata is empty, false
- /// otherwise.
- bool empty() const {
- return !notEmpty();
- }
-
- /// \returns True if kernel code properties metadata is not empty, false
- /// otherwise.
- bool notEmpty() const {
- return mKernargSegmentSize || mWorkgroupGroupSegmentSize ||
- mWorkitemPrivateSegmentSize || mWavefrontNumSGPRs ||
- mWorkitemNumVGPRs || mKernargSegmentAlign || mGroupSegmentAlign ||
- mPrivateSegmentAlign || mWavefrontSize;
- }
-};
-
-} // end namespace CodeProps
-
-//===----------------------------------------------------------------------===//
-// Kernel Debug Properties Metadata.
-//===----------------------------------------------------------------------===//
-namespace DebugProps {
-
-namespace Key {
-/// \brief Key for Kernel::DebugProps::Metadata::mDebuggerABIVersion.
-constexpr char DebuggerABIVersion[] = "DebuggerABIVersion";
-/// \brief Key for Kernel::DebugProps::Metadata::mReservedNumVGPRs.
-constexpr char ReservedNumVGPRs[] = "ReservedNumVGPRs";
-/// \brief Key for Kernel::DebugProps::Metadata::mReservedFirstVGPR.
-constexpr char ReservedFirstVGPR[] = "ReservedFirstVGPR";
-/// \brief Key for Kernel::DebugProps::Metadata::mPrivateSegmentBufferSGPR.
-constexpr char PrivateSegmentBufferSGPR[] = "PrivateSegmentBufferSGPR";
-/// \brief Key for
-/// Kernel::DebugProps::Metadata::mWavefrontPrivateSegmentOffsetSGPR.
-constexpr char WavefrontPrivateSegmentOffsetSGPR[] =
- "WavefrontPrivateSegmentOffsetSGPR";
-} // end namespace Key
-
-/// \brief In-memory representation of kernel debug properties metadata.
-struct Metadata final {
- /// \brief Debugger ABI version. Optional.
- std::vector<uint32_t> mDebuggerABIVersion = std::vector<uint32_t>();
- /// \brief Consecutive number of VGPRs reserved for debugger use. Must be 0 if
- /// mDebuggerABIVersion is not set. Optional.
- uint16_t mReservedNumVGPRs = 0;
- /// \brief First fixed VGPR reserved. Must be uint16_t(-1) if
- /// mDebuggerABIVersion is not set or mReservedFirstVGPR is 0. Optional.
- uint16_t mReservedFirstVGPR = uint16_t(-1);
- /// \brief Fixed SGPR of the first of 4 SGPRs used to hold the scratch V# used
- /// for the entire kernel execution. Must be uint16_t(-1) if
- /// mDebuggerABIVersion is not set or SGPR not used or not known. Optional.
- uint16_t mPrivateSegmentBufferSGPR = uint16_t(-1);
- /// \brief Fixed SGPR used to hold the wave scratch offset for the entire
- /// kernel execution. Must be uint16_t(-1) if mDebuggerABIVersion is not set
- /// or SGPR is not used or not known. Optional.
- uint16_t mWavefrontPrivateSegmentOffsetSGPR = uint16_t(-1);
-
- /// \brief Default constructor.
- Metadata() = default;
-
- /// \returns True if kernel debug properties metadata is empty, false
- /// otherwise.
- bool empty() const {
- return !notEmpty();
- }
-
- /// \returns True if kernel debug properties metadata is not empty, false
- /// otherwise.
- bool notEmpty() const {
- return !mDebuggerABIVersion.empty();
- }
-};
-
-} // end namespace DebugProps
-
-namespace Key {
-/// \brief Key for Kernel::Metadata::mName.
-constexpr char Name[] = "Name";
-/// \brief Key for Kernel::Metadata::mLanguage.
-constexpr char Language[] = "Language";
-/// \brief Key for Kernel::Metadata::mLanguageVersion.
-constexpr char LanguageVersion[] = "LanguageVersion";
-/// \brief Key for Kernel::Metadata::mAttrs.
-constexpr char Attrs[] = "Attrs";
-/// \brief Key for Kernel::Metadata::mArgs.
-constexpr char Args[] = "Args";
-/// \brief Key for Kernel::Metadata::mCodeProps.
-constexpr char CodeProps[] = "CodeProps";
-/// \brief Key for Kernel::Metadata::mDebugProps.
-constexpr char DebugProps[] = "DebugProps";
-} // end namespace Key
-
-/// \brief In-memory representation of kernel metadata.
-struct Metadata final {
- /// \brief Name. Required.
- std::string mName = std::string();
- /// \brief Language. Optional.
- std::string mLanguage = std::string();
- /// \brief Language version. Optional.
- std::vector<uint32_t> mLanguageVersion = std::vector<uint32_t>();
- /// \brief Attributes metadata. Optional.
- Attrs::Metadata mAttrs = Attrs::Metadata();
- /// \brief Arguments metadata. Optional.
- std::vector<Arg::Metadata> mArgs = std::vector<Arg::Metadata>();
- /// \brief Code properties metadata. Optional.
- CodeProps::Metadata mCodeProps = CodeProps::Metadata();
- /// \brief Debug properties metadata. Optional.
- DebugProps::Metadata mDebugProps = DebugProps::Metadata();
-
- /// \brief Default constructor.
- Metadata() = default;
-};
-
-} // end namespace Kernel
-
-namespace Key {
-/// \brief Key for CodeObject::Metadata::mVersion.
-constexpr char Version[] = "Version";
-/// \brief Key for CodeObject::Metadata::mPrintf.
-constexpr char Printf[] = "Printf";
-/// \brief Key for CodeObject::Metadata::mKernels.
-constexpr char Kernels[] = "Kernels";
-} // end namespace Key
-
-/// \brief In-memory representation of code object metadata.
-struct Metadata final {
- /// \brief Code object metadata version. Required.
- std::vector<uint32_t> mVersion = std::vector<uint32_t>();
- /// \brief Printf metadata. Optional.
- std::vector<std::string> mPrintf = std::vector<std::string>();
- /// \brief Kernels metadata. Optional.
- std::vector<Kernel::Metadata> mKernels = std::vector<Kernel::Metadata>();
-
- /// \brief Default constructor.
- Metadata() = default;
-
- /// \brief Converts \p YamlString to \p CodeObjectMetadata.
- static std::error_code fromYamlString(std::string YamlString,
- Metadata &CodeObjectMetadata);
-
- /// \brief Converts \p CodeObjectMetadata to \p YamlString.
- static std::error_code toYamlString(Metadata CodeObjectMetadata,
- std::string &YamlString);
-};
-
-} // end namespace CodeObject
-} // end namespace AMDGPU
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
index 647017d5061d..4e828a791e09 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
@@ -13,20 +13,12 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
#include "AMDGPUCodeObjectMetadataStreamer.h"
+#include "AMDGPU.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/YAMLTraits.h"
-
-using namespace llvm::AMDGPU;
-using namespace llvm::AMDGPU::CodeObject;
-
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
-LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata)
-LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata)
+#include "llvm/Support/raw_ostream.h"
namespace llvm {
@@ -37,192 +29,7 @@ static cl::opt<bool> VerifyCodeObjectMetadata(
"amdgpu-verify-comd",
cl::desc("Verify AMDGPU Code Object Metadata"));
-namespace yaml {
-
-template <>
-struct ScalarEnumerationTraits<AccessQualifier> {
- static void enumeration(IO &YIO, AccessQualifier &EN) {
- YIO.enumCase(EN, "Default", AccessQualifier::Default);
- YIO.enumCase(EN, "ReadOnly", AccessQualifier::ReadOnly);
- YIO.enumCase(EN, "WriteOnly", AccessQualifier::WriteOnly);
- YIO.enumCase(EN, "ReadWrite", AccessQualifier::ReadWrite);
- }
-};
-
-template <>
-struct ScalarEnumerationTraits<AddressSpaceQualifier> {
- static void enumeration(IO &YIO, AddressSpaceQualifier &EN) {
- YIO.enumCase(EN, "Private", AddressSpaceQualifier::Private);
- YIO.enumCase(EN, "Global", AddressSpaceQualifier::Global);
- YIO.enumCase(EN, "Constant", AddressSpaceQualifier::Constant);
- YIO.enumCase(EN, "Local", AddressSpaceQualifier::Local);
- YIO.enumCase(EN, "Generic", AddressSpaceQualifier::Generic);
- YIO.enumCase(EN, "Region", AddressSpaceQualifier::Region);
- }
-};
-
-template <>
-struct ScalarEnumerationTraits<ValueKind> {
- static void enumeration(IO &YIO, ValueKind &EN) {
- YIO.enumCase(EN, "ByValue", ValueKind::ByValue);
- YIO.enumCase(EN, "GlobalBuffer", ValueKind::GlobalBuffer);
- YIO.enumCase(EN, "DynamicSharedPointer", ValueKind::DynamicSharedPointer);
- YIO.enumCase(EN, "Sampler", ValueKind::Sampler);
- YIO.enumCase(EN, "Image", ValueKind::Image);
- YIO.enumCase(EN, "Pipe", ValueKind::Pipe);
- YIO.enumCase(EN, "Queue", ValueKind::Queue);
- YIO.enumCase(EN, "HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX);
- YIO.enumCase(EN, "HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY);
- YIO.enumCase(EN, "HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ);
- YIO.enumCase(EN, "HiddenNone", ValueKind::HiddenNone);
- YIO.enumCase(EN, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer);
- YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue);
- YIO.enumCase(EN, "HiddenCompletionAction",
- ValueKind::HiddenCompletionAction);
- }
-};
-
-template <>
-struct ScalarEnumerationTraits<ValueType> {
- static void enumeration(IO &YIO, ValueType &EN) {
- YIO.enumCase(EN, "Struct", ValueType::Struct);
- YIO.enumCase(EN, "I8", ValueType::I8);
- YIO.enumCase(EN, "U8", ValueType::U8);
- YIO.enumCase(EN, "I16", ValueType::I16);
- YIO.enumCase(EN, "U16", ValueType::U16);
- YIO.enumCase(EN, "F16", ValueType::F16);
- YIO.enumCase(EN, "I32", ValueType::I32);
- YIO.enumCase(EN, "U32", ValueType::U32);
- YIO.enumCase(EN, "F32", ValueType::F32);
- YIO.enumCase(EN, "I64", ValueType::I64);
- YIO.enumCase(EN, "U64", ValueType::U64);
- YIO.enumCase(EN, "F64", ValueType::F64);
- }
-};
-
-template <>
-struct MappingTraits<Kernel::Attrs::Metadata> {
- static void mapping(IO &YIO, Kernel::Attrs::Metadata &MD) {
- YIO.mapOptional(Kernel::Attrs::Key::ReqdWorkGroupSize,
- MD.mReqdWorkGroupSize, std::vector<uint32_t>());
- YIO.mapOptional(Kernel::Attrs::Key::WorkGroupSizeHint,
- MD.mWorkGroupSizeHint, std::vector<uint32_t>());
- YIO.mapOptional(Kernel::Attrs::Key::VecTypeHint,
- MD.mVecTypeHint, std::string());
- }
-};
-
-template <>
-struct MappingTraits<Kernel::Arg::Metadata> {
- static void mapping(IO &YIO, Kernel::Arg::Metadata &MD) {
- YIO.mapRequired(Kernel::Arg::Key::Size, MD.mSize);
- YIO.mapRequired(Kernel::Arg::Key::Align, MD.mAlign);
- YIO.mapRequired(Kernel::Arg::Key::ValueKind, MD.mValueKind);
- YIO.mapRequired(Kernel::Arg::Key::ValueType, MD.mValueType);
- YIO.mapOptional(Kernel::Arg::Key::PointeeAlign, MD.mPointeeAlign,
- uint32_t(0));
- YIO.mapOptional(Kernel::Arg::Key::AccQual, MD.mAccQual,
- AccessQualifier::Unknown);
- YIO.mapOptional(Kernel::Arg::Key::AddrSpaceQual, MD.mAddrSpaceQual,
- AddressSpaceQualifier::Unknown);
- YIO.mapOptional(Kernel::Arg::Key::IsConst, MD.mIsConst, false);
- YIO.mapOptional(Kernel::Arg::Key::IsPipe, MD.mIsPipe, false);
- YIO.mapOptional(Kernel::Arg::Key::IsRestrict, MD.mIsRestrict, false);
- YIO.mapOptional(Kernel::Arg::Key::IsVolatile, MD.mIsVolatile, false);
- YIO.mapOptional(Kernel::Arg::Key::Name, MD.mName, std::string());
- YIO.mapOptional(Kernel::Arg::Key::TypeName, MD.mTypeName, std::string());
- }
-};
-
-template <>
-struct MappingTraits<Kernel::CodeProps::Metadata> {
- static void mapping(IO &YIO, Kernel::CodeProps::Metadata &MD) {
- YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentSize,
- MD.mKernargSegmentSize, uint64_t(0));
- YIO.mapOptional(Kernel::CodeProps::Key::WorkgroupGroupSegmentSize,
- MD.mWorkgroupGroupSegmentSize, uint32_t(0));
- YIO.mapOptional(Kernel::CodeProps::Key::WorkitemPrivateSegmentSize,
- MD.mWorkitemPrivateSegmentSize, uint32_t(0));
- YIO.mapOptional(Kernel::CodeProps::Key::WavefrontNumSGPRs,
- MD.mWavefrontNumSGPRs, uint16_t(0));
- YIO.mapOptional(Kernel::CodeProps::Key::WorkitemNumVGPRs,
- MD.mWorkitemNumVGPRs, uint16_t(0));
- YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentAlign,
- MD.mKernargSegmentAlign, uint8_t(0));
- YIO.mapOptional(Kernel::CodeProps::Key::GroupSegmentAlign,
- MD.mGroupSegmentAlign, uint8_t(0));
- YIO.mapOptional(Kernel::CodeProps::Key::PrivateSegmentAlign,
- MD.mPrivateSegmentAlign, uint8_t(0));
- YIO.mapOptional(Kernel::CodeProps::Key::WavefrontSize,
- MD.mWavefrontSize, uint8_t(0));
- }
-};
-
-template <>
-struct MappingTraits<Kernel::DebugProps::Metadata> {
- static void mapping(IO &YIO, Kernel::DebugProps::Metadata &MD) {
- YIO.mapOptional(Kernel::DebugProps::Key::DebuggerABIVersion,
- MD.mDebuggerABIVersion, std::vector<uint32_t>());
- YIO.mapOptional(Kernel::DebugProps::Key::ReservedNumVGPRs,
- MD.mReservedNumVGPRs, uint16_t(0));
- YIO.mapOptional(Kernel::DebugProps::Key::ReservedFirstVGPR,
- MD.mReservedFirstVGPR, uint16_t(-1));
- YIO.mapOptional(Kernel::DebugProps::Key::PrivateSegmentBufferSGPR,
- MD.mPrivateSegmentBufferSGPR, uint16_t(-1));
- YIO.mapOptional(Kernel::DebugProps::Key::WavefrontPrivateSegmentOffsetSGPR,
- MD.mWavefrontPrivateSegmentOffsetSGPR, uint16_t(-1));
- }
-};
-
-template <>
-struct MappingTraits<Kernel::Metadata> {
- static void mapping(IO &YIO, Kernel::Metadata &MD) {
- YIO.mapRequired(Kernel::Key::Name, MD.mName);
- YIO.mapOptional(Kernel::Key::Language, MD.mLanguage, std::string());
- YIO.mapOptional(Kernel::Key::LanguageVersion, MD.mLanguageVersion,
- std::vector<uint32_t>());
- if (!MD.mAttrs.empty() || !YIO.outputting())
- YIO.mapOptional(Kernel::Key::Attrs, MD.mAttrs);
- if (!MD.mArgs.empty() || !YIO.outputting())
- YIO.mapOptional(Kernel::Key::Args, MD.mArgs);
- if (!MD.mCodeProps.empty() || !YIO.outputting())
- YIO.mapOptional(Kernel::Key::CodeProps, MD.mCodeProps);
- if (!MD.mDebugProps.empty() || !YIO.outputting())
- YIO.mapOptional(Kernel::Key::DebugProps, MD.mDebugProps);
- }
-};
-
-template <>
-struct MappingTraits<CodeObject::Metadata> {
- static void mapping(IO &YIO, CodeObject::Metadata &MD) {
- YIO.mapRequired(Key::Version, MD.mVersion);
- YIO.mapOptional(Key::Printf, MD.mPrintf, std::vector<std::string>());
- if (!MD.mKernels.empty() || !YIO.outputting())
- YIO.mapOptional(Key::Kernels, MD.mKernels);
- }
-};
-
-} // end namespace yaml
-
namespace AMDGPU {
-
-/* static */
-std::error_code CodeObject::Metadata::fromYamlString(
- std::string YamlString, CodeObject::Metadata &CodeObjectMetadata) {
- yaml::Input YamlInput(YamlString);
- YamlInput >> CodeObjectMetadata;
- return YamlInput.error();
-}
-
-/* static */
-std::error_code CodeObject::Metadata::toYamlString(
- CodeObject::Metadata CodeObjectMetadata, std::string &YamlString) {
- raw_string_ostream YamlStream(YamlString);
- yaml::Output YamlOutput(YamlStream, nullptr, std::numeric_limits<int>::max());
- YamlOutput << CodeObjectMetadata;
- return std::error_code();
-}
-
namespace CodeObject {
void MetadataStreamer::dump(StringRef YamlString) const {
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h
index 8d4c51763f63..c6681431d74d 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h
@@ -17,9 +17,9 @@
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
#include "AMDGPU.h"
-#include "AMDGPUCodeObjectMetadata.h"
#include "AMDKernelCodeT.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/AMDGPUCodeObjectMetadata.h"
#include "llvm/Support/ErrorOr.h"
namespace llvm {
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 073d19422e86..6abe7f3d37d5 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -8,12 +8,12 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUMCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 8dc863f723e2..2a0032fc9adc 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -11,12 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
#include "AMDGPUTargetStreamer.h"
+#include "AMDGPU.h"
#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
@@ -25,7 +26,6 @@
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/FormattedStream.h"
namespace llvm {
@@ -100,16 +100,6 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
}
}
-void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaModuleScopeGlobal(
- StringRef GlobalName) {
- OS << "\t.amdgpu_hsa_module_global " << GlobalName << '\n';
-}
-
-void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal(
- StringRef GlobalName) {
- OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n';
-}
-
bool AMDGPUTargetAsmStreamer::EmitCodeObjectMetadata(StringRef YamlString) {
auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString);
if (!VerifiedYamlString)
@@ -214,24 +204,6 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
Symbol->setType(ELF::STT_AMDGPU_HSA_KERNEL);
}
-void AMDGPUTargetELFStreamer::EmitAMDGPUHsaModuleScopeGlobal(
- StringRef GlobalName) {
-
- MCSymbolELF *Symbol = cast<MCSymbolELF>(
- getStreamer().getContext().getOrCreateSymbol(GlobalName));
- Symbol->setType(ELF::STT_OBJECT);
- Symbol->setBinding(ELF::STB_LOCAL);
-}
-
-void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal(
- StringRef GlobalName) {
-
- MCSymbolELF *Symbol = cast<MCSymbolELF>(
- getStreamer().getContext().getOrCreateSymbol(GlobalName));
- Symbol->setType(ELF::STT_OBJECT);
- Symbol->setBinding(ELF::STB_GLOBAL);
-}
-
bool AMDGPUTargetELFStreamer::EmitCodeObjectMetadata(StringRef YamlString) {
auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString);
if (!VerifiedYamlString)
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 5c588bbded9c..968128e94d0b 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -44,10 +44,6 @@ public:
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) = 0;
- virtual void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) = 0;
-
- virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0;
-
virtual void EmitStartOfCodeObjectMetadata(const Module &Mod);
virtual void EmitKernelCodeObjectMetadata(
@@ -74,10 +70,6 @@ public:
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
- void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override;
-
- void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
-
/// \returns True on success, false on failure.
bool EmitCodeObjectMetadata(StringRef YamlString) override;
};
@@ -105,10 +97,6 @@ public:
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
- void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override;
-
- void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
-
/// \returns True on success, false on failure.
bool EmitCodeObjectMetadata(StringRef YamlString) override;
};
diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index 6015ec190fd4..eab90e1d344c 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -14,10 +14,10 @@
//
//===----------------------------------------------------------------------===//
-#include "R600Defines.h"
#include "MCTargetDesc/AMDGPUFixupKinds.h"
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "R600Defines.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixup.h"
diff --git a/lib/Target/AMDGPU/Processors.td b/lib/Target/AMDGPU/Processors.td
index 0e4eda982139..f6f2582aa11b 100644
--- a/lib/Target/AMDGPU/Processors.td
+++ b/lib/Target/AMDGPU/Processors.td
@@ -80,50 +80,53 @@ def : Proc<"cayman", R600_VLIW4_Itin,
// Southern Islands
//===----------------------------------------------------------------------===//
-def : ProcessorModel<"SI", SIFullSpeedModel,
- [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops]
+def : ProcessorModel<"gfx600", SIFullSpeedModel,
+ [FeatureISAVersion6_0_0]>;
+
+def : ProcessorModel<"SI", SIFullSpeedModel,
+ [FeatureISAVersion6_0_0]
+>;
+
+def : ProcessorModel<"tahiti", SIFullSpeedModel,
+ [FeatureISAVersion6_0_0]
>;
-def : ProcessorModel<"tahiti", SIFullSpeedModel,
- [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops]
+def : ProcessorModel<"gfx601", SIQuarterSpeedModel,
+ [FeatureISAVersion6_0_1]
>;
-def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
+def : ProcessorModel<"pitcairn", SIQuarterSpeedModel,
+ [FeatureISAVersion6_0_1]>;
-def : ProcessorModel<"verde", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
+def : ProcessorModel<"verde", SIQuarterSpeedModel,
+ [FeatureISAVersion6_0_1]>;
-def : ProcessorModel<"oland", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
+def : ProcessorModel<"oland", SIQuarterSpeedModel,
+ [FeatureISAVersion6_0_1]>;
-def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
+def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureISAVersion6_0_1]>;
//===----------------------------------------------------------------------===//
// Sea Islands
//===----------------------------------------------------------------------===//
-def : ProcessorModel<"bonaire", SIQuarterSpeedModel,
+def : ProcessorModel<"gfx700", SIQuarterSpeedModel,
[FeatureISAVersion7_0_0]
>;
-def : ProcessorModel<"kabini", SIQuarterSpeedModel,
- [FeatureISAVersion7_0_2]
+def : ProcessorModel<"bonaire", SIQuarterSpeedModel,
+ [FeatureISAVersion7_0_0]
>;
def : ProcessorModel<"kaveri", SIQuarterSpeedModel,
[FeatureISAVersion7_0_0]
>;
-def : ProcessorModel<"hawaii", SIFullSpeedModel,
+def : ProcessorModel<"gfx701", SIFullSpeedModel,
[FeatureISAVersion7_0_1]
>;
-def : ProcessorModel<"mullins", SIQuarterSpeedModel,
- [FeatureISAVersion7_0_2]>;
-
-def : ProcessorModel<"gfx700", SIQuarterSpeedModel,
- [FeatureISAVersion7_0_0]
->;
-
-def : ProcessorModel<"gfx701", SIFullSpeedModel,
+def : ProcessorModel<"hawaii", SIFullSpeedModel,
[FeatureISAVersion7_0_1]
>;
@@ -131,6 +134,17 @@ def : ProcessorModel<"gfx702", SIQuarterSpeedModel,
[FeatureISAVersion7_0_2]
>;
+def : ProcessorModel<"gfx703", SIQuarterSpeedModel,
+ [FeatureISAVersion7_0_3]
+>;
+
+def : ProcessorModel<"kabini", SIQuarterSpeedModel,
+ [FeatureISAVersion7_0_3]
+>;
+
+def : ProcessorModel<"mullins", SIQuarterSpeedModel,
+ [FeatureISAVersion7_0_3]>;
+
//===----------------------------------------------------------------------===//
// Volcanic Islands
//===----------------------------------------------------------------------===//
@@ -187,10 +201,23 @@ def : ProcessorModel<"gfx810", SIQuarterSpeedModel,
[FeatureISAVersion8_1_0]
>;
-def : ProcessorModel<"gfx900", SIQuarterSpeedModel,
- [FeatureGFX9, FeatureISAVersion9_0_0, FeatureLDSBankCount32]
+//===----------------------------------------------------------------------===//
+// GFX9
+//===----------------------------------------------------------------------===//
+
+def : ProcessorModel<"gfx900", SIQuarterSpeedModel,
+ [FeatureISAVersion9_0_0]
+>;
+
+def : ProcessorModel<"gfx901", SIQuarterSpeedModel,
+ [FeatureISAVersion9_0_1]
+>;
+
+def : ProcessorModel<"gfx902", SIQuarterSpeedModel,
+ [FeatureISAVersion9_0_2]
>;
-def : ProcessorModel<"gfx901", SIQuarterSpeedModel,
- [FeatureGFX9, FeatureXNACK, FeatureISAVersion9_0_1, FeatureLDSBankCount32]
+def : ProcessorModel<"gfx903", SIQuarterSpeedModel,
+ [FeatureISAVersion9_0_3]
>;
+
diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index 09b328765604..6993e8a62a9c 100644
--- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -12,15 +12,14 @@
/// computing their address on the fly ; it also sets STACK_SIZE info.
//===----------------------------------------------------------------------===//
-#include "llvm/Support/Debug.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -30,6 +29,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
diff --git a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
index 03fc1aff5ec1..0d8ccd088ec4 100644
--- a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
+++ b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
@@ -15,10 +15,10 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600RegisterInfo.h"
-#include "AMDGPUSubtarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
diff --git a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
index 5c30a0734f0d..66def2d29caf 100644
--- a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
+++ b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
@@ -15,11 +15,11 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
-#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/lib/Target/AMDGPU/R600FrameLowering.cpp b/lib/Target/AMDGPU/R600FrameLowering.cpp
index 1f01ad732e00..37787b3c5f72 100644
--- a/lib/Target/AMDGPU/R600FrameLowering.cpp
+++ b/lib/Target/AMDGPU/R600FrameLowering.cpp
@@ -10,8 +10,8 @@
#include "R600FrameLowering.h"
#include "AMDGPUSubtarget.h"
#include "R600RegisterInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 60b913cfd39a..c55878f8bff0 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1120,7 +1120,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
Mask = DAG.getConstant(0xff, DL, MVT::i32);
} else if (Store->getMemoryVT() == MVT::i16) {
assert(Store->getAlignment() >= 2);
- Mask = DAG.getConstant(0xffff, DL, MVT::i32);;
+ Mask = DAG.getConstant(0xffff, DL, MVT::i32);
} else {
llvm_unreachable("Unsupported private trunc store");
}
diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp
index 2422d57269eb..c5da5e404200 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -12,12 +12,12 @@
//
//===----------------------------------------------------------------------===//
+#include "R600InstrInfo.h"
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600FrameLowering.h"
-#include "R600InstrInfo.h"
#include "R600RegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/BitVector.h"
@@ -35,8 +35,8 @@
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
-#include <cstring>
#include <cstdint>
+#include <cstring>
#include <iterator>
#include <utility>
#include <vector>
diff --git a/lib/Target/AMDGPU/R600MachineScheduler.cpp b/lib/Target/AMDGPU/R600MachineScheduler.cpp
index db18e5bd1afa..47fda1c8fa82 100644
--- a/lib/Target/AMDGPU/R600MachineScheduler.cpp
+++ b/lib/Target/AMDGPU/R600MachineScheduler.cpp
@@ -13,11 +13,11 @@
//===----------------------------------------------------------------------===//
#include "R600MachineScheduler.h"
-#include "R600InstrInfo.h"
#include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Pass.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/AMDGPU/R600Packetizer.cpp b/lib/Target/AMDGPU/R600Packetizer.cpp
index 3e957126b497..1cb40938cee7 100644
--- a/lib/Target/AMDGPU/R600Packetizer.cpp
+++ b/lib/Target/AMDGPU/R600Packetizer.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/Debug.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
@@ -24,6 +23,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp b/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
index 62ebef8e91af..b5c439b21b89 100644
--- a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
+++ b/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
@@ -19,8 +19,8 @@
//
//===----------------------------------------------------------------------===//
-#include "SIInstrInfo.h"
#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 3cca815d8773..5f5f25103c02 100644
--- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -65,10 +65,10 @@
/// ultimately led to the creation of an illegal COPY.
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/DenseSet.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index dfac068d1f69..e10f1ed3762e 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -730,7 +730,8 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
// Make sure sources are identical.
const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
- if (!Src0->isReg() || Src0->getSubReg() != Src1->getSubReg() ||
+ if (!Src0->isReg() || !Src1->isReg() ||
+ Src0->getSubReg() != Src1->getSubReg() ||
Src0->getSubReg() != AMDGPU::NoSubRegister)
return nullptr;
diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp
index 97bb0f0c0656..b1bd14e421f0 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -8,10 +8,10 @@
//==-----------------------------------------------------------------------===//
#include "SIFrameLowering.h"
+#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
-#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index b48b23911105..599ee942d738 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -17,12 +17,12 @@
#define _USE_MATH_DEFINES
#endif
+#include "SIISelLowering.h"
#include "AMDGPU.h"
#include "AMDGPUIntrinsicInfo.h"
-#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
+#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
-#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
@@ -2604,7 +2604,7 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
SDValue FpToFp16 = DAG.getNode(ISD::FP_TO_FP16, DL, MVT::i32, Src);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FpToFp16);
- return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);;
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);
}
SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index c10badba88f3..0f009a48754a 100644
--- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -229,7 +229,7 @@ public:
MachineInstr &MI);
BlockWaitcntBrackets()
- : WaitAtBeginning(false), ValidLoop(false), MixedExpTypes(false),
+ : WaitAtBeginning(false), RevisitLoop(false), ValidLoop(false), MixedExpTypes(false),
LoopRegion(NULL), PostOrder(0), Waitcnt(NULL), VgprUB(0), SgprUB(0) {
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
T = (enum InstCounterType)(T + 1)) {
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 36d29b8ecf06..58c05cf16f15 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -20,10 +20,10 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Debug.h"
diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 35d3a93d8710..5f1c7f1fc42f 100644
--- a/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -60,8 +60,8 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp b/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 3680e02da576..ba616ada0c9c 100644
--- a/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -21,8 +21,8 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 348bb4fa0260..9fdb8caac6f2 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -15,8 +15,8 @@
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#include "AMDGPUMachineFunction.h"
-#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp
index 9d4e677400e6..bb17dbbdfbd6 100644
--- a/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -12,9 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#include "SIMachineScheduler.h"
#include "AMDGPU.h"
#include "SIInstrInfo.h"
-#include "SIMachineScheduler.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index fae249b04492..f4ddf1891683 100644
--- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -20,13 +20,12 @@
///
//===----------------------------------------------------------------------===//
-
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include <unordered_map>
@@ -129,7 +128,8 @@ public:
bool getNeg() const { return Neg; }
bool getSext() const { return Sext; }
- uint64_t getSrcMods() const;
+ uint64_t getSrcMods(const SIInstrInfo *TII,
+ const MachineOperand *SrcOp) const;
};
class SDWADstOperand : public SDWAOperand {
@@ -240,13 +240,24 @@ static bool isSubregOf(const MachineOperand &SubReg,
return SuperMask.all();
}
-uint64_t SDWASrcOperand::getSrcMods() const {
+uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
+ const MachineOperand *SrcOp) const {
uint64_t Mods = 0;
+ const auto *MI = SrcOp->getParent();
+ if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) {
+ if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
+ Mods = Mod->getImm();
+ }
+ } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) {
+ if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) {
+ Mods = Mod->getImm();
+ }
+ }
if (Abs || Neg) {
assert(!Sext &&
"Float and integer src modifiers can't be set simulteniously");
Mods |= Abs ? SISrcMods::ABS : 0;
- Mods |= Neg ? SISrcMods::NEG : 0;
+ Mods ^= Neg ? SISrcMods::NEG : 0;
} else if (Sext) {
Mods |= SISrcMods::SEXT;
}
@@ -312,7 +323,7 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
}
copyRegOperand(*Src, *getTargetOperand());
SrcSel->setImm(getSrcSel());
- SrcMods->setImm(getSrcMods());
+ SrcMods->setImm(getSrcMods(TII, Src));
getTargetOperand()->setIsKill(false);
return true;
}
@@ -409,7 +420,10 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
switch (Opcode) {
case AMDGPU::V_LSHRREV_B32_e32:
case AMDGPU::V_ASHRREV_I32_e32:
- case AMDGPU::V_LSHLREV_B32_e32: {
+ case AMDGPU::V_LSHLREV_B32_e32:
+ case AMDGPU::V_LSHRREV_B32_e64:
+ case AMDGPU::V_ASHRREV_I32_e64:
+ case AMDGPU::V_LSHLREV_B32_e64: {
// from: v_lshrrev_b32_e32 v1, 16/24, v0
// to SDWA src:v0 src_sel:WORD_1/BYTE_3
@@ -432,7 +446,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
TRI->isPhysicalRegister(Dst->getReg()))
break;
- if (Opcode == AMDGPU::V_LSHLREV_B32_e32) {
+ if (Opcode == AMDGPU::V_LSHLREV_B32_e32 ||
+ Opcode == AMDGPU::V_LSHLREV_B32_e64) {
auto SDWADst = make_unique<SDWADstOperand>(
Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
@@ -441,7 +456,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
} else {
auto SDWASrc = make_unique<SDWASrcOperand>(
Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,
- Opcode == AMDGPU::V_LSHRREV_B32_e32 ? false : true);
+ Opcode != AMDGPU::V_LSHRREV_B32_e32 &&
+ Opcode != AMDGPU::V_LSHRREV_B32_e64);
DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
SDWAOperands[&MI] = std::move(SDWASrc);
++NumSDWAPatternsFound;
@@ -451,7 +467,10 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
case AMDGPU::V_LSHRREV_B16_e32:
case AMDGPU::V_ASHRREV_I16_e32:
- case AMDGPU::V_LSHLREV_B16_e32: {
+ case AMDGPU::V_LSHLREV_B16_e32:
+ case AMDGPU::V_LSHRREV_B16_e64:
+ case AMDGPU::V_ASHRREV_I16_e64:
+ case AMDGPU::V_LSHLREV_B16_e64: {
// from: v_lshrrev_b16_e32 v1, 8, v0
// to SDWA src:v0 src_sel:BYTE_1
@@ -472,7 +491,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
TRI->isPhysicalRegister(Dst->getReg()))
break;
- if (Opcode == AMDGPU::V_LSHLREV_B16_e32) {
+ if (Opcode == AMDGPU::V_LSHLREV_B16_e32 ||
+ Opcode == AMDGPU::V_LSHLREV_B16_e64) {
auto SDWADst =
make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
@@ -481,7 +501,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
} else {
auto SDWASrc = make_unique<SDWASrcOperand>(
Src1, Dst, BYTE_1, false, false,
- Opcode == AMDGPU::V_LSHRREV_B16_e32 ? false : true);
+ Opcode != AMDGPU::V_LSHRREV_B16_e32 &&
+ Opcode != AMDGPU::V_LSHRREV_B16_e64);
DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
SDWAOperands[&MI] = std::move(SDWASrc);
++NumSDWAPatternsFound;
@@ -549,20 +570,25 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
++NumSDWAPatternsFound;
break;
}
- case AMDGPU::V_AND_B32_e32: {
+ case AMDGPU::V_AND_B32_e32:
+ case AMDGPU::V_AND_B32_e64: {
// e.g.:
// from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0
// to SDWA src:v0 src_sel:WORD_0/BYTE_0
MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ auto ValSrc = Src1;
auto Imm = foldToImm(*Src0);
- if (!Imm)
- break;
- if (*Imm != 0x0000ffff && *Imm != 0x000000ff)
+ if (!Imm) {
+ Imm = foldToImm(*Src1);
+ ValSrc = Src0;
+ }
+
+ if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff))
break;
- MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
if (TRI->isPhysicalRegister(Src1->getReg()) ||
@@ -570,7 +596,7 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
break;
auto SDWASrc = make_unique<SDWASrcOperand>(
- Src1, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
+ ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
SDWAOperands[&MI] = std::move(SDWASrc);
++NumSDWAPatternsFound;
@@ -583,28 +609,38 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI) const {
// Check if this instruction has opcode that supports SDWA
- return AMDGPU::getSDWAOp(MI.getOpcode()) != -1;
+ unsigned Opc = MI.getOpcode();
+ if (AMDGPU::getSDWAOp(Opc) != -1)
+ return true;
+ int Opc32 = AMDGPU::getVOPe32(Opc);
+ if (Opc32 != -1 && AMDGPU::getSDWAOp(Opc32) != -1)
+ return !TII->hasModifiersSet(MI, AMDGPU::OpName::omod) &&
+ !TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+ return false;
}
bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
const SDWAOperandsVector &SDWAOperands) {
// Convert to sdwa
int SDWAOpcode = AMDGPU::getSDWAOp(MI.getOpcode());
+ if (SDWAOpcode == -1)
+ SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(MI.getOpcode()));
assert(SDWAOpcode != -1);
+ // Copy dst, if it is present in original then should also be present in SDWA
+ MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+ if (!Dst && !TII->isVOPC(MI))
+ return false;
+
const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);
// Create SDWA version of instruction MI and initialize its operands
MachineInstrBuilder SDWAInst =
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc);
- // Copy dst, if it is present in original then should also be present in SDWA
- MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
if (Dst) {
assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
SDWAInst.add(*Dst);
- } else {
- assert(TII->isVOPC(MI));
}
// Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
@@ -614,7 +650,10 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
Src0 &&
AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 &&
AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1);
- SDWAInst.addImm(0);
+ if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers))
+ SDWAInst.addImm(Mod->getImm());
+ else
+ SDWAInst.addImm(0);
SDWAInst.add(*Src0);
// Copy src1 if present, initialize src1_modifiers.
@@ -623,10 +662,11 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
assert(
AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 &&
AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1);
- SDWAInst.addImm(0);
+ if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers))
+ SDWAInst.addImm(Mod->getImm());
+ else
+ SDWAInst.addImm(0);
SDWAInst.add(*Src1);
- } else {
- assert(TII->isVOP1(MI));
}
if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
@@ -746,8 +786,9 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
PotentialMatches.clear();
SDWAOperands.clear();
+ bool Ret = !ConvertedInstructions.empty();
while (!ConvertedInstructions.empty())
legalizeScalarOperands(*ConvertedInstructions.pop_back_val());
- return false;
+ return Ret;
}
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 6fb01a09fe13..b611f28fcabd 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -13,9 +13,9 @@
//===----------------------------------------------------------------------===//
#include "SIRegisterInfo.h"
+#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
-#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
@@ -1104,6 +1104,66 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
}
}
+StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
+ #define AMDGPU_REG_ASM_NAMES
+ #include "AMDGPURegAsmNames.inc.cpp"
+
+ #define REG_RANGE(BeginReg, EndReg, RegTable) \
+ if (Reg >= BeginReg && Reg <= EndReg) { \
+ unsigned Index = Reg - BeginReg; \
+ assert(Index < array_lengthof(RegTable)); \
+ return RegTable[Index]; \
+ }
+
+ REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
+ REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
+ REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
+ REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
+ REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
+ VGPR96RegNames);
+
+ REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
+ AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
+ VGPR128RegNames);
+ REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
+ AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
+ SGPR128RegNames);
+
+ REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
+ AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
+ VGPR256RegNames);
+
+ REG_RANGE(
+ AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
+ AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
+ VGPR512RegNames);
+
+ REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
+ AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
+ SGPR256RegNames);
+
+ REG_RANGE(
+ AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
+ AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
+ SGPR512RegNames
+ );
+
+#undef REG_RANGE
+
+ // FIXME: Rename flat_scr so we don't need to special case this.
+ switch (Reg) {
+ case AMDGPU::FLAT_SCR:
+ return "flat_scratch";
+ case AMDGPU::FLAT_SCR_LO:
+ return "flat_scratch_lo";
+ case AMDGPU::FLAT_SCR_HI:
+ return "flat_scratch_hi";
+ default:
+ // For the special named registers the default is fine.
+ return TargetRegisterInfo::getRegAsmName(Reg);
+ }
+}
+
// FIXME: This is very slow. It might be worth creating a map from physreg to
// register class.
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h
index a648c178101a..8fed6d5f9710 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -16,8 +16,8 @@
#define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
#include "AMDGPURegisterInfo.h"
-#include "SIDefines.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
namespace llvm {
@@ -118,6 +118,8 @@ public:
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI,
int FI, RegScavenger *RS) const;
+ StringRef getRegAsmName(unsigned Reg) const override;
+
unsigned getHWRegIndex(unsigned Reg) const {
return getEncodingValue(Reg) & 0xff;
}
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 630f469eabf0..f581e69980c7 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -7,11 +7,12 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
+#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
@@ -27,7 +28,6 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
@@ -38,7 +38,6 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-
#define GET_INSTRINFO_NAMED_OPS
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRINFO_NAMED_OPS
@@ -104,6 +103,11 @@ namespace AMDGPU {
namespace IsaInfo {
IsaVersion getIsaVersion(const FeatureBitset &Features) {
+ // SI.
+ if (Features.test(FeatureISAVersion6_0_0))
+ return {6, 0, 0};
+ if (Features.test(FeatureISAVersion6_0_1))
+ return {6, 0, 1};
// CI.
if (Features.test(FeatureISAVersion7_0_0))
return {7, 0, 0};
@@ -111,6 +115,8 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) {
return {7, 0, 1};
if (Features.test(FeatureISAVersion7_0_2))
return {7, 0, 2};
+ if (Features.test(FeatureISAVersion7_0_3))
+ return {7, 0, 3};
// VI.
if (Features.test(FeatureISAVersion8_0_0))
@@ -131,6 +137,10 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) {
return {9, 0, 0};
if (Features.test(FeatureISAVersion9_0_1))
return {9, 0, 1};
+ if (Features.test(FeatureISAVersion9_0_2))
+ return {9, 0, 2};
+ if (Features.test(FeatureISAVersion9_0_3))
+ return {9, 0, 3};
if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
return {0, 0, 0};
@@ -327,33 +337,6 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
Header.private_segment_alignment = 4;
}
-MCSection *getHSATextSection(MCContext &Ctx) {
- return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_WRITE |
- ELF::SHF_EXECINSTR |
- ELF::SHF_AMDGPU_HSA_AGENT |
- ELF::SHF_AMDGPU_HSA_CODE);
-}
-
-MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) {
- return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_WRITE |
- ELF::SHF_AMDGPU_HSA_GLOBAL |
- ELF::SHF_AMDGPU_HSA_AGENT);
-}
-
-MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) {
- return Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_WRITE |
- ELF::SHF_AMDGPU_HSA_GLOBAL);
-}
-
-MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) {
- return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY |
- ELF::SHF_AMDGPU_HSA_AGENT);
-}
-
bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
}
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 19888ad7556a..eff0230d21f5 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -149,13 +149,6 @@ int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const FeatureBitset &Features);
-MCSection *getHSATextSection(MCContext &Ctx);
-
-MCSection *getHSADataGlobalAgentSection(MCContext &Ctx);
-
-MCSection *getHSADataGlobalProgramSection(MCContext &Ctx);
-
-MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx);
bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS);
bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS);
diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td
index 77fc9551cff9..a8ca593f14ed 100644
--- a/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/lib/Target/AMDGPU/VOP3Instructions.td
@@ -172,8 +172,8 @@ def V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>,
def V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>;
def V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
def V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfi>;
-def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
-def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbit>;
+def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>;
def V_MIN3_F32 : VOP3Inst <"v_min3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmin3>;
def V_MIN3_I32 : VOP3Inst <"v_min3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmin3>;
def V_MIN3_U32 : VOP3Inst <"v_min3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumin3>;
@@ -209,7 +209,10 @@ def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64,
}
def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_msad_u8>;
+
+let Constraints = "@earlyclobber $vdst" in {
def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64>, int_amdgcn_mqsad_pk_u16_u8>;
+} // End Constraints = "@earlyclobber $vdst"
def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUtrig_preop> {
let SchedRW = [WriteDouble];
@@ -232,8 +235,10 @@ def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>>;
let SubtargetPredicate = isCIVI in {
+let Constraints = "@earlyclobber $vdst" in {
def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64>, int_amdgcn_qsad_pk_u16_u8>;
def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile<VOP_V4I32_I64_I32_V4I32>, int_amdgcn_mqsad_u32_u8>;
+} // End Constraints = "@earlyclobber $vdst"
let isCommutable = 1 in {
def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;