diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-06-10 13:44:06 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-06-10 13:44:06 +0000 |
commit | 7ab83427af0f77b59941ceba41d509d7d097b065 (patch) | |
tree | cc41c05b1db454e3d802f34df75e636ee922ad87 /lib/Target/AMDGPU | |
parent | d288ef4c1788d3a951a7558c68312c2d320612b1 (diff) |
Notes
Diffstat (limited to 'lib/Target/AMDGPU')
63 files changed, 715 insertions, 928 deletions
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index b50e8d1d659e..6ab2b9ef0459 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -447,6 +447,16 @@ class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping, Implies >; +def FeatureISAVersion6_0_0 : SubtargetFeatureISAVersion <6,0,0, + [FeatureSouthernIslands, + FeatureFastFMAF32, + HalfRate64Ops, + FeatureLDSBankCount32]>; + +def FeatureISAVersion6_0_1 : SubtargetFeatureISAVersion <6,0,1, + [FeatureSouthernIslands, + FeatureLDSBankCount32]>; + def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0, [FeatureSeaIslands, FeatureLDSBankCount32]>; @@ -461,6 +471,10 @@ def FeatureISAVersion7_0_2 : SubtargetFeatureISAVersion <7,0,2, [FeatureSeaIslands, FeatureLDSBankCount16]>; +def FeatureISAVersion7_0_3 : SubtargetFeatureISAVersion <7,0,3, + [FeatureSeaIslands, + FeatureLDSBankCount16]>; + def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0, [FeatureVolcanicIslands, FeatureLDSBankCount32, @@ -489,8 +503,23 @@ def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0, FeatureLDSBankCount16, FeatureXNACK]>; -def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,[]>; -def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1,[]>; +def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0, + [FeatureGFX9, + FeatureLDSBankCount32]>; + +def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1, + [FeatureGFX9, + FeatureLDSBankCount32, + FeatureXNACK]>; + +def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2, + [FeatureGFX9, + FeatureLDSBankCount32]>; + +def FeatureISAVersion9_0_3 : SubtargetFeatureISAVersion <9,0,3, + [FeatureGFX9, + FeatureLDSBankCount32, + FeatureXNACK]>; //===----------------------------------------------------------------------===// // Debugger related subtarget features. diff --git a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp index 3c99f48e818a..faa424eb0a64 100644 --- a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp +++ b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp @@ -10,15 +10,15 @@ /// This is the AMGPU address space based alias analysis pass. //===----------------------------------------------------------------------===// -#include "AMDGPU.h" #include "AMDGPUAliasAnalysis.h" +#include "AMDGPU.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp index 91b3649f5c39..3c788fa1dcea 100644 --- a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp +++ b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp @@ -19,8 +19,8 @@ #include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/IR/InstVisitor.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstVisitor.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 0959014812d8..83ad1a5c6ee3 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -17,25 +17,25 @@ // #include "AMDGPUAsmPrinter.h" -#include "AMDGPUTargetMachine.h" -#include "MCTargetDesc/AMDGPUTargetStreamer.h" -#include "InstPrinter/AMDGPUInstPrinter.h" -#include "Utils/AMDGPUBaseInfo.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" +#include "AMDGPUTargetMachine.h" +#include "InstPrinter/AMDGPUInstPrinter.h" +#include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "R600Defines.h" #include "R600MachineFunctionInfo.h" #include "R600RegisterInfo.h" #include "SIDefines.h" -#include "SIMachineFunctionInfo.h" #include "SIInstrInfo.h" +#include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index e5adeeb465e1..0a58ce06704d 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -15,8 +15,8 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H -#include "AMDKernelCodeT.h" #include "AMDGPU.h" +#include "AMDKernelCodeT.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/AsmPrinter.h" #include <cstddef> diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index e67ae092fdda..515cc07dd449 100644 --- a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -18,8 +18,8 @@ #include "AMDGPUISelLowering.h" #include "AMDGPUSubtarget.h" #include "SIISelLowering.h" -#include "SIRegisterInfo.h" #include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index d923cb117c12..b312dbc8d14d 100644 --- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -25,13 +25,13 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstVisitor.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 19fce064783d..251c2f9bb25a 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -13,15 +13,15 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUISelLowering.h" // For AMDGPUISD #include "AMDGPUInstrInfo.h" #include "AMDGPURegisterInfo.h" -#include "AMDGPUISelLowering.h" // For AMDGPUISD #include "AMDGPUSubtarget.h" #include "SIDefines.h" -#include "SIInstrInfo.h" -#include "SIRegisterInfo.h" #include "SIISelLowering.h" +#include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 723e8a7b54e2..5586b513b5fc 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -21,6 +21,7 @@ #include "AMDGPURegisterInfo.h" #include "AMDGPUSubtarget.h" #include "R600MachineFunctionInfo.h" +#include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" @@ -30,7 +31,6 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/Support/KnownBits.h" -#include "SIInstrInfo.h" using namespace llvm; static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT, diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h index 12caa5118342..41cc7d7093ec 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -17,8 +17,8 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H #include "AMDGPU.h" -#include "llvm/Target/TargetInstrInfo.h" #include "Utils/AMDGPUBaseInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "AMDGPUGenInstrInfo.inc" diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index c87102e55dfb..ef845f44d365 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -15,9 +15,9 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H #include "AMDGPU.h" -#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" namespace llvm { diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 267f4807a788..b889788c3426 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -14,10 +14,10 @@ #include "AMDGPULegalizerInfo.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/IR/Type.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/Target/TargetOpcodes.h" +#include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" +#include "llvm/Target/TargetOpcodes.h" using namespace llvm; @@ -47,12 +47,18 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo() { setAction({G_GEP, P2}, Legal); setAction({G_GEP, 1, S64}, Legal); + setAction({G_ICMP, S1}, Legal); + setAction({G_ICMP, 1, S32}, Legal); + setAction({G_LOAD, P1}, Legal); setAction({G_LOAD, P2}, Legal); setAction({G_LOAD, S32}, Legal); setAction({G_LOAD, 1, P1}, Legal); setAction({G_LOAD, 1, P2}, Legal); + setAction({G_SELECT, S32}, Legal); + setAction({G_SELECT, 1, S1}, Legal); + setAction({G_STORE, S32}, Legal); setAction({G_STORE, 1, P1}, Legal); diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index f1ef6281c90f..63dd0d726d91 100644 --- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -38,7 +38,6 @@ using namespace llvm; #include "AMDGPUGenMCPseudoLowering.inc" - AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st, const AsmPrinter &ap): Ctx(ctx), ST(st), AP(ap) { } diff --git a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp index 6d2785ba1c60..2071b6f157cd 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp +++ b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "SIInstrInfo.h" #include "AMDGPUSubtarget.h" +#include "SIInstrInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h index 8bfeb67ad4ec..99bb61b21db0 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -10,8 +10,8 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunction.h" namespace llvm { diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 07f92918a43f..625c9b77e2de 100644 --- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -33,11 +33,11 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" @@ -319,15 +319,17 @@ static bool canVectorizeInst(Instruction *Inst, User *User) { switch (Inst->getOpcode()) { case Instruction::Load: { LoadInst *LI = cast<LoadInst>(Inst); - return !LI->isVolatile(); + // Currently only handle the case where the Pointer Operand is a GEP so check for that case. + return isa<GetElementPtrInst>(LI->getPointerOperand()) && !LI->isVolatile(); } case Instruction::BitCast: case Instruction::AddrSpaceCast: return true; case Instruction::Store: { - // Must be the stored pointer operand, not a stored value. + // Must be the stored pointer operand, not a stored value, plus + // since it should be canonical form, the User should be a GEP. StoreInst *SI = cast<StoreInst>(Inst); - return (SI->getPointerOperand() == User) && !SI->isVolatile(); + return (SI->getPointerOperand() == User) && isa<GetElementPtrInst>(User) && !SI->isVolatile(); } default: return false; @@ -341,8 +343,11 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) { // FIXME: There is no reason why we can't support larger arrays, we // are just being conservative for now. + // FIXME: We also reject alloca's of the form [ 2 x [ 2 x i32 ]] or equivalent. Potentially these + // could also be promoted but we don't currently handle this case if (!AllocaTy || AllocaTy->getElementType()->isVectorTy() || + AllocaTy->getElementType()->isArrayTy() || AllocaTy->getNumElements() > 4 || AllocaTy->getNumElements() < 2) { DEBUG(dbgs() << " Cannot convert type to vector\n"); @@ -390,7 +395,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) { switch (Inst->getOpcode()) { case Instruction::Load: { Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS); - Value *Ptr = Inst->getOperand(0); + Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand(); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy); @@ -403,12 +408,13 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) { case Instruction::Store: { Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS); - Value *Ptr = Inst->getOperand(1); + StoreInst *SI = cast<StoreInst>(Inst); + Value *Ptr = SI->getPointerOperand(); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy); Value *VecValue = Builder.CreateLoad(BitCast); Value *NewVecValue = Builder.CreateInsertElement(VecValue, - Inst->getOperand(0), + SI->getValueOperand(), Index); Builder.CreateStore(NewVecValue, BitCast); Inst->eraseFromParent(); diff --git a/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp b/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp new file mode 100644 index 000000000000..36d88f52910d --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp @@ -0,0 +1,353 @@ +//===-- AMDGPURegAsmNames.inc - Register asm names ----------*- C++ -*-----===// + +#ifdef AMDGPU_REG_ASM_NAMES + +static const char *const VGPR32RegNames[] = { + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", + "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", + "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", + "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", + "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", + "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", + "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", + "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", + "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", + "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", + "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", + "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", + "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", + "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", + "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", + "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", + "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", + "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", + "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", + "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", + "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", + "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", + "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", + "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", + "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", + "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", + "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", + "v252", "v253", "v254", "v255" +}; + +static const char *const SGPR32RegNames[] = { + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", + "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", + "s20", "s21", "s22", "s23", "s24", "s25", "s26", "s27", "s28", "s29", + "s30", "s31", "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", + "s40", "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", + "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", "s59", + "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", "s68", "s69", + "s70", "s71", "s72", "s73", "s74", "s75", "s76", "s77", "s78", "s79", + "s80", "s81", "s82", "s83", "s84", "s85", "s86", "s87", "s88", "s89", + "s90", "s91", "s92", "s93", "s94", "s95", "s96", "s97", "s98", "s99", + "s100", "s101", "s102", "s103" +}; + +static const char *const VGPR64RegNames[] = { + "v[0:1]", "v[1:2]", "v[2:3]", "v[3:4]", "v[4:5]", + "v[5:6]", "v[6:7]", "v[7:8]", "v[8:9]", "v[9:10]", + "v[10:11]", "v[11:12]", "v[12:13]", "v[13:14]", "v[14:15]", + "v[15:16]", "v[16:17]", "v[17:18]", "v[18:19]", "v[19:20]", + "v[20:21]", "v[21:22]", "v[22:23]", "v[23:24]", "v[24:25]", + "v[25:26]", "v[26:27]", "v[27:28]", "v[28:29]", "v[29:30]", + "v[30:31]", "v[31:32]", "v[32:33]", "v[33:34]", "v[34:35]", + "v[35:36]", "v[36:37]", "v[37:38]", "v[38:39]", "v[39:40]", + "v[40:41]", "v[41:42]", "v[42:43]", "v[43:44]", "v[44:45]", + "v[45:46]", "v[46:47]", "v[47:48]", "v[48:49]", "v[49:50]", + "v[50:51]", "v[51:52]", "v[52:53]", "v[53:54]", "v[54:55]", + "v[55:56]", "v[56:57]", "v[57:58]", "v[58:59]", "v[59:60]", + "v[60:61]", "v[61:62]", "v[62:63]", "v[63:64]", "v[64:65]", + "v[65:66]", "v[66:67]", "v[67:68]", "v[68:69]", "v[69:70]", + "v[70:71]", "v[71:72]", "v[72:73]", "v[73:74]", "v[74:75]", + "v[75:76]", "v[76:77]", "v[77:78]", "v[78:79]", "v[79:80]", + "v[80:81]", "v[81:82]", "v[82:83]", "v[83:84]", "v[84:85]", + "v[85:86]", "v[86:87]", "v[87:88]", "v[88:89]", "v[89:90]", + "v[90:91]", "v[91:92]", "v[92:93]", "v[93:94]", "v[94:95]", + "v[95:96]", "v[96:97]", "v[97:98]", "v[98:99]", "v[99:100]", + "v[100:101]", "v[101:102]", "v[102:103]", "v[103:104]", "v[104:105]", + "v[105:106]", "v[106:107]", "v[107:108]", "v[108:109]", "v[109:110]", + "v[110:111]", "v[111:112]", "v[112:113]", "v[113:114]", "v[114:115]", + "v[115:116]", "v[116:117]", "v[117:118]", "v[118:119]", "v[119:120]", + "v[120:121]", "v[121:122]", "v[122:123]", "v[123:124]", "v[124:125]", + "v[125:126]", "v[126:127]", "v[127:128]", "v[128:129]", "v[129:130]", + "v[130:131]", "v[131:132]", "v[132:133]", "v[133:134]", "v[134:135]", + "v[135:136]", "v[136:137]", "v[137:138]", "v[138:139]", "v[139:140]", + "v[140:141]", "v[141:142]", "v[142:143]", "v[143:144]", "v[144:145]", + "v[145:146]", "v[146:147]", "v[147:148]", "v[148:149]", "v[149:150]", + "v[150:151]", "v[151:152]", "v[152:153]", "v[153:154]", "v[154:155]", + "v[155:156]", "v[156:157]", "v[157:158]", "v[158:159]", "v[159:160]", + "v[160:161]", "v[161:162]", "v[162:163]", "v[163:164]", "v[164:165]", + "v[165:166]", "v[166:167]", "v[167:168]", "v[168:169]", "v[169:170]", + "v[170:171]", "v[171:172]", "v[172:173]", "v[173:174]", "v[174:175]", + "v[175:176]", "v[176:177]", "v[177:178]", "v[178:179]", "v[179:180]", + "v[180:181]", "v[181:182]", "v[182:183]", "v[183:184]", "v[184:185]", + "v[185:186]", "v[186:187]", "v[187:188]", "v[188:189]", "v[189:190]", + "v[190:191]", "v[191:192]", "v[192:193]", "v[193:194]", "v[194:195]", + "v[195:196]", "v[196:197]", "v[197:198]", "v[198:199]", "v[199:200]", + "v[200:201]", "v[201:202]", "v[202:203]", "v[203:204]", "v[204:205]", + "v[205:206]", "v[206:207]", "v[207:208]", "v[208:209]", "v[209:210]", + "v[210:211]", "v[211:212]", "v[212:213]", "v[213:214]", "v[214:215]", + "v[215:216]", "v[216:217]", "v[217:218]", "v[218:219]", "v[219:220]", + "v[220:221]", "v[221:222]", "v[222:223]", "v[223:224]", "v[224:225]", + "v[225:226]", "v[226:227]", "v[227:228]", "v[228:229]", "v[229:230]", + "v[230:231]", "v[231:232]", "v[232:233]", "v[233:234]", "v[234:235]", + "v[235:236]", "v[236:237]", "v[237:238]", "v[238:239]", "v[239:240]", + "v[240:241]", "v[241:242]", "v[242:243]", "v[243:244]", "v[244:245]", + "v[245:246]", "v[246:247]", "v[247:248]", "v[248:249]", "v[249:250]", + "v[250:251]", "v[251:252]", "v[252:253]", "v[253:254]", "v[254:255]" +}; + +static const char *const VGPR96RegNames[] = { + "v[0:2]", "v[1:3]", "v[2:4]", "v[3:5]", "v[4:6]", + "v[5:7]", "v[6:8]", "v[7:9]", "v[8:10]", "v[9:11]", + "v[10:12]", "v[11:13]", "v[12:14]", "v[13:15]", "v[14:16]", + "v[15:17]", "v[16:18]", "v[17:19]", "v[18:20]", "v[19:21]", + "v[20:22]", "v[21:23]", "v[22:24]", "v[23:25]", "v[24:26]", + "v[25:27]", "v[26:28]", "v[27:29]", "v[28:30]", "v[29:31]", + "v[30:32]", "v[31:33]", "v[32:34]", "v[33:35]", "v[34:36]", + "v[35:37]", "v[36:38]", "v[37:39]", "v[38:40]", "v[39:41]", + "v[40:42]", "v[41:43]", "v[42:44]", "v[43:45]", "v[44:46]", + "v[45:47]", "v[46:48]", "v[47:49]", "v[48:50]", "v[49:51]", + "v[50:52]", "v[51:53]", "v[52:54]", "v[53:55]", "v[54:56]", + "v[55:57]", "v[56:58]", "v[57:59]", "v[58:60]", "v[59:61]", + "v[60:62]", "v[61:63]", "v[62:64]", "v[63:65]", "v[64:66]", + "v[65:67]", "v[66:68]", "v[67:69]", "v[68:70]", "v[69:71]", + "v[70:72]", "v[71:73]", "v[72:74]", "v[73:75]", "v[74:76]", + "v[75:77]", "v[76:78]", "v[77:79]", "v[78:80]", "v[79:81]", + "v[80:82]", "v[81:83]", "v[82:84]", "v[83:85]", "v[84:86]", + "v[85:87]", "v[86:88]", "v[87:89]", "v[88:90]", "v[89:91]", + "v[90:92]", "v[91:93]", "v[92:94]", "v[93:95]", "v[94:96]", + "v[95:97]", "v[96:98]", "v[97:99]", "v[98:100]", "v[99:101]", + "v[100:102]", "v[101:103]", "v[102:104]", "v[103:105]", "v[104:106]", + "v[105:107]", "v[106:108]", "v[107:109]", "v[108:110]", "v[109:111]", + "v[110:112]", "v[111:113]", "v[112:114]", "v[113:115]", "v[114:116]", + "v[115:117]", "v[116:118]", "v[117:119]", "v[118:120]", "v[119:121]", + "v[120:122]", "v[121:123]", "v[122:124]", "v[123:125]", "v[124:126]", + "v[125:127]", "v[126:128]", "v[127:129]", "v[128:130]", "v[129:131]", + "v[130:132]", "v[131:133]", "v[132:134]", "v[133:135]", "v[134:136]", + "v[135:137]", "v[136:138]", "v[137:139]", "v[138:140]", "v[139:141]", + "v[140:142]", "v[141:143]", "v[142:144]", "v[143:145]", "v[144:146]", + "v[145:147]", "v[146:148]", "v[147:149]", "v[148:150]", "v[149:151]", + "v[150:152]", "v[151:153]", "v[152:154]", "v[153:155]", "v[154:156]", + "v[155:157]", "v[156:158]", "v[157:159]", "v[158:160]", "v[159:161]", + "v[160:162]", "v[161:163]", "v[162:164]", "v[163:165]", "v[164:166]", + "v[165:167]", "v[166:168]", "v[167:169]", "v[168:170]", "v[169:171]", + "v[170:172]", "v[171:173]", "v[172:174]", "v[173:175]", "v[174:176]", + "v[175:177]", "v[176:178]", "v[177:179]", "v[178:180]", "v[179:181]", + "v[180:182]", "v[181:183]", "v[182:184]", "v[183:185]", "v[184:186]", + "v[185:187]", "v[186:188]", "v[187:189]", "v[188:190]", "v[189:191]", + "v[190:192]", "v[191:193]", "v[192:194]", "v[193:195]", "v[194:196]", + "v[195:197]", "v[196:198]", "v[197:199]", "v[198:200]", "v[199:201]", + "v[200:202]", "v[201:203]", "v[202:204]", "v[203:205]", "v[204:206]", + "v[205:207]", "v[206:208]", "v[207:209]", "v[208:210]", "v[209:211]", + "v[210:212]", "v[211:213]", "v[212:214]", "v[213:215]", "v[214:216]", + "v[215:217]", "v[216:218]", "v[217:219]", "v[218:220]", "v[219:221]", + "v[220:222]", "v[221:223]", "v[222:224]", "v[223:225]", "v[224:226]", + "v[225:227]", "v[226:228]", "v[227:229]", "v[228:230]", "v[229:231]", + "v[230:232]", "v[231:233]", "v[232:234]", "v[233:235]", "v[234:236]", + "v[235:237]", "v[236:238]", "v[237:239]", "v[238:240]", "v[239:241]", + "v[240:242]", "v[241:243]", "v[242:244]", "v[243:245]", "v[244:246]", + "v[245:247]", "v[246:248]", "v[247:249]", "v[248:250]", "v[249:251]", + "v[250:252]", "v[251:253]", "v[252:254]", "v[253:255]" +}; + +static const char *const VGPR128RegNames[] = { + "v[0:3]", "v[1:4]", "v[2:5]", "v[3:6]", "v[4:7]", + "v[5:8]", "v[6:9]", "v[7:10]", "v[8:11]", "v[9:12]", + "v[10:13]", "v[11:14]", "v[12:15]", "v[13:16]", "v[14:17]", + "v[15:18]", "v[16:19]", "v[17:20]", "v[18:21]", "v[19:22]", + "v[20:23]", "v[21:24]", "v[22:25]", "v[23:26]", "v[24:27]", + "v[25:28]", "v[26:29]", "v[27:30]", "v[28:31]", "v[29:32]", + "v[30:33]", "v[31:34]", "v[32:35]", "v[33:36]", "v[34:37]", + "v[35:38]", "v[36:39]", "v[37:40]", "v[38:41]", "v[39:42]", + "v[40:43]", "v[41:44]", "v[42:45]", "v[43:46]", "v[44:47]", + "v[45:48]", "v[46:49]", "v[47:50]", "v[48:51]", "v[49:52]", + "v[50:53]", "v[51:54]", "v[52:55]", "v[53:56]", "v[54:57]", + "v[55:58]", "v[56:59]", "v[57:60]", "v[58:61]", "v[59:62]", + "v[60:63]", "v[61:64]", "v[62:65]", "v[63:66]", "v[64:67]", + "v[65:68]", "v[66:69]", "v[67:70]", "v[68:71]", "v[69:72]", + "v[70:73]", "v[71:74]", "v[72:75]", "v[73:76]", "v[74:77]", + "v[75:78]", "v[76:79]", "v[77:80]", "v[78:81]", "v[79:82]", + "v[80:83]", "v[81:84]", "v[82:85]", "v[83:86]", "v[84:87]", + "v[85:88]", "v[86:89]", "v[87:90]", "v[88:91]", "v[89:92]", + "v[90:93]", "v[91:94]", "v[92:95]", "v[93:96]", "v[94:97]", + "v[95:98]", "v[96:99]", "v[97:100]", "v[98:101]", "v[99:102]", + "v[100:103]", "v[101:104]", "v[102:105]", "v[103:106]", "v[104:107]", + "v[105:108]", "v[106:109]", "v[107:110]", "v[108:111]", "v[109:112]", + "v[110:113]", "v[111:114]", "v[112:115]", "v[113:116]", "v[114:117]", + "v[115:118]", "v[116:119]", "v[117:120]", "v[118:121]", "v[119:122]", + "v[120:123]", "v[121:124]", "v[122:125]", "v[123:126]", "v[124:127]", + "v[125:128]", "v[126:129]", "v[127:130]", "v[128:131]", "v[129:132]", + "v[130:133]", "v[131:134]", "v[132:135]", "v[133:136]", "v[134:137]", + "v[135:138]", "v[136:139]", "v[137:140]", "v[138:141]", "v[139:142]", + "v[140:143]", "v[141:144]", "v[142:145]", "v[143:146]", "v[144:147]", + "v[145:148]", "v[146:149]", "v[147:150]", "v[148:151]", "v[149:152]", + "v[150:153]", "v[151:154]", "v[152:155]", "v[153:156]", "v[154:157]", + "v[155:158]", "v[156:159]", "v[157:160]", "v[158:161]", "v[159:162]", + "v[160:163]", "v[161:164]", "v[162:165]", "v[163:166]", "v[164:167]", + "v[165:168]", "v[166:169]", "v[167:170]", "v[168:171]", "v[169:172]", + "v[170:173]", "v[171:174]", "v[172:175]", "v[173:176]", "v[174:177]", + "v[175:178]", "v[176:179]", "v[177:180]", "v[178:181]", "v[179:182]", + "v[180:183]", "v[181:184]", "v[182:185]", "v[183:186]", "v[184:187]", + "v[185:188]", "v[186:189]", "v[187:190]", "v[188:191]", "v[189:192]", + "v[190:193]", "v[191:194]", "v[192:195]", "v[193:196]", "v[194:197]", + "v[195:198]", "v[196:199]", "v[197:200]", "v[198:201]", "v[199:202]", + "v[200:203]", "v[201:204]", "v[202:205]", "v[203:206]", "v[204:207]", + "v[205:208]", "v[206:209]", "v[207:210]", "v[208:211]", "v[209:212]", + "v[210:213]", "v[211:214]", "v[212:215]", "v[213:216]", "v[214:217]", + "v[215:218]", "v[216:219]", "v[217:220]", "v[218:221]", "v[219:222]", + "v[220:223]", "v[221:224]", "v[222:225]", "v[223:226]", "v[224:227]", + "v[225:228]", "v[226:229]", "v[227:230]", "v[228:231]", "v[229:232]", + "v[230:233]", "v[231:234]", "v[232:235]", "v[233:236]", "v[234:237]", + "v[235:238]", "v[236:239]", "v[237:240]", "v[238:241]", "v[239:242]", + "v[240:243]", "v[241:244]", "v[242:245]", "v[243:246]", "v[244:247]", + "v[245:248]", "v[246:249]", "v[247:250]", "v[248:251]", "v[249:252]", + "v[250:253]", "v[251:254]", "v[252:255]" +}; + +static const char *const VGPR256RegNames[] = { + "v[0:7]", "v[1:8]", "v[2:9]", "v[3:10]", "v[4:11]", + "v[5:12]", "v[6:13]", "v[7:14]", "v[8:15]", "v[9:16]", + "v[10:17]", "v[11:18]", "v[12:19]", "v[13:20]", "v[14:21]", + "v[15:22]", "v[16:23]", "v[17:24]", "v[18:25]", "v[19:26]", + "v[20:27]", "v[21:28]", "v[22:29]", "v[23:30]", "v[24:31]", + "v[25:32]", "v[26:33]", "v[27:34]", "v[28:35]", "v[29:36]", + "v[30:37]", "v[31:38]", "v[32:39]", "v[33:40]", "v[34:41]", + "v[35:42]", "v[36:43]", "v[37:44]", "v[38:45]", "v[39:46]", + "v[40:47]", "v[41:48]", "v[42:49]", "v[43:50]", "v[44:51]", + "v[45:52]", "v[46:53]", "v[47:54]", "v[48:55]", "v[49:56]", + "v[50:57]", "v[51:58]", "v[52:59]", "v[53:60]", "v[54:61]", + "v[55:62]", "v[56:63]", "v[57:64]", "v[58:65]", "v[59:66]", + "v[60:67]", "v[61:68]", "v[62:69]", "v[63:70]", "v[64:71]", + "v[65:72]", "v[66:73]", "v[67:74]", "v[68:75]", "v[69:76]", + "v[70:77]", "v[71:78]", "v[72:79]", "v[73:80]", "v[74:81]", + "v[75:82]", "v[76:83]", "v[77:84]", "v[78:85]", "v[79:86]", + "v[80:87]", "v[81:88]", "v[82:89]", "v[83:90]", "v[84:91]", + "v[85:92]", "v[86:93]", "v[87:94]", "v[88:95]", "v[89:96]", + "v[90:97]", "v[91:98]", "v[92:99]", "v[93:100]", "v[94:101]", + "v[95:102]", "v[96:103]", "v[97:104]", "v[98:105]", "v[99:106]", + "v[100:107]", "v[101:108]", "v[102:109]", "v[103:110]", "v[104:111]", + "v[105:112]", "v[106:113]", "v[107:114]", "v[108:115]", "v[109:116]", + "v[110:117]", "v[111:118]", "v[112:119]", "v[113:120]", "v[114:121]", + "v[115:122]", "v[116:123]", "v[117:124]", "v[118:125]", "v[119:126]", + "v[120:127]", "v[121:128]", "v[122:129]", "v[123:130]", "v[124:131]", + "v[125:132]", "v[126:133]", "v[127:134]", "v[128:135]", "v[129:136]", + "v[130:137]", "v[131:138]", "v[132:139]", "v[133:140]", "v[134:141]", + "v[135:142]", "v[136:143]", "v[137:144]", "v[138:145]", "v[139:146]", + "v[140:147]", "v[141:148]", "v[142:149]", "v[143:150]", "v[144:151]", + "v[145:152]", "v[146:153]", "v[147:154]", "v[148:155]", "v[149:156]", + "v[150:157]", "v[151:158]", "v[152:159]", "v[153:160]", "v[154:161]", + "v[155:162]", "v[156:163]", "v[157:164]", "v[158:165]", "v[159:166]", + "v[160:167]", "v[161:168]", "v[162:169]", "v[163:170]", "v[164:171]", + "v[165:172]", "v[166:173]", "v[167:174]", "v[168:175]", "v[169:176]", + "v[170:177]", "v[171:178]", "v[172:179]", "v[173:180]", "v[174:181]", + "v[175:182]", "v[176:183]", "v[177:184]", "v[178:185]", "v[179:186]", + "v[180:187]", "v[181:188]", "v[182:189]", "v[183:190]", "v[184:191]", + "v[185:192]", "v[186:193]", "v[187:194]", "v[188:195]", "v[189:196]", + "v[190:197]", "v[191:198]", "v[192:199]", "v[193:200]", "v[194:201]", + "v[195:202]", "v[196:203]", "v[197:204]", "v[198:205]", "v[199:206]", + "v[200:207]", "v[201:208]", "v[202:209]", "v[203:210]", "v[204:211]", + "v[205:212]", "v[206:213]", "v[207:214]", "v[208:215]", "v[209:216]", + "v[210:217]", "v[211:218]", "v[212:219]", "v[213:220]", "v[214:221]", + "v[215:222]", "v[216:223]", "v[217:224]", "v[218:225]", "v[219:226]", + "v[220:227]", "v[221:228]", "v[222:229]", "v[223:230]", "v[224:231]", + "v[225:232]", "v[226:233]", "v[227:234]", "v[228:235]", "v[229:236]", + "v[230:237]", "v[231:238]", "v[232:239]", "v[233:240]", "v[234:241]", + "v[235:242]", "v[236:243]", "v[237:244]", "v[238:245]", "v[239:246]", + "v[240:247]", "v[241:248]", "v[242:249]", "v[243:250]", "v[244:251]", + "v[245:252]", "v[246:253]", "v[247:254]", "v[248:255]" +}; + +static const char *const VGPR512RegNames[] = { + "v[0:15]", "v[1:16]", "v[2:17]", "v[3:18]", "v[4:19]", + "v[5:20]", "v[6:21]", "v[7:22]", "v[8:23]", "v[9:24]", + "v[10:25]", "v[11:26]", "v[12:27]", "v[13:28]", "v[14:29]", + "v[15:30]", "v[16:31]", "v[17:32]", "v[18:33]", "v[19:34]", + "v[20:35]", "v[21:36]", "v[22:37]", "v[23:38]", "v[24:39]", + "v[25:40]", "v[26:41]", "v[27:42]", "v[28:43]", "v[29:44]", + "v[30:45]", "v[31:46]", "v[32:47]", "v[33:48]", "v[34:49]", + "v[35:50]", "v[36:51]", "v[37:52]", "v[38:53]", "v[39:54]", + "v[40:55]", "v[41:56]", "v[42:57]", "v[43:58]", "v[44:59]", + "v[45:60]", "v[46:61]", "v[47:62]", "v[48:63]", "v[49:64]", + "v[50:65]", "v[51:66]", "v[52:67]", "v[53:68]", "v[54:69]", + "v[55:70]", "v[56:71]", "v[57:72]", "v[58:73]", "v[59:74]", + "v[60:75]", "v[61:76]", "v[62:77]", "v[63:78]", "v[64:79]", + "v[65:80]", "v[66:81]", "v[67:82]", "v[68:83]", "v[69:84]", + "v[70:85]", "v[71:86]", "v[72:87]", "v[73:88]", "v[74:89]", + "v[75:90]", "v[76:91]", "v[77:92]", "v[78:93]", "v[79:94]", + "v[80:95]", "v[81:96]", "v[82:97]", "v[83:98]", "v[84:99]", + "v[85:100]", "v[86:101]", "v[87:102]", "v[88:103]", "v[89:104]", + "v[90:105]", "v[91:106]", "v[92:107]", "v[93:108]", "v[94:109]", + "v[95:110]", "v[96:111]", "v[97:112]", "v[98:113]", "v[99:114]", + "v[100:115]", "v[101:116]", "v[102:117]", "v[103:118]", "v[104:119]", + "v[105:120]", "v[106:121]", "v[107:122]", "v[108:123]", "v[109:124]", + "v[110:125]", "v[111:126]", "v[112:127]", "v[113:128]", "v[114:129]", + "v[115:130]", "v[116:131]", "v[117:132]", "v[118:133]", "v[119:134]", + "v[120:135]", "v[121:136]", "v[122:137]", "v[123:138]", "v[124:139]", + "v[125:140]", "v[126:141]", "v[127:142]", "v[128:143]", "v[129:144]", + "v[130:145]", "v[131:146]", "v[132:147]", "v[133:148]", "v[134:149]", + "v[135:150]", "v[136:151]", "v[137:152]", "v[138:153]", "v[139:154]", + "v[140:155]", "v[141:156]", "v[142:157]", "v[143:158]", "v[144:159]", + "v[145:160]", "v[146:161]", "v[147:162]", "v[148:163]", "v[149:164]", + "v[150:165]", "v[151:166]", "v[152:167]", "v[153:168]", "v[154:169]", + "v[155:170]", "v[156:171]", "v[157:172]", "v[158:173]", "v[159:174]", + "v[160:175]", "v[161:176]", "v[162:177]", "v[163:178]", "v[164:179]", + "v[165:180]", "v[166:181]", "v[167:182]", "v[168:183]", "v[169:184]", + "v[170:185]", "v[171:186]", "v[172:187]", "v[173:188]", "v[174:189]", + "v[175:190]", "v[176:191]", "v[177:192]", "v[178:193]", "v[179:194]", + "v[180:195]", "v[181:196]", "v[182:197]", "v[183:198]", "v[184:199]", + "v[185:200]", "v[186:201]", "v[187:202]", "v[188:203]", "v[189:204]", + "v[190:205]", "v[191:206]", "v[192:207]", "v[193:208]", "v[194:209]", + "v[195:210]", "v[196:211]", "v[197:212]", "v[198:213]", "v[199:214]", + "v[200:215]", "v[201:216]", "v[202:217]", "v[203:218]", "v[204:219]", + "v[205:220]", "v[206:221]", "v[207:222]", "v[208:223]", "v[209:224]", + "v[210:225]", "v[211:226]", "v[212:227]", "v[213:228]", "v[214:229]", + "v[215:230]", "v[216:231]", "v[217:232]", "v[218:233]", "v[219:234]", + "v[220:235]", "v[221:236]", "v[222:237]", "v[223:238]", "v[224:239]", + "v[225:240]", "v[226:241]", "v[227:242]", "v[228:243]", "v[229:244]", + "v[230:245]", "v[231:246]", "v[232:247]", "v[233:248]", "v[234:249]", + "v[235:250]", "v[236:251]", "v[237:252]", "v[238:253]", "v[239:254]", + "v[240:255]" +}; + +static const char *const SGPR64RegNames[] = { + "s[0:1]", "s[2:3]", "s[4:5]", "s[6:7]", "s[8:9]", "s[10:11]", + "s[12:13]", "s[14:15]", "s[16:17]", "s[18:19]", "s[20:21]", "s[22:23]", + "s[24:25]", "s[26:27]", "s[28:29]", "s[30:31]", "s[32:33]", "s[34:35]", + "s[36:37]", "s[38:39]", "s[40:41]", "s[42:43]", "s[44:45]", "s[46:47]", + "s[48:49]", "s[50:51]", "s[52:53]", "s[54:55]", "s[56:57]", "s[58:59]", + "s[60:61]", "s[62:63]", "s[64:65]", "s[66:67]", "s[68:69]", "s[70:71]", + "s[72:73]", "s[74:75]", "s[76:77]", "s[78:79]", "s[80:81]", "s[82:83]", + "s[84:85]", "s[86:87]", "s[88:89]", "s[90:91]", "s[92:93]", "s[94:95]", + "s[96:97]", "s[98:99]", "s[100:101]", "s[102:103]" +}; + +static const char *const SGPR128RegNames[] = { + "s[0:3]", "s[4:7]", "s[8:11]", "s[12:15]", "s[16:19]", "s[20:23]", + "s[24:27]", "s[28:31]", "s[32:35]", "s[36:39]", "s[40:43]", "s[44:47]", + "s[48:51]", "s[52:55]", "s[56:59]", "s[60:63]", "s[64:67]", "s[68:71]", + "s[72:75]", "s[76:79]", "s[80:83]", "s[84:87]", "s[88:91]", "s[92:95]", + "s[96:99]", "s[100:103]" +}; + +static const char *const SGPR256RegNames[] = { + "s[0:7]", "s[4:11]", "s[8:15]", "s[12:19]", "s[16:23]", + "s[20:27]", "s[24:31]", "s[28:35]", "s[32:39]", "s[36:43]", + "s[40:47]", "s[44:51]", "s[48:55]", "s[52:59]", "s[56:63]", + "s[60:67]", "s[64:71]", "s[68:75]", "s[72:79]", "s[76:83]", + "s[80:87]", "s[84:91]", "s[88:95]", "s[92:99]", "s[96:103]" +}; + +static const char *const SGPR512RegNames[] = { + "s[0:15]", "s[4:19]", "s[8:23]", "s[12:27]", "s[16:31]", "s[20:35]", + "s[24:39]", "s[28:43]", "s[32:47]", "s[36:51]", "s[40:55]", "s[44:59]", + "s[48:63]", "s[52:67]", "s[56:71]", "s[60:75]", "s[64:79]", "s[68:83]", + "s[72:87]", "s[76:91]", "s[80:95]", "s[84:99]", "s[88:103]" +}; + +#endif diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h index 7c198a1b8a3f..201fdc1974c6 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -36,7 +36,6 @@ protected: #define GET_TARGET_REGBANK_CLASS #include "AMDGPUGenRegisterBank.inc" - }; class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { const SIRegisterInfo *TRI; diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp index b2867fcc49f9..ff58aa5741a1 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp @@ -40,7 +40,6 @@ unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const { #define GET_REGINFO_TARGET_DESC #include "AMDGPUGenRegisterInfo.inc" - // Forced to be here by one .inc const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs( const MachineFunction *MF) const { diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index ed9cbb994fad..5f4f20316a6b 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -16,12 +16,12 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H #include "AMDGPU.h" -#include "R600InstrInfo.h" -#include "R600ISelLowering.h" #include "R600FrameLowering.h" -#include "SIInstrInfo.h" -#include "SIISelLowering.h" +#include "R600ISelLowering.h" +#include "R600InstrInfo.h" #include "SIFrameLowering.h" +#include "SIISelLowering.h" +#include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/Triple.h" @@ -57,9 +57,12 @@ public: enum { ISAVersion0_0_0, + ISAVersion6_0_0, + ISAVersion6_0_1, ISAVersion7_0_0, ISAVersion7_0_1, ISAVersion7_0_2, + ISAVersion7_0_3, ISAVersion8_0_0, ISAVersion8_0_1, ISAVersion8_0_2, @@ -67,7 +70,9 @@ public: ISAVersion8_0_4, ISAVersion8_1_0, ISAVersion9_0_0, - ISAVersion9_0_1 + ISAVersion9_0_1, + ISAVersion9_0_2, + ISAVersion9_0_3 }; enum TrapHandlerAbi { @@ -787,7 +792,7 @@ public: /// \returns VGPR allocation granularity supported by the subtarget. unsigned getVGPRAllocGranule() const { - return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());; + return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits()); } /// \returns VGPR encoding granularity supported by the subtarget. diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 404598ff4738..b644eba536fa 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -28,26 +28,26 @@ #include "GCNSchedStrategy.h" #include "R600MachineScheduler.h" #include "SIMachineScheduler.h" -#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Transforms/IPO.h" -#include "llvm/Transforms/IPO/AlwaysInliner.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/GVN.h" -#include "llvm/Transforms/Vectorize.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/AlwaysInliner.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Vectorize.h" #include <memory> using namespace llvm; @@ -734,7 +734,6 @@ void GCNPassConfig::addMachineSSAOptimization() { addPass(&SIFoldOperandsID); addPass(&DeadMachineInstructionElimID); addPass(&SILoadStoreOptimizerID); - addPass(createSIShrinkInstructionsPass()); if (EnableSDWAPeephole) { addPass(&SIPeepholeSDWAID); addPass(&MachineLICMID); @@ -742,6 +741,7 @@ void GCNPassConfig::addMachineSSAOptimization() { addPass(&SIFoldOperandsID); addPass(&DeadMachineInstructionElimID); } + addPass(createSIShrinkInstructionsPass()); } bool GCNPassConfig::addILPOpts() { diff --git a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp index c96761c0b04e..6c1885e67fcb 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp @@ -7,13 +7,13 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPUTargetMachine.h" #include "AMDGPUTargetObjectFile.h" #include "AMDGPU.h" +#include "AMDGPUTargetMachine.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" -#include "Utils/AMDGPUBaseInfo.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index beafebc1284a..dee3d2856701 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -20,8 +20,8 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/BasicTTIImpl.h" -#include "llvm/IR/Module.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" #include "llvm/Target/TargetLowering.h" diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index cc68c971b249..16e3b7b4ebee 100644 --- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -11,18 +11,19 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "SIDefines.h" +#include "Utils/AMDGPUAsmUtils.h" #include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDKernelCodeTUtils.h" -#include "Utils/AMDGPUAsmUtils.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -40,12 +41,11 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -814,14 +814,8 @@ private: bool ParseDirectiveCodeObjectMetadata(); bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); bool ParseDirectiveAMDKernelCodeT(); - bool ParseSectionDirectiveHSAText(); bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; bool ParseDirectiveAMDGPUHsaKernel(); - bool ParseDirectiveAMDGPUHsaModuleGlobal(); - bool ParseDirectiveAMDGPUHsaProgramGlobal(); - bool ParseSectionDirectiveHSADataGlobalAgent(); - bool ParseSectionDirectiveHSADataGlobalProgram(); - bool ParseSectionDirectiveHSARodataReadonlyAgent(); bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum); @@ -2365,12 +2359,6 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { return false; } -bool AMDGPUAsmParser::ParseSectionDirectiveHSAText() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSATextSection(getContext())); - return false; -} - bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { if (getLexer().isNot(AsmToken::Identifier)) return TokError("expected symbol name"); @@ -2384,46 +2372,6 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { return false; } -bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaModuleGlobal() { - if (getLexer().isNot(AsmToken::Identifier)) - return TokError("expected symbol name"); - - StringRef GlobalName = Parser.getTok().getIdentifier(); - - getTargetStreamer().EmitAMDGPUHsaModuleScopeGlobal(GlobalName); - Lex(); - return false; -} - -bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaProgramGlobal() { - if (getLexer().isNot(AsmToken::Identifier)) - return TokError("expected symbol name"); - - StringRef GlobalName = Parser.getTok().getIdentifier(); - - getTargetStreamer().EmitAMDGPUHsaProgramScopeGlobal(GlobalName); - Lex(); - return false; -} - -bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalAgent() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSADataGlobalAgentSection(getContext())); - return false; -} - -bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalProgram() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSADataGlobalProgramSection(getContext())); - return false; -} - -bool AMDGPUAsmParser::ParseSectionDirectiveHSARodataReadonlyAgent() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSARodataReadonlyAgentSection(getContext())); - return false; -} - bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getString(); @@ -2439,27 +2387,9 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".amd_kernel_code_t") return ParseDirectiveAMDKernelCodeT(); - if (IDVal == ".hsatext") - return ParseSectionDirectiveHSAText(); - if (IDVal == ".amdgpu_hsa_kernel") return ParseDirectiveAMDGPUHsaKernel(); - if (IDVal == ".amdgpu_hsa_module_global") - return ParseDirectiveAMDGPUHsaModuleGlobal(); - - if (IDVal == ".amdgpu_hsa_program_global") - return ParseDirectiveAMDGPUHsaProgramGlobal(); - - if (IDVal == ".hsadata_global_agent") - return ParseSectionDirectiveHSADataGlobalAgent(); - - if (IDVal == ".hsadata_global_program") - return ParseSectionDirectiveHSADataGlobalProgram(); - - if (IDVal == ".hsarodata_readonly_agent") - return ParseSectionDirectiveHSARodataReadonlyAgent(); - return true; } @@ -2919,6 +2849,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { if (getLexer().isNot(AsmToken::Integer)) return true; + SMLoc ValLoc = Parser.getTok().getLoc(); if (getParser().parseAbsoluteExpression(CntVal)) return true; @@ -2936,21 +2867,24 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); } - // To improve diagnostics, do not skip delimiters on errors - if (!Failed) { - if (getLexer().isNot(AsmToken::RParen)) { - return true; - } - Parser.Lex(); - if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { - const AsmToken NextToken = getLexer().peekTok(); - if (NextToken.is(AsmToken::Identifier)) { - Parser.Lex(); - } + if (Failed) { + Error(ValLoc, "too large value for " + CntName); + return true; + } + + if (getLexer().isNot(AsmToken::RParen)) { + return true; + } + + Parser.Lex(); + if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { + const AsmToken NextToken = getLexer().peekTok(); + if (NextToken.is(AsmToken::Identifier)) { + Parser.Lex(); } } - return Failed; + return false; } OperandMatchResultTy diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt index cafce0164fa9..e30844f082cd 100644 --- a/lib/Target/AMDGPU/CMakeLists.txt +++ b/lib/Target/AMDGPU/CMakeLists.txt @@ -58,6 +58,7 @@ add_llvm_target(AMDGPUCodeGen AMDGPUISelLowering.cpp AMDGPUInstrInfo.cpp AMDGPUPromoteAlloca.cpp + AMDGPURegAsmNames.inc.cpp AMDGPURegisterInfo.cpp AMDGPUUnifyDivergentExitNodes.cpp GCNHazardRecognizer.cpp diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 9b3cde7c4df6..88c92b9582fd 100644 --- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -20,21 +20,20 @@ #include "AMDGPUDisassembler.h" #include "AMDGPU.h" #include "AMDGPURegisterInfo.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIDefines.h" #include "Utils/AMDGPUBaseInfo.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ELF.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/TargetRegistry.h" - using namespace llvm; #define DEBUG_TYPE "amdgpu-disassembler" diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index 0ff405a71e9b..5fa3cf1a223f 100644 --- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -20,8 +20,8 @@ #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" #include "llvm/MC/MCDisassembler/MCSymbolizer.h" -#include <cstdint> #include <algorithm> +#include <cstdint> #include <memory> namespace llvm { diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 80fc4ac9d2a3..cd9e7fb04f16 100644 --- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPUSubtarget.h" #include "GCNHazardRecognizer.h" +#include "AMDGPUSubtarget.h" #include "SIDefines.h" #include "SIInstrInfo.h" #include "SIRegisterInfo.h" diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp index 523eea41897e..b84640230eee 100644 --- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -9,8 +9,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPUInstPrinter.h" -#include "SIDefines.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIDefines.h" #include "Utils/AMDGPUAsmUtils.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index f3266fe82955..0a9c2b94c1ee 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -8,8 +8,8 @@ /// \file //===----------------------------------------------------------------------===// -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "MCTargetDesc/AMDGPUFixupKinds.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h deleted file mode 100644 index 816e8c744b27..000000000000 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h +++ /dev/null @@ -1,422 +0,0 @@ -//===--- AMDGPUCodeObjectMetadata.h -----------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file -/// \brief AMDGPU Code Object Metadata definitions and in-memory -/// representations. -/// -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H -#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H - -#include <cstdint> -#include <string> -#include <system_error> -#include <vector> - -namespace llvm { -namespace AMDGPU { - -//===----------------------------------------------------------------------===// -// Code Object Metadata. -//===----------------------------------------------------------------------===// -namespace CodeObject { - -/// \brief Code object metadata major version. -constexpr uint32_t MetadataVersionMajor = 1; -/// \brief Code object metadata minor version. -constexpr uint32_t MetadataVersionMinor = 0; - -/// \brief Code object metadata beginning assembler directive. -constexpr char MetadataAssemblerDirectiveBegin[] = - ".amdgpu_code_object_metadata"; -/// \brief Code object metadata ending assembler directive. -constexpr char MetadataAssemblerDirectiveEnd[] = - ".end_amdgpu_code_object_metadata"; - -/// \brief Access qualifiers. -enum class AccessQualifier : uint8_t { - Default = 0, - ReadOnly = 1, - WriteOnly = 2, - ReadWrite = 3, - Unknown = 0xff -}; - -/// \brief Address space qualifiers. -enum class AddressSpaceQualifier : uint8_t { - Private = 0, - Global = 1, - Constant = 2, - Local = 3, - Generic = 4, - Region = 5, - Unknown = 0xff -}; - -/// \brief Value kinds. -enum class ValueKind : uint8_t { - ByValue = 0, - GlobalBuffer = 1, - DynamicSharedPointer = 2, - Sampler = 3, - Image = 4, - Pipe = 5, - Queue = 6, - HiddenGlobalOffsetX = 7, - HiddenGlobalOffsetY = 8, - HiddenGlobalOffsetZ = 9, - HiddenNone = 10, - HiddenPrintfBuffer = 11, - HiddenDefaultQueue = 12, - HiddenCompletionAction = 13, - Unknown = 0xff -}; - -/// \brief Value types. -enum class ValueType : uint8_t { - Struct = 0, - I8 = 1, - U8 = 2, - I16 = 3, - U16 = 4, - F16 = 5, - I32 = 6, - U32 = 7, - F32 = 8, - I64 = 9, - U64 = 10, - F64 = 11, - Unknown = 0xff -}; - -//===----------------------------------------------------------------------===// -// Kernel Metadata. -//===----------------------------------------------------------------------===// -namespace Kernel { - -//===----------------------------------------------------------------------===// -// Kernel Attributes Metadata. -//===----------------------------------------------------------------------===// -namespace Attrs { - -namespace Key { -/// \brief Key for Kernel::Attr::Metadata::mReqdWorkGroupSize. -constexpr char ReqdWorkGroupSize[] = "ReqdWorkGroupSize"; -/// \brief Key for Kernel::Attr::Metadata::mWorkGroupSizeHint. -constexpr char WorkGroupSizeHint[] = "WorkGroupSizeHint"; -/// \brief Key for Kernel::Attr::Metadata::mVecTypeHint. -constexpr char VecTypeHint[] = "VecTypeHint"; -} // end namespace Key - -/// \brief In-memory representation of kernel attributes metadata. -struct Metadata final { - /// \brief 'reqd_work_group_size' attribute. Optional. - std::vector<uint32_t> mReqdWorkGroupSize = std::vector<uint32_t>(); - /// \brief 'work_group_size_hint' attribute. Optional. - std::vector<uint32_t> mWorkGroupSizeHint = std::vector<uint32_t>(); - /// \brief 'vec_type_hint' attribute. Optional. - std::string mVecTypeHint = std::string(); - - /// \brief Default constructor. - Metadata() = default; - - /// \returns True if kernel attributes metadata is empty, false otherwise. - bool empty() const { - return mReqdWorkGroupSize.empty() && - mWorkGroupSizeHint.empty() && - mVecTypeHint.empty(); - } - - /// \returns True if kernel attributes metadata is not empty, false otherwise. - bool notEmpty() const { - return !empty(); - } -}; - -} // end namespace Attrs - -//===----------------------------------------------------------------------===// -// Kernel Argument Metadata. -//===----------------------------------------------------------------------===// -namespace Arg { - -namespace Key { -/// \brief Key for Kernel::Arg::Metadata::mSize. -constexpr char Size[] = "Size"; -/// \brief Key for Kernel::Arg::Metadata::mAlign. -constexpr char Align[] = "Align"; -/// \brief Key for Kernel::Arg::Metadata::mValueKind. -constexpr char ValueKind[] = "ValueKind"; -/// \brief Key for Kernel::Arg::Metadata::mValueType. -constexpr char ValueType[] = "ValueType"; -/// \brief Key for Kernel::Arg::Metadata::mPointeeAlign. -constexpr char PointeeAlign[] = "PointeeAlign"; -/// \brief Key for Kernel::Arg::Metadata::mAccQual. -constexpr char AccQual[] = "AccQual"; -/// \brief Key for Kernel::Arg::Metadata::mAddrSpaceQual. -constexpr char AddrSpaceQual[] = "AddrSpaceQual"; -/// \brief Key for Kernel::Arg::Metadata::mIsConst. -constexpr char IsConst[] = "IsConst"; -/// \brief Key for Kernel::Arg::Metadata::mIsPipe. -constexpr char IsPipe[] = "IsPipe"; -/// \brief Key for Kernel::Arg::Metadata::mIsRestrict. -constexpr char IsRestrict[] = "IsRestrict"; -/// \brief Key for Kernel::Arg::Metadata::mIsVolatile. -constexpr char IsVolatile[] = "IsVolatile"; -/// \brief Key for Kernel::Arg::Metadata::mName. -constexpr char Name[] = "Name"; -/// \brief Key for Kernel::Arg::Metadata::mTypeName. -constexpr char TypeName[] = "TypeName"; -} // end namespace Key - -/// \brief In-memory representation of kernel argument metadata. -struct Metadata final { - /// \brief Size in bytes. Required. - uint32_t mSize = 0; - /// \brief Alignment in bytes. Required. - uint32_t mAlign = 0; - /// \brief Value kind. Required. - ValueKind mValueKind = ValueKind::Unknown; - /// \brief Value type. Required. - ValueType mValueType = ValueType::Unknown; - /// \brief Pointee alignment in bytes. Optional. - uint32_t mPointeeAlign = 0; - /// \brief Access qualifier. Optional. - AccessQualifier mAccQual = AccessQualifier::Unknown; - /// \brief Address space qualifier. Optional. - AddressSpaceQualifier mAddrSpaceQual = AddressSpaceQualifier::Unknown; - /// \brief True if 'const' qualifier is specified. Optional. - bool mIsConst = false; - /// \brief True if 'pipe' qualifier is specified. Optional. - bool mIsPipe = false; - /// \brief True if 'restrict' qualifier is specified. Optional. - bool mIsRestrict = false; - /// \brief True if 'volatile' qualifier is specified. Optional. - bool mIsVolatile = false; - /// \brief Name. Optional. - std::string mName = std::string(); - /// \brief Type name. Optional. - std::string mTypeName = std::string(); - - /// \brief Default constructor. - Metadata() = default; -}; - -} // end namespace Arg - -//===----------------------------------------------------------------------===// -// Kernel Code Properties Metadata. -//===----------------------------------------------------------------------===// -namespace CodeProps { - -namespace Key { -/// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentSize. -constexpr char KernargSegmentSize[] = "KernargSegmentSize"; -/// \brief Key for Kernel::CodeProps::Metadata::mWorkgroupGroupSegmentSize. -constexpr char WorkgroupGroupSegmentSize[] = "WorkgroupGroupSegmentSize"; -/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemPrivateSegmentSize. -constexpr char WorkitemPrivateSegmentSize[] = "WorkitemPrivateSegmentSize"; -/// \brief Key for Kernel::CodeProps::Metadata::mWavefrontNumSGPRs. -constexpr char WavefrontNumSGPRs[] = "WavefrontNumSGPRs"; -/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemNumVGPRs. -constexpr char WorkitemNumVGPRs[] = "WorkitemNumVGPRs"; -/// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentAlign. -constexpr char KernargSegmentAlign[] = "KernargSegmentAlign"; -/// \brief Key for Kernel::CodeProps::Metadata::mGroupSegmentAlign. -constexpr char GroupSegmentAlign[] = "GroupSegmentAlign"; -/// \brief Key for Kernel::CodeProps::Metadata::mPrivateSegmentAlign. -constexpr char PrivateSegmentAlign[] = "PrivateSegmentAlign"; -/// \brief Key for Kernel::CodeProps::Metadata::mWavefrontSize. -constexpr char WavefrontSize[] = "WavefrontSize"; -} // end namespace Key - -/// \brief In-memory representation of kernel code properties metadata. -struct Metadata final { - /// \brief Size in bytes of the kernarg segment memory. Kernarg segment memory - /// holds the values of the arguments to the kernel. Optional. - uint64_t mKernargSegmentSize = 0; - /// \brief Size in bytes of the group segment memory required by a workgroup. - /// This value does not include any dynamically allocated group segment memory - /// that may be added when the kernel is dispatched. Optional. - uint32_t mWorkgroupGroupSegmentSize = 0; - /// \brief Size in bytes of the private segment memory required by a workitem. - /// Private segment memory includes arg, spill and private segments. Optional. - uint32_t mWorkitemPrivateSegmentSize = 0; - /// \brief Total number of SGPRs used by a wavefront. Optional. - uint16_t mWavefrontNumSGPRs = 0; - /// \brief Total number of VGPRs used by a workitem. Optional. - uint16_t mWorkitemNumVGPRs = 0; - /// \brief Maximum byte alignment of variables used by the kernel in the - /// kernarg memory segment. Expressed as a power of two. Optional. - uint8_t mKernargSegmentAlign = 0; - /// \brief Maximum byte alignment of variables used by the kernel in the - /// group memory segment. Expressed as a power of two. Optional. - uint8_t mGroupSegmentAlign = 0; - /// \brief Maximum byte alignment of variables used by the kernel in the - /// private memory segment. Expressed as a power of two. Optional. - uint8_t mPrivateSegmentAlign = 0; - /// \brief Wavefront size. Expressed as a power of two. Optional. - uint8_t mWavefrontSize = 0; - - /// \brief Default constructor. - Metadata() = default; - - /// \returns True if kernel code properties metadata is empty, false - /// otherwise. - bool empty() const { - return !notEmpty(); - } - - /// \returns True if kernel code properties metadata is not empty, false - /// otherwise. - bool notEmpty() const { - return mKernargSegmentSize || mWorkgroupGroupSegmentSize || - mWorkitemPrivateSegmentSize || mWavefrontNumSGPRs || - mWorkitemNumVGPRs || mKernargSegmentAlign || mGroupSegmentAlign || - mPrivateSegmentAlign || mWavefrontSize; - } -}; - -} // end namespace CodeProps - -//===----------------------------------------------------------------------===// -// Kernel Debug Properties Metadata. -//===----------------------------------------------------------------------===// -namespace DebugProps { - -namespace Key { -/// \brief Key for Kernel::DebugProps::Metadata::mDebuggerABIVersion. -constexpr char DebuggerABIVersion[] = "DebuggerABIVersion"; -/// \brief Key for Kernel::DebugProps::Metadata::mReservedNumVGPRs. -constexpr char ReservedNumVGPRs[] = "ReservedNumVGPRs"; -/// \brief Key for Kernel::DebugProps::Metadata::mReservedFirstVGPR. -constexpr char ReservedFirstVGPR[] = "ReservedFirstVGPR"; -/// \brief Key for Kernel::DebugProps::Metadata::mPrivateSegmentBufferSGPR. -constexpr char PrivateSegmentBufferSGPR[] = "PrivateSegmentBufferSGPR"; -/// \brief Key for -/// Kernel::DebugProps::Metadata::mWavefrontPrivateSegmentOffsetSGPR. -constexpr char WavefrontPrivateSegmentOffsetSGPR[] = - "WavefrontPrivateSegmentOffsetSGPR"; -} // end namespace Key - -/// \brief In-memory representation of kernel debug properties metadata. -struct Metadata final { - /// \brief Debugger ABI version. Optional. - std::vector<uint32_t> mDebuggerABIVersion = std::vector<uint32_t>(); - /// \brief Consecutive number of VGPRs reserved for debugger use. Must be 0 if - /// mDebuggerABIVersion is not set. Optional. - uint16_t mReservedNumVGPRs = 0; - /// \brief First fixed VGPR reserved. Must be uint16_t(-1) if - /// mDebuggerABIVersion is not set or mReservedFirstVGPR is 0. Optional. - uint16_t mReservedFirstVGPR = uint16_t(-1); - /// \brief Fixed SGPR of the first of 4 SGPRs used to hold the scratch V# used - /// for the entire kernel execution. Must be uint16_t(-1) if - /// mDebuggerABIVersion is not set or SGPR not used or not known. Optional. - uint16_t mPrivateSegmentBufferSGPR = uint16_t(-1); - /// \brief Fixed SGPR used to hold the wave scratch offset for the entire - /// kernel execution. Must be uint16_t(-1) if mDebuggerABIVersion is not set - /// or SGPR is not used or not known. Optional. - uint16_t mWavefrontPrivateSegmentOffsetSGPR = uint16_t(-1); - - /// \brief Default constructor. - Metadata() = default; - - /// \returns True if kernel debug properties metadata is empty, false - /// otherwise. - bool empty() const { - return !notEmpty(); - } - - /// \returns True if kernel debug properties metadata is not empty, false - /// otherwise. - bool notEmpty() const { - return !mDebuggerABIVersion.empty(); - } -}; - -} // end namespace DebugProps - -namespace Key { -/// \brief Key for Kernel::Metadata::mName. -constexpr char Name[] = "Name"; -/// \brief Key for Kernel::Metadata::mLanguage. -constexpr char Language[] = "Language"; -/// \brief Key for Kernel::Metadata::mLanguageVersion. -constexpr char LanguageVersion[] = "LanguageVersion"; -/// \brief Key for Kernel::Metadata::mAttrs. -constexpr char Attrs[] = "Attrs"; -/// \brief Key for Kernel::Metadata::mArgs. -constexpr char Args[] = "Args"; -/// \brief Key for Kernel::Metadata::mCodeProps. -constexpr char CodeProps[] = "CodeProps"; -/// \brief Key for Kernel::Metadata::mDebugProps. -constexpr char DebugProps[] = "DebugProps"; -} // end namespace Key - -/// \brief In-memory representation of kernel metadata. -struct Metadata final { - /// \brief Name. Required. - std::string mName = std::string(); - /// \brief Language. Optional. - std::string mLanguage = std::string(); - /// \brief Language version. Optional. - std::vector<uint32_t> mLanguageVersion = std::vector<uint32_t>(); - /// \brief Attributes metadata. Optional. - Attrs::Metadata mAttrs = Attrs::Metadata(); - /// \brief Arguments metadata. Optional. - std::vector<Arg::Metadata> mArgs = std::vector<Arg::Metadata>(); - /// \brief Code properties metadata. Optional. - CodeProps::Metadata mCodeProps = CodeProps::Metadata(); - /// \brief Debug properties metadata. Optional. - DebugProps::Metadata mDebugProps = DebugProps::Metadata(); - - /// \brief Default constructor. - Metadata() = default; -}; - -} // end namespace Kernel - -namespace Key { -/// \brief Key for CodeObject::Metadata::mVersion. -constexpr char Version[] = "Version"; -/// \brief Key for CodeObject::Metadata::mPrintf. -constexpr char Printf[] = "Printf"; -/// \brief Key for CodeObject::Metadata::mKernels. -constexpr char Kernels[] = "Kernels"; -} // end namespace Key - -/// \brief In-memory representation of code object metadata. -struct Metadata final { - /// \brief Code object metadata version. Required. - std::vector<uint32_t> mVersion = std::vector<uint32_t>(); - /// \brief Printf metadata. Optional. - std::vector<std::string> mPrintf = std::vector<std::string>(); - /// \brief Kernels metadata. Optional. - std::vector<Kernel::Metadata> mKernels = std::vector<Kernel::Metadata>(); - - /// \brief Default constructor. - Metadata() = default; - - /// \brief Converts \p YamlString to \p CodeObjectMetadata. - static std::error_code fromYamlString(std::string YamlString, - Metadata &CodeObjectMetadata); - - /// \brief Converts \p CodeObjectMetadata to \p YamlString. - static std::error_code toYamlString(Metadata CodeObjectMetadata, - std::string &YamlString); -}; - -} // end namespace CodeObject -} // end namespace AMDGPU -} // end namespace llvm - -#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp index 647017d5061d..4e828a791e09 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp @@ -13,20 +13,12 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPU.h" #include "AMDGPUCodeObjectMetadataStreamer.h" +#include "AMDGPU.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Module.h" -#include "llvm/Support/YAMLTraits.h" - -using namespace llvm::AMDGPU; -using namespace llvm::AMDGPU::CodeObject; - -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t) -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) -LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata) -LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata) +#include "llvm/Support/raw_ostream.h" namespace llvm { @@ -37,192 +29,7 @@ static cl::opt<bool> VerifyCodeObjectMetadata( "amdgpu-verify-comd", cl::desc("Verify AMDGPU Code Object Metadata")); -namespace yaml { - -template <> -struct ScalarEnumerationTraits<AccessQualifier> { - static void enumeration(IO &YIO, AccessQualifier &EN) { - YIO.enumCase(EN, "Default", AccessQualifier::Default); - YIO.enumCase(EN, "ReadOnly", AccessQualifier::ReadOnly); - YIO.enumCase(EN, "WriteOnly", AccessQualifier::WriteOnly); - YIO.enumCase(EN, "ReadWrite", AccessQualifier::ReadWrite); - } -}; - -template <> -struct ScalarEnumerationTraits<AddressSpaceQualifier> { - static void enumeration(IO &YIO, AddressSpaceQualifier &EN) { - YIO.enumCase(EN, "Private", AddressSpaceQualifier::Private); - YIO.enumCase(EN, "Global", AddressSpaceQualifier::Global); - YIO.enumCase(EN, "Constant", AddressSpaceQualifier::Constant); - YIO.enumCase(EN, "Local", AddressSpaceQualifier::Local); - YIO.enumCase(EN, "Generic", AddressSpaceQualifier::Generic); - YIO.enumCase(EN, "Region", AddressSpaceQualifier::Region); - } -}; - -template <> -struct ScalarEnumerationTraits<ValueKind> { - static void enumeration(IO &YIO, ValueKind &EN) { - YIO.enumCase(EN, "ByValue", ValueKind::ByValue); - YIO.enumCase(EN, "GlobalBuffer", ValueKind::GlobalBuffer); - YIO.enumCase(EN, "DynamicSharedPointer", ValueKind::DynamicSharedPointer); - YIO.enumCase(EN, "Sampler", ValueKind::Sampler); - YIO.enumCase(EN, "Image", ValueKind::Image); - YIO.enumCase(EN, "Pipe", ValueKind::Pipe); - YIO.enumCase(EN, "Queue", ValueKind::Queue); - YIO.enumCase(EN, "HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX); - YIO.enumCase(EN, "HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY); - YIO.enumCase(EN, "HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ); - YIO.enumCase(EN, "HiddenNone", ValueKind::HiddenNone); - YIO.enumCase(EN, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer); - YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue); - YIO.enumCase(EN, "HiddenCompletionAction", - ValueKind::HiddenCompletionAction); - } -}; - -template <> -struct ScalarEnumerationTraits<ValueType> { - static void enumeration(IO &YIO, ValueType &EN) { - YIO.enumCase(EN, "Struct", ValueType::Struct); - YIO.enumCase(EN, "I8", ValueType::I8); - YIO.enumCase(EN, "U8", ValueType::U8); - YIO.enumCase(EN, "I16", ValueType::I16); - YIO.enumCase(EN, "U16", ValueType::U16); - YIO.enumCase(EN, "F16", ValueType::F16); - YIO.enumCase(EN, "I32", ValueType::I32); - YIO.enumCase(EN, "U32", ValueType::U32); - YIO.enumCase(EN, "F32", ValueType::F32); - YIO.enumCase(EN, "I64", ValueType::I64); - YIO.enumCase(EN, "U64", ValueType::U64); - YIO.enumCase(EN, "F64", ValueType::F64); - } -}; - -template <> -struct MappingTraits<Kernel::Attrs::Metadata> { - static void mapping(IO &YIO, Kernel::Attrs::Metadata &MD) { - YIO.mapOptional(Kernel::Attrs::Key::ReqdWorkGroupSize, - MD.mReqdWorkGroupSize, std::vector<uint32_t>()); - YIO.mapOptional(Kernel::Attrs::Key::WorkGroupSizeHint, - MD.mWorkGroupSizeHint, std::vector<uint32_t>()); - YIO.mapOptional(Kernel::Attrs::Key::VecTypeHint, - MD.mVecTypeHint, std::string()); - } -}; - -template <> -struct MappingTraits<Kernel::Arg::Metadata> { - static void mapping(IO &YIO, Kernel::Arg::Metadata &MD) { - YIO.mapRequired(Kernel::Arg::Key::Size, MD.mSize); - YIO.mapRequired(Kernel::Arg::Key::Align, MD.mAlign); - YIO.mapRequired(Kernel::Arg::Key::ValueKind, MD.mValueKind); - YIO.mapRequired(Kernel::Arg::Key::ValueType, MD.mValueType); - YIO.mapOptional(Kernel::Arg::Key::PointeeAlign, MD.mPointeeAlign, - uint32_t(0)); - YIO.mapOptional(Kernel::Arg::Key::AccQual, MD.mAccQual, - AccessQualifier::Unknown); - YIO.mapOptional(Kernel::Arg::Key::AddrSpaceQual, MD.mAddrSpaceQual, - AddressSpaceQualifier::Unknown); - YIO.mapOptional(Kernel::Arg::Key::IsConst, MD.mIsConst, false); - YIO.mapOptional(Kernel::Arg::Key::IsPipe, MD.mIsPipe, false); - YIO.mapOptional(Kernel::Arg::Key::IsRestrict, MD.mIsRestrict, false); - YIO.mapOptional(Kernel::Arg::Key::IsVolatile, MD.mIsVolatile, false); - YIO.mapOptional(Kernel::Arg::Key::Name, MD.mName, std::string()); - YIO.mapOptional(Kernel::Arg::Key::TypeName, MD.mTypeName, std::string()); - } -}; - -template <> -struct MappingTraits<Kernel::CodeProps::Metadata> { - static void mapping(IO &YIO, Kernel::CodeProps::Metadata &MD) { - YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentSize, - MD.mKernargSegmentSize, uint64_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WorkgroupGroupSegmentSize, - MD.mWorkgroupGroupSegmentSize, uint32_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WorkitemPrivateSegmentSize, - MD.mWorkitemPrivateSegmentSize, uint32_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WavefrontNumSGPRs, - MD.mWavefrontNumSGPRs, uint16_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WorkitemNumVGPRs, - MD.mWorkitemNumVGPRs, uint16_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentAlign, - MD.mKernargSegmentAlign, uint8_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::GroupSegmentAlign, - MD.mGroupSegmentAlign, uint8_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::PrivateSegmentAlign, - MD.mPrivateSegmentAlign, uint8_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WavefrontSize, - MD.mWavefrontSize, uint8_t(0)); - } -}; - -template <> -struct MappingTraits<Kernel::DebugProps::Metadata> { - static void mapping(IO &YIO, Kernel::DebugProps::Metadata &MD) { - YIO.mapOptional(Kernel::DebugProps::Key::DebuggerABIVersion, - MD.mDebuggerABIVersion, std::vector<uint32_t>()); - YIO.mapOptional(Kernel::DebugProps::Key::ReservedNumVGPRs, - MD.mReservedNumVGPRs, uint16_t(0)); - YIO.mapOptional(Kernel::DebugProps::Key::ReservedFirstVGPR, - MD.mReservedFirstVGPR, uint16_t(-1)); - YIO.mapOptional(Kernel::DebugProps::Key::PrivateSegmentBufferSGPR, - MD.mPrivateSegmentBufferSGPR, uint16_t(-1)); - YIO.mapOptional(Kernel::DebugProps::Key::WavefrontPrivateSegmentOffsetSGPR, - MD.mWavefrontPrivateSegmentOffsetSGPR, uint16_t(-1)); - } -}; - -template <> -struct MappingTraits<Kernel::Metadata> { - static void mapping(IO &YIO, Kernel::Metadata &MD) { - YIO.mapRequired(Kernel::Key::Name, MD.mName); - YIO.mapOptional(Kernel::Key::Language, MD.mLanguage, std::string()); - YIO.mapOptional(Kernel::Key::LanguageVersion, MD.mLanguageVersion, - std::vector<uint32_t>()); - if (!MD.mAttrs.empty() || !YIO.outputting()) - YIO.mapOptional(Kernel::Key::Attrs, MD.mAttrs); - if (!MD.mArgs.empty() || !YIO.outputting()) - YIO.mapOptional(Kernel::Key::Args, MD.mArgs); - if (!MD.mCodeProps.empty() || !YIO.outputting()) - YIO.mapOptional(Kernel::Key::CodeProps, MD.mCodeProps); - if (!MD.mDebugProps.empty() || !YIO.outputting()) - YIO.mapOptional(Kernel::Key::DebugProps, MD.mDebugProps); - } -}; - -template <> -struct MappingTraits<CodeObject::Metadata> { - static void mapping(IO &YIO, CodeObject::Metadata &MD) { - YIO.mapRequired(Key::Version, MD.mVersion); - YIO.mapOptional(Key::Printf, MD.mPrintf, std::vector<std::string>()); - if (!MD.mKernels.empty() || !YIO.outputting()) - YIO.mapOptional(Key::Kernels, MD.mKernels); - } -}; - -} // end namespace yaml - namespace AMDGPU { - -/* static */ -std::error_code CodeObject::Metadata::fromYamlString( - std::string YamlString, CodeObject::Metadata &CodeObjectMetadata) { - yaml::Input YamlInput(YamlString); - YamlInput >> CodeObjectMetadata; - return YamlInput.error(); -} - -/* static */ -std::error_code CodeObject::Metadata::toYamlString( - CodeObject::Metadata CodeObjectMetadata, std::string &YamlString) { - raw_string_ostream YamlStream(YamlString); - yaml::Output YamlOutput(YamlStream, nullptr, std::numeric_limits<int>::max()); - YamlOutput << CodeObjectMetadata; - return std::error_code(); -} - namespace CodeObject { void MetadataStreamer::dump(StringRef YamlString) const { diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h index 8d4c51763f63..c6681431d74d 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h @@ -17,9 +17,9 @@ #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H #include "AMDGPU.h" -#include "AMDGPUCodeObjectMetadata.h" #include "AMDKernelCodeT.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/AMDGPUCodeObjectMetadata.h" #include "llvm/Support/ErrorOr.h" namespace llvm { diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp index 073d19422e86..6abe7f3d37d5 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -8,12 +8,12 @@ //===----------------------------------------------------------------------===// #include "AMDGPUMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 8dc863f723e2..2a0032fc9adc 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -11,12 +11,13 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPU.h" #include "AMDGPUTargetStreamer.h" +#include "AMDGPU.h" #include "SIDefines.h" #include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDKernelCodeTUtils.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" @@ -25,7 +26,6 @@ #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/FormattedStream.h" namespace llvm { @@ -100,16 +100,6 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, } } -void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaModuleScopeGlobal( - StringRef GlobalName) { - OS << "\t.amdgpu_hsa_module_global " << GlobalName << '\n'; -} - -void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal( - StringRef GlobalName) { - OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n'; -} - bool AMDGPUTargetAsmStreamer::EmitCodeObjectMetadata(StringRef YamlString) { auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString); if (!VerifiedYamlString) @@ -214,24 +204,6 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, Symbol->setType(ELF::STT_AMDGPU_HSA_KERNEL); } -void AMDGPUTargetELFStreamer::EmitAMDGPUHsaModuleScopeGlobal( - StringRef GlobalName) { - - MCSymbolELF *Symbol = cast<MCSymbolELF>( - getStreamer().getContext().getOrCreateSymbol(GlobalName)); - Symbol->setType(ELF::STT_OBJECT); - Symbol->setBinding(ELF::STB_LOCAL); -} - -void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal( - StringRef GlobalName) { - - MCSymbolELF *Symbol = cast<MCSymbolELF>( - getStreamer().getContext().getOrCreateSymbol(GlobalName)); - Symbol->setType(ELF::STT_OBJECT); - Symbol->setBinding(ELF::STB_GLOBAL); -} - bool AMDGPUTargetELFStreamer::EmitCodeObjectMetadata(StringRef YamlString) { auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString); if (!VerifiedYamlString) diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index 5c588bbded9c..968128e94d0b 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -44,10 +44,6 @@ public: virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) = 0; - virtual void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) = 0; - - virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0; - virtual void EmitStartOfCodeObjectMetadata(const Module &Mod); virtual void EmitKernelCodeObjectMetadata( @@ -74,10 +70,6 @@ public: void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override; - void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override; - - void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; - /// \returns True on success, false on failure. bool EmitCodeObjectMetadata(StringRef YamlString) override; }; @@ -105,10 +97,6 @@ public: void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override; - void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override; - - void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; - /// \returns True on success, false on failure. bool EmitCodeObjectMetadata(StringRef YamlString) override; }; diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp index 6015ec190fd4..eab90e1d344c 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp @@ -14,10 +14,10 @@ // //===----------------------------------------------------------------------===// -#include "R600Defines.h" #include "MCTargetDesc/AMDGPUFixupKinds.h" #include "MCTargetDesc/AMDGPUMCCodeEmitter.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "R600Defines.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCFixup.h" diff --git a/lib/Target/AMDGPU/Processors.td b/lib/Target/AMDGPU/Processors.td index 0e4eda982139..f6f2582aa11b 100644 --- a/lib/Target/AMDGPU/Processors.td +++ b/lib/Target/AMDGPU/Processors.td @@ -80,50 +80,53 @@ def : Proc<"cayman", R600_VLIW4_Itin, // Southern Islands //===----------------------------------------------------------------------===// -def : ProcessorModel<"SI", SIFullSpeedModel, - [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops] +def : ProcessorModel<"gfx600", SIFullSpeedModel, + [FeatureISAVersion6_0_0]>; + +def : ProcessorModel<"SI", SIFullSpeedModel, + [FeatureISAVersion6_0_0] +>; + +def : ProcessorModel<"tahiti", SIFullSpeedModel, + [FeatureISAVersion6_0_0] >; -def : ProcessorModel<"tahiti", SIFullSpeedModel, - [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops] +def : ProcessorModel<"gfx601", SIQuarterSpeedModel, + [FeatureISAVersion6_0_1] >; -def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>; +def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, + [FeatureISAVersion6_0_1]>; -def : ProcessorModel<"verde", SIQuarterSpeedModel, [FeatureSouthernIslands]>; +def : ProcessorModel<"verde", SIQuarterSpeedModel, + [FeatureISAVersion6_0_1]>; -def : ProcessorModel<"oland", SIQuarterSpeedModel, [FeatureSouthernIslands]>; +def : ProcessorModel<"oland", SIQuarterSpeedModel, + [FeatureISAVersion6_0_1]>; -def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureSouthernIslands]>; +def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureISAVersion6_0_1]>; //===----------------------------------------------------------------------===// // Sea Islands //===----------------------------------------------------------------------===// -def : ProcessorModel<"bonaire", SIQuarterSpeedModel, +def : ProcessorModel<"gfx700", SIQuarterSpeedModel, [FeatureISAVersion7_0_0] >; -def : ProcessorModel<"kabini", SIQuarterSpeedModel, - [FeatureISAVersion7_0_2] +def : ProcessorModel<"bonaire", SIQuarterSpeedModel, + [FeatureISAVersion7_0_0] >; def : ProcessorModel<"kaveri", SIQuarterSpeedModel, [FeatureISAVersion7_0_0] >; -def : ProcessorModel<"hawaii", SIFullSpeedModel, +def : ProcessorModel<"gfx701", SIFullSpeedModel, [FeatureISAVersion7_0_1] >; -def : ProcessorModel<"mullins", SIQuarterSpeedModel, - [FeatureISAVersion7_0_2]>; - -def : ProcessorModel<"gfx700", SIQuarterSpeedModel, - [FeatureISAVersion7_0_0] ->; - -def : ProcessorModel<"gfx701", SIFullSpeedModel, +def : ProcessorModel<"hawaii", SIFullSpeedModel, [FeatureISAVersion7_0_1] >; @@ -131,6 +134,17 @@ def : ProcessorModel<"gfx702", SIQuarterSpeedModel, [FeatureISAVersion7_0_2] >; +def : ProcessorModel<"gfx703", SIQuarterSpeedModel, + [FeatureISAVersion7_0_3] +>; + +def : ProcessorModel<"kabini", SIQuarterSpeedModel, + [FeatureISAVersion7_0_3] +>; + +def : ProcessorModel<"mullins", SIQuarterSpeedModel, + [FeatureISAVersion7_0_3]>; + //===----------------------------------------------------------------------===// // Volcanic Islands //===----------------------------------------------------------------------===// @@ -187,10 +201,23 @@ def : ProcessorModel<"gfx810", SIQuarterSpeedModel, [FeatureISAVersion8_1_0] >; -def : ProcessorModel<"gfx900", SIQuarterSpeedModel, - [FeatureGFX9, FeatureISAVersion9_0_0, FeatureLDSBankCount32] +//===----------------------------------------------------------------------===// +// GFX9 +//===----------------------------------------------------------------------===// + +def : ProcessorModel<"gfx900", SIQuarterSpeedModel, + [FeatureISAVersion9_0_0] +>; + +def : ProcessorModel<"gfx901", SIQuarterSpeedModel, + [FeatureISAVersion9_0_1] +>; + +def : ProcessorModel<"gfx902", SIQuarterSpeedModel, + [FeatureISAVersion9_0_2] >; -def : ProcessorModel<"gfx901", SIQuarterSpeedModel, - [FeatureGFX9, FeatureXNACK, FeatureISAVersion9_0_1, FeatureLDSBankCount32] +def : ProcessorModel<"gfx903", SIQuarterSpeedModel, + [FeatureISAVersion9_0_3] >; + diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp index 09b328765604..6993e8a62a9c 100644 --- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -12,15 +12,14 @@ /// computing their address on the fly ; it also sets STACK_SIZE info. //===----------------------------------------------------------------------===// -#include "llvm/Support/Debug.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" #include "R600RegisterInfo.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" @@ -30,6 +29,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DebugLoc.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> diff --git a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp index 03fc1aff5ec1..0d8ccd088ec4 100644 --- a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp +++ b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp @@ -15,10 +15,10 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600RegisterInfo.h" -#include "AMDGPUSubtarget.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp index 5c30a0734f0d..66def2d29caf 100644 --- a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp +++ b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp @@ -15,11 +15,11 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" #include "R600RegisterInfo.h" -#include "AMDGPUSubtarget.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git a/lib/Target/AMDGPU/R600FrameLowering.cpp b/lib/Target/AMDGPU/R600FrameLowering.cpp index 1f01ad732e00..37787b3c5f72 100644 --- a/lib/Target/AMDGPU/R600FrameLowering.cpp +++ b/lib/Target/AMDGPU/R600FrameLowering.cpp @@ -10,8 +10,8 @@ #include "R600FrameLowering.h" #include "AMDGPUSubtarget.h" #include "R600RegisterInfo.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/Support/MathExtras.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 60b913cfd39a..c55878f8bff0 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -1120,7 +1120,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store, Mask = DAG.getConstant(0xff, DL, MVT::i32); } else if (Store->getMemoryVT() == MVT::i16) { assert(Store->getAlignment() >= 2); - Mask = DAG.getConstant(0xffff, DL, MVT::i32);; + Mask = DAG.getConstant(0xffff, DL, MVT::i32); } else { llvm_unreachable("Unsupported private trunc store"); } diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp index 2422d57269eb..c5da5e404200 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -12,12 +12,12 @@ // //===----------------------------------------------------------------------===// +#include "R600InstrInfo.h" #include "AMDGPU.h" #include "AMDGPUInstrInfo.h" #include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600FrameLowering.h" -#include "R600InstrInfo.h" #include "R600RegisterInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/BitVector.h" @@ -35,8 +35,8 @@ #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> -#include <cstring> #include <cstdint> +#include <cstring> #include <iterator> #include <utility> #include <vector> diff --git a/lib/Target/AMDGPU/R600MachineScheduler.cpp b/lib/Target/AMDGPU/R600MachineScheduler.cpp index db18e5bd1afa..47fda1c8fa82 100644 --- a/lib/Target/AMDGPU/R600MachineScheduler.cpp +++ b/lib/Target/AMDGPU/R600MachineScheduler.cpp @@ -13,11 +13,11 @@ //===----------------------------------------------------------------------===// #include "R600MachineScheduler.h" -#include "R600InstrInfo.h" #include "AMDGPUSubtarget.h" +#include "R600InstrInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Pass.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/R600Packetizer.cpp b/lib/Target/AMDGPU/R600Packetizer.cpp index 3e957126b497..1cb40938cee7 100644 --- a/lib/Target/AMDGPU/R600Packetizer.cpp +++ b/lib/Target/AMDGPU/R600Packetizer.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/Debug.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "R600InstrInfo.h" @@ -24,6 +23,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp b/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp index 62ebef8e91af..b5c439b21b89 100644 --- a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp +++ b/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp @@ -19,8 +19,8 @@ // //===----------------------------------------------------------------------===// -#include "SIInstrInfo.h" #include "AMDGPUSubtarget.h" +#include "SIInstrInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 3cca815d8773..5f5f25103c02 100644 --- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -65,10 +65,10 @@ /// ultimately led to the creation of an illegal COPY. //===----------------------------------------------------------------------===// -#include "llvm/ADT/DenseSet.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index dfac068d1f69..e10f1ed3762e 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -730,7 +730,8 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const { // Make sure sources are identical. const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); - if (!Src0->isReg() || Src0->getSubReg() != Src1->getSubReg() || + if (!Src0->isReg() || !Src1->isReg() || + Src0->getSubReg() != Src1->getSubReg() || Src0->getSubReg() != AMDGPU::NoSubRegister) return nullptr; diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp index 97bb0f0c0656..b1bd14e421f0 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -8,10 +8,10 @@ //==-----------------------------------------------------------------------===// #include "SIFrameLowering.h" +#include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" -#include "AMDGPUSubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index b48b23911105..599ee942d738 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -17,12 +17,12 @@ #define _USE_MATH_DEFINES #endif +#include "SIISelLowering.h" #include "AMDGPU.h" #include "AMDGPUIntrinsicInfo.h" -#include "AMDGPUTargetMachine.h" #include "AMDGPUSubtarget.h" +#include "AMDGPUTargetMachine.h" #include "SIDefines.h" -#include "SIISelLowering.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" @@ -2604,7 +2604,7 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { SDValue FpToFp16 = DAG.getNode(ISD::FP_TO_FP16, DL, MVT::i32, Src); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FpToFp16); - return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);; + return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc); } SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const { diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index c10badba88f3..0f009a48754a 100644 --- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -229,7 +229,7 @@ public: MachineInstr &MI); BlockWaitcntBrackets() - : WaitAtBeginning(false), ValidLoop(false), MixedExpTypes(false), + : WaitAtBeginning(false), RevisitLoop(false), ValidLoop(false), MixedExpTypes(false), LoopRegion(NULL), PostOrder(0), Waitcnt(NULL), VgprUB(0), SgprUB(0) { for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; T = (enum InstCounterType)(T + 1)) { diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index 36d29b8ecf06..58c05cf16f15 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -20,10 +20,10 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" -#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/Debug.h" diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp index 35d3a93d8710..5f1c7f1fc42f 100644 --- a/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -60,8 +60,8 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp b/lib/Target/AMDGPU/SILowerI1Copies.cpp index 3680e02da576..ba616ada0c9c 100644 --- a/lib/Target/AMDGPU/SILowerI1Copies.cpp +++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp @@ -21,8 +21,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 348bb4fa0260..9fdb8caac6f2 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -15,8 +15,8 @@ #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H #include "AMDGPUMachineFunction.h" -#include "SIRegisterInfo.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp index 9d4e677400e6..bb17dbbdfbd6 100644 --- a/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "SIMachineScheduler.h" #include "AMDGPU.h" #include "SIInstrInfo.h" -#include "SIMachineScheduler.h" #include "SIRegisterInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index fae249b04492..f4ddf1891683 100644 --- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -20,13 +20,12 @@ /// //===----------------------------------------------------------------------===// - #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "SIDefines.h" #include "SIInstrInfo.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include <unordered_map> @@ -129,7 +128,8 @@ public: bool getNeg() const { return Neg; } bool getSext() const { return Sext; } - uint64_t getSrcMods() const; + uint64_t getSrcMods(const SIInstrInfo *TII, + const MachineOperand *SrcOp) const; }; class SDWADstOperand : public SDWAOperand { @@ -240,13 +240,24 @@ static bool isSubregOf(const MachineOperand &SubReg, return SuperMask.all(); } -uint64_t SDWASrcOperand::getSrcMods() const { +uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII, + const MachineOperand *SrcOp) const { uint64_t Mods = 0; + const auto *MI = SrcOp->getParent(); + if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) { + if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) { + Mods = Mod->getImm(); + } + } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) { + if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) { + Mods = Mod->getImm(); + } + } if (Abs || Neg) { assert(!Sext && "Float and integer src modifiers can't be set simulteniously"); Mods |= Abs ? SISrcMods::ABS : 0; - Mods |= Neg ? SISrcMods::NEG : 0; + Mods ^= Neg ? SISrcMods::NEG : 0; } else if (Sext) { Mods |= SISrcMods::SEXT; } @@ -312,7 +323,7 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) { } copyRegOperand(*Src, *getTargetOperand()); SrcSel->setImm(getSrcSel()); - SrcMods->setImm(getSrcMods()); + SrcMods->setImm(getSrcMods(TII, Src)); getTargetOperand()->setIsKill(false); return true; } @@ -409,7 +420,10 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { switch (Opcode) { case AMDGPU::V_LSHRREV_B32_e32: case AMDGPU::V_ASHRREV_I32_e32: - case AMDGPU::V_LSHLREV_B32_e32: { + case AMDGPU::V_LSHLREV_B32_e32: + case AMDGPU::V_LSHRREV_B32_e64: + case AMDGPU::V_ASHRREV_I32_e64: + case AMDGPU::V_LSHLREV_B32_e64: { // from: v_lshrrev_b32_e32 v1, 16/24, v0 // to SDWA src:v0 src_sel:WORD_1/BYTE_3 @@ -432,7 +446,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { TRI->isPhysicalRegister(Dst->getReg())) break; - if (Opcode == AMDGPU::V_LSHLREV_B32_e32) { + if (Opcode == AMDGPU::V_LSHLREV_B32_e32 || + Opcode == AMDGPU::V_LSHLREV_B32_e64) { auto SDWADst = make_unique<SDWADstOperand>( Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n'); @@ -441,7 +456,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { } else { auto SDWASrc = make_unique<SDWASrcOperand>( Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false, - Opcode == AMDGPU::V_LSHRREV_B32_e32 ? false : true); + Opcode != AMDGPU::V_LSHRREV_B32_e32 && + Opcode != AMDGPU::V_LSHRREV_B32_e64); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); SDWAOperands[&MI] = std::move(SDWASrc); ++NumSDWAPatternsFound; @@ -451,7 +467,10 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { case AMDGPU::V_LSHRREV_B16_e32: case AMDGPU::V_ASHRREV_I16_e32: - case AMDGPU::V_LSHLREV_B16_e32: { + case AMDGPU::V_LSHLREV_B16_e32: + case AMDGPU::V_LSHRREV_B16_e64: + case AMDGPU::V_ASHRREV_I16_e64: + case AMDGPU::V_LSHLREV_B16_e64: { // from: v_lshrrev_b16_e32 v1, 8, v0 // to SDWA src:v0 src_sel:BYTE_1 @@ -472,7 +491,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { TRI->isPhysicalRegister(Dst->getReg())) break; - if (Opcode == AMDGPU::V_LSHLREV_B16_e32) { + if (Opcode == AMDGPU::V_LSHLREV_B16_e32 || + Opcode == AMDGPU::V_LSHLREV_B16_e64) { auto SDWADst = make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n'); @@ -481,7 +501,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { } else { auto SDWASrc = make_unique<SDWASrcOperand>( Src1, Dst, BYTE_1, false, false, - Opcode == AMDGPU::V_LSHRREV_B16_e32 ? false : true); + Opcode != AMDGPU::V_LSHRREV_B16_e32 && + Opcode != AMDGPU::V_LSHRREV_B16_e64); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); SDWAOperands[&MI] = std::move(SDWASrc); ++NumSDWAPatternsFound; @@ -549,20 +570,25 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { ++NumSDWAPatternsFound; break; } - case AMDGPU::V_AND_B32_e32: { + case AMDGPU::V_AND_B32_e32: + case AMDGPU::V_AND_B32_e64: { // e.g.: // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0 // to SDWA src:v0 src_sel:WORD_0/BYTE_0 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); + MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); + auto ValSrc = Src1; auto Imm = foldToImm(*Src0); - if (!Imm) - break; - if (*Imm != 0x0000ffff && *Imm != 0x000000ff) + if (!Imm) { + Imm = foldToImm(*Src1); + ValSrc = Src0; + } + + if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff)) break; - MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); if (TRI->isPhysicalRegister(Src1->getReg()) || @@ -570,7 +596,7 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { break; auto SDWASrc = make_unique<SDWASrcOperand>( - Src1, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0); + ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); SDWAOperands[&MI] = std::move(SDWASrc); ++NumSDWAPatternsFound; @@ -583,28 +609,38 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI) const { // Check if this instruction has opcode that supports SDWA - return AMDGPU::getSDWAOp(MI.getOpcode()) != -1; + unsigned Opc = MI.getOpcode(); + if (AMDGPU::getSDWAOp(Opc) != -1) + return true; + int Opc32 = AMDGPU::getVOPe32(Opc); + if (Opc32 != -1 && AMDGPU::getSDWAOp(Opc32) != -1) + return !TII->hasModifiersSet(MI, AMDGPU::OpName::omod) && + !TII->getNamedOperand(MI, AMDGPU::OpName::sdst); + return false; } bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands) { // Convert to sdwa int SDWAOpcode = AMDGPU::getSDWAOp(MI.getOpcode()); + if (SDWAOpcode == -1) + SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(MI.getOpcode())); assert(SDWAOpcode != -1); + // Copy dst, if it is present in original then should also be present in SDWA + MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); + if (!Dst && !TII->isVOPC(MI)) + return false; + const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode); // Create SDWA version of instruction MI and initialize its operands MachineInstrBuilder SDWAInst = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc); - // Copy dst, if it is present in original then should also be present in SDWA - MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); if (Dst) { assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1); SDWAInst.add(*Dst); - } else { - assert(TII->isVOPC(MI)); } // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and @@ -614,7 +650,10 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, Src0 && AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 && AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1); - SDWAInst.addImm(0); + if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)) + SDWAInst.addImm(Mod->getImm()); + else + SDWAInst.addImm(0); SDWAInst.add(*Src0); // Copy src1 if present, initialize src1_modifiers. @@ -623,10 +662,11 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, assert( AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 && AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1); - SDWAInst.addImm(0); + if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)) + SDWAInst.addImm(Mod->getImm()); + else + SDWAInst.addImm(0); SDWAInst.add(*Src1); - } else { - assert(TII->isVOP1(MI)); } if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa || @@ -746,8 +786,9 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) { PotentialMatches.clear(); SDWAOperands.clear(); + bool Ret = !ConvertedInstructions.empty(); while (!ConvertedInstructions.empty()) legalizeScalarOperands(*ConvertedInstructions.pop_back_val()); - return false; + return Ret; } diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index 6fb01a09fe13..b611f28fcabd 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -13,9 +13,9 @@ //===----------------------------------------------------------------------===// #include "SIRegisterInfo.h" +#include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" -#include "AMDGPUSubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/RegisterScavenging.h" @@ -1104,6 +1104,66 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, } } +StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const { + #define AMDGPU_REG_ASM_NAMES + #include "AMDGPURegAsmNames.inc.cpp" + + #define REG_RANGE(BeginReg, EndReg, RegTable) \ + if (Reg >= BeginReg && Reg <= EndReg) { \ + unsigned Index = Reg - BeginReg; \ + assert(Index < array_lengthof(RegTable)); \ + return RegTable[Index]; \ + } + + REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames); + REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames); + REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames); + REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames); + REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255, + VGPR96RegNames); + + REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3, + AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255, + VGPR128RegNames); + REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, + AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103, + SGPR128RegNames); + + REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7, + AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255, + VGPR256RegNames); + + REG_RANGE( + AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15, + AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255, + VGPR512RegNames); + + REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7, + AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103, + SGPR256RegNames); + + REG_RANGE( + AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15, + AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103, + SGPR512RegNames + ); + +#undef REG_RANGE + + // FIXME: Rename flat_scr so we don't need to special case this. + switch (Reg) { + case AMDGPU::FLAT_SCR: + return "flat_scratch"; + case AMDGPU::FLAT_SCR_LO: + return "flat_scratch_lo"; + case AMDGPU::FLAT_SCR_HI: + return "flat_scratch_hi"; + default: + // For the special named registers the default is fine. + return TargetRegisterInfo::getRegAsmName(Reg); + } +} + // FIXME: This is very slow. It might be worth creating a map from physreg to // register class. const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const { diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h index a648c178101a..8fed6d5f9710 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/lib/Target/AMDGPU/SIRegisterInfo.h @@ -16,8 +16,8 @@ #define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H #include "AMDGPURegisterInfo.h" -#include "SIDefines.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIDefines.h" #include "llvm/CodeGen/MachineRegisterInfo.h" namespace llvm { @@ -118,6 +118,8 @@ public: bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS) const; + StringRef getRegAsmName(unsigned Reg) const override; + unsigned getHWRegIndex(unsigned Reg) const { return getEncodingValue(Reg) & 0xff; } diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 630f469eabf0..f581e69980c7 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -7,11 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPU.h" #include "AMDGPUBaseInfo.h" +#include "AMDGPU.h" #include "SIDefines.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" @@ -27,7 +28,6 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include <algorithm> @@ -38,7 +38,6 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" - #define GET_INSTRINFO_NAMED_OPS #include "AMDGPUGenInstrInfo.inc" #undef GET_INSTRINFO_NAMED_OPS @@ -104,6 +103,11 @@ namespace AMDGPU { namespace IsaInfo { IsaVersion getIsaVersion(const FeatureBitset &Features) { + // SI. + if (Features.test(FeatureISAVersion6_0_0)) + return {6, 0, 0}; + if (Features.test(FeatureISAVersion6_0_1)) + return {6, 0, 1}; // CI. if (Features.test(FeatureISAVersion7_0_0)) return {7, 0, 0}; @@ -111,6 +115,8 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) { return {7, 0, 1}; if (Features.test(FeatureISAVersion7_0_2)) return {7, 0, 2}; + if (Features.test(FeatureISAVersion7_0_3)) + return {7, 0, 3}; // VI. if (Features.test(FeatureISAVersion8_0_0)) @@ -131,6 +137,10 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) { return {9, 0, 0}; if (Features.test(FeatureISAVersion9_0_1)) return {9, 0, 1}; + if (Features.test(FeatureISAVersion9_0_2)) + return {9, 0, 2}; + if (Features.test(FeatureISAVersion9_0_3)) + return {9, 0, 3}; if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands)) return {0, 0, 0}; @@ -327,33 +337,6 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, Header.private_segment_alignment = 4; } -MCSection *getHSATextSection(MCContext &Ctx) { - return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_WRITE | - ELF::SHF_EXECINSTR | - ELF::SHF_AMDGPU_HSA_AGENT | - ELF::SHF_AMDGPU_HSA_CODE); -} - -MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) { - return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_WRITE | - ELF::SHF_AMDGPU_HSA_GLOBAL | - ELF::SHF_AMDGPU_HSA_AGENT); -} - -MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) { - return Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_WRITE | - ELF::SHF_AMDGPU_HSA_GLOBAL); -} - -MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) { - return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY | - ELF::SHF_AMDGPU_HSA_AGENT); -} - bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) { return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS; } diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 19888ad7556a..eff0230d21f5 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -149,13 +149,6 @@ int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const FeatureBitset &Features); -MCSection *getHSATextSection(MCContext &Ctx); - -MCSection *getHSADataGlobalAgentSection(MCContext &Ctx); - -MCSection *getHSADataGlobalProgramSection(MCContext &Ctx); - -MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx); bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS); bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS); diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td index 77fc9551cff9..a8ca593f14ed 100644 --- a/lib/Target/AMDGPU/VOP3Instructions.td +++ b/lib/Target/AMDGPU/VOP3Instructions.td @@ -172,8 +172,8 @@ def V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, def V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>; def V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>; def V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfi>; -def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>; -def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>; +def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbit>; +def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>; def V_MIN3_F32 : VOP3Inst <"v_min3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmin3>; def V_MIN3_I32 : VOP3Inst <"v_min3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmin3>; def V_MIN3_U32 : VOP3Inst <"v_min3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumin3>; @@ -209,7 +209,10 @@ def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, } def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_msad_u8>; + +let Constraints = "@earlyclobber $vdst" in { def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64>, int_amdgcn_mqsad_pk_u16_u8>; +} // End Constraints = "@earlyclobber $vdst" def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUtrig_preop> { let SchedRW = [WriteDouble]; @@ -232,8 +235,10 @@ def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>>; let SubtargetPredicate = isCIVI in { +let Constraints = "@earlyclobber $vdst" in { def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64>, int_amdgcn_qsad_pk_u16_u8>; def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile<VOP_V4I32_I64_I32_V4I32>, int_amdgcn_mqsad_u32_u8>; +} // End Constraints = "@earlyclobber $vdst" let isCommutable = 1 in { def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>; |