Diffstat (limited to 'lib/Target/AMDGPU')
 lib/Target/AMDGPU/AMDGPU.td                               |  13
 lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp                    | 143
 lib/Target/AMDGPU/AMDGPUAsmPrinter.h                      |   4
 lib/Target/AMDGPU/AMDGPUISelLowering.cpp                  |   2
 lib/Target/AMDGPU/AMDGPUInstrInfo.h                       |   2
 lib/Target/AMDGPU/AMDGPUMachineFunction.h                 |   2
 lib/Target/AMDGPU/AMDGPUSubtarget.cpp                     |   6
 lib/Target/AMDGPU/AMDGPUSubtarget.h                       |  20
 lib/Target/AMDGPU/AMDKernelCodeT.h                        | 121
 lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp           | 314
 lib/Target/AMDGPU/AsmParser/LLVMBuild.txt                 |   2
 lib/Target/AMDGPU/CMakeLists.txt                          |   1
 lib/Target/AMDGPU/LLVMBuild.txt                           |   4
 lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp       |  12
 lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp  |  11
 lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h         |   4
 lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp     |  22
 lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h       |   5
 lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp   | 297
 lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h     |  77
 lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt             |   1
 lib/Target/AMDGPU/Makefile                                |   2
 lib/Target/AMDGPU/Processors.td                           |  15
 lib/Target/AMDGPU/R600Defines.h                           |   4
 lib/Target/AMDGPU/R600ISelLowering.h                      |   2
 lib/Target/AMDGPU/R600InstrInfo.cpp                       |  11
 lib/Target/AMDGPU/R600InstrInfo.h                         |   2
 lib/Target/AMDGPU/R600MachineFunctionInfo.h               |   2
 lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp         |   2
 lib/Target/AMDGPU/R600TextureIntrinsicsReplacer.cpp       |   2
 lib/Target/AMDGPU/SIDefines.h                             |  31
 lib/Target/AMDGPU/SIISelLowering.cpp                      |  11
 lib/Target/AMDGPU/SIInstrInfo.cpp                         |  27
 lib/Target/AMDGPU/SIInstrInfo.h                           |   2
 lib/Target/AMDGPU/SIInstrInfo.td                          |   2
 lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp                |  60
 lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h                  |  34
 lib/Target/AMDGPU/Utils/CMakeLists.txt                    |   3
 lib/Target/AMDGPU/Utils/LLVMBuild.txt                     |  23
 lib/Target/AMDGPU/Utils/Makefile                          |  16
 40 files changed, 1050 insertions(+), 264 deletions(-)
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index 2e7e39a54d33..569ad3844b25 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -141,6 +141,19 @@ class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
+class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping>
+ : SubtargetFeature <
+ "isaver"#Major#"."#Minor#"."#Stepping,
+ "IsaVersion",
+ "ISAVersion"#Major#"_"#Minor#"_"#Stepping,
+ "Instruction set version number"
+>;
+
+def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0>;
+def FeatureISAVersion7_0_1 : SubtargetFeatureISAVersion <7,0,1>;
+def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0>;
+def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1>;
+
class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
"localmemorysize"#Value,
"LocalMemorySize",
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index afc6bcb52bb8..709d7531d38b 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -17,7 +17,9 @@
//
#include "AMDGPUAsmPrinter.h"
+#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "InstPrinter/AMDGPUInstPrinter.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "AMDGPUSubtarget.h"
@@ -89,6 +91,15 @@ AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)) {}
+void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
+ const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
+ SIProgramInfo KernelInfo;
+ if (STM.isAmdHsaOS()) {
+ getSIProgramInfo(KernelInfo, *MF);
+ EmitAmdKernelCodeT(*MF, KernelInfo);
+ }
+}
+
void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
// This label is used to mark the end of the .text section.
@@ -113,13 +124,18 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
- if (STM.isAmdHsaOS()) {
- getSIProgramInfo(KernelInfo, MF);
- EmitAmdKernelCodeT(MF, KernelInfo);
- OutStreamer->EmitCodeAlignment(2 << (MF.getAlignment() - 1));
- } else if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- getSIProgramInfo(KernelInfo, MF);
- EmitProgramInfoSI(MF, KernelInfo);
+ if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ if (!STM.isAmdHsaOS()) {
+ getSIProgramInfo(KernelInfo, MF);
+ EmitProgramInfoSI(MF, KernelInfo);
+ }
+ // Emit directives
+ AMDGPUTargetStreamer *TS =
+ static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
+ TS->EmitDirectiveHSACodeObjectVersion(1, 0);
+ AMDGPU::IsaVersion ISA = STM.getIsaVersion();
+ TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
+ "AMD", "AMDGPU");
} else {
EmitProgramInfoR600(MF);
}
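
With this change every GCN compile, HSA or not, labels its output with the code-object version and ISA of the target. On the assembly path the result is a pair of directives in each kernel's output; for a kaveri build (tagged FeatureISAVersion7_0_0 in the Processors.td hunk further down) that would look roughly like the following, formatted by the AMDGPUTargetAsmStreamer added later in this patch (illustrative output, the numbers depend on -mcpu):

        .hsa_code_object_version 1,0
        .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
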
@@ -459,125 +475,28 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
}
void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
- const SIProgramInfo &KernelInfo) const {
+ const SIProgramInfo &KernelInfo) const {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
amd_kernel_code_t header;
- memset(&header, 0, sizeof(header));
-
- header.amd_code_version_major = AMD_CODE_VERSION_MAJOR;
- header.amd_code_version_minor = AMD_CODE_VERSION_MINOR;
-
- header.struct_byte_size = sizeof(amd_kernel_code_t);
-
- header.target_chip = STM.getAmdKernelCodeChipID();
-
- header.kernel_code_entry_byte_offset = (1ULL << MF.getAlignment());
+ AMDGPU::initDefaultAMDKernelCodeT(header, STM.getFeatureBits());
header.compute_pgm_resource_registers =
KernelInfo.ComputePGMRSrc1 |
(KernelInfo.ComputePGMRSrc2 << 32);
+ header.code_properties =
+ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
+ AMD_CODE_PROPERTY_IS_PTR64;
- // Code Properties:
- header.code_properties = AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
- AMD_CODE_PROPERTY_IS_PTR64;
-
- if (KernelInfo.FlatUsed)
- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
-
- if (KernelInfo.ScratchBlocks)
- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
-
- header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
- header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
-
- // MFI->ABIArgOffset is the number of bytes for the kernel arguments
- // plus 36. 36 is the number of bytes reserved at the begining of the
- // input buffer to store work-group size information.
- // FIXME: We should be adding the size of the implicit arguments
- // to this value.
header.kernarg_segment_byte_size = MFI->ABIArgOffset;
-
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
header.workitem_vgpr_count = KernelInfo.NumVGPR;
- // FIXME: What values do I put for these alignments
- header.kernarg_segment_alignment = 0;
- header.group_segment_alignment = 0;
- header.private_segment_alignment = 0;
-
- header.code_type = 1; // HSA_EXT_CODE_KERNEL
-
- header.wavefront_size = STM.getWavefrontSize();
-
- MCSectionELF *VersionSection =
- OutContext.getELFSection(".hsa.version", ELF::SHT_PROGBITS, 0);
- OutStreamer->SwitchSection(VersionSection);
- OutStreamer->EmitBytes(Twine("HSA Code Unit:" +
- Twine(header.hsail_version_major) + "." +
- Twine(header.hsail_version_minor) + ":" +
- "AMD:" +
- Twine(header.amd_code_version_major) + "." +
- Twine(header.amd_code_version_minor) + ":" +
- "GFX8.1:0").str());
-
- OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
-
- if (isVerbose()) {
- OutStreamer->emitRawComment("amd_code_version_major = " +
- Twine(header.amd_code_version_major), false);
- OutStreamer->emitRawComment("amd_code_version_minor = " +
- Twine(header.amd_code_version_minor), false);
- OutStreamer->emitRawComment("struct_byte_size = " +
- Twine(header.struct_byte_size), false);
- OutStreamer->emitRawComment("target_chip = " +
- Twine(header.target_chip), false);
- OutStreamer->emitRawComment(" compute_pgm_rsrc1: " +
- Twine::utohexstr(KernelInfo.ComputePGMRSrc1),
- false);
- OutStreamer->emitRawComment(" compute_pgm_rsrc2: " +
- Twine::utohexstr(KernelInfo.ComputePGMRSrc2),
- false);
- OutStreamer->emitRawComment("enable_sgpr_private_segment_buffer = " +
- Twine((bool)(header.code_properties &
- AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE)), false);
- OutStreamer->emitRawComment("enable_sgpr_kernarg_segment_ptr = " +
- Twine((bool)(header.code_properties &
- AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR)), false);
- OutStreamer->emitRawComment("private_element_size = 2 ", false);
- OutStreamer->emitRawComment("is_ptr64 = " +
- Twine((bool)(header.code_properties & AMD_CODE_PROPERTY_IS_PTR64)), false);
- OutStreamer->emitRawComment("workitem_private_segment_byte_size = " +
- Twine(header.workitem_private_segment_byte_size),
- false);
- OutStreamer->emitRawComment("workgroup_group_segment_byte_size = " +
- Twine(header.workgroup_group_segment_byte_size),
- false);
- OutStreamer->emitRawComment("gds_segment_byte_size = " +
- Twine(header.gds_segment_byte_size), false);
- OutStreamer->emitRawComment("kernarg_segment_byte_size = " +
- Twine(header.kernarg_segment_byte_size), false);
- OutStreamer->emitRawComment("wavefront_sgpr_count = " +
- Twine(header.wavefront_sgpr_count), false);
- OutStreamer->emitRawComment("workitem_vgpr_count = " +
- Twine(header.workitem_vgpr_count), false);
- OutStreamer->emitRawComment("code_type = " + Twine(header.code_type), false);
- OutStreamer->emitRawComment("wavefront_size = " +
- Twine((int)header.wavefront_size), false);
- OutStreamer->emitRawComment("optimization_level = " +
- Twine(header.optimization_level), false);
- OutStreamer->emitRawComment("hsail_profile = " +
- Twine(header.hsail_profile), false);
- OutStreamer->emitRawComment("hsail_machine_model = " +
- Twine(header.hsail_machine_model), false);
- OutStreamer->emitRawComment("hsail_version_major = " +
- Twine(header.hsail_version_major), false);
- OutStreamer->emitRawComment("hsail_version_minor = " +
- Twine(header.hsail_version_minor), false);
- }
- OutStreamer->EmitBytes(StringRef((char*)&header, sizeof(header)));
+ AMDGPUTargetStreamer *TS =
+ static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
+ TS->EmitAMDKernelCodeT(header);
}
bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
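
Most of the header population that used to live in EmitAmdKernelCodeT() has moved into AMDGPU::initDefaultAMDKernelCodeT() in the new Utils library (Utils/AMDGPUBaseInfo.cpp, listed in the diffstat but not shown in this section). A rough sketch of what such a helper has to do; only the signature is taken from the callers in this patch, and the specific default values below are assumptions, not quotes from the actual implementation:

    // Assumed sketch of Utils/AMDGPUBaseInfo.cpp's initDefaultAMDKernelCodeT():
    // zero the struct, stamp the code-object and machine versions, and fill the
    // fields that do not depend on the individual kernel.
    #include "AMDGPUBaseInfo.h"   // IsaVersion, getIsaVersion (assumed header)
    #include "AMDKernelCodeT.h"
    #include <cstring>

    namespace llvm {
    namespace AMDGPU {

    void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                                   const FeatureBitset &Features) {
      IsaVersion ISA = getIsaVersion(Features);
      memset(&Header, 0, sizeof(Header));

      Header.amd_kernel_code_version_major = 1;
      Header.amd_kernel_code_version_minor = 0;
      Header.amd_machine_kind = 1;                  // assumed: AMDGPU machine kind
      Header.amd_machine_version_major = ISA.Major;
      Header.amd_machine_version_minor = ISA.Minor;
      Header.amd_machine_version_stepping = ISA.Stepping;

      Header.kernel_code_entry_byte_offset = sizeof(Header); // code follows header
      Header.wavefront_size = 6;                    // log2(64); wave64 assumed
      Header.call_convention = -1;                  // assumed: no call convention
    }

    } // End namespace AMDGPU
    } // End namespace llvm
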
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 92072512e6b5..345af9b85e15 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -97,6 +97,8 @@ public:
/// Implemented in AMDGPUMCInstLower.cpp
void EmitInstruction(const MachineInstr *MI) override;
+ void EmitFunctionBodyStart() override;
+
void EmitEndOfAsmFile(Module &M) override;
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -108,6 +110,6 @@ protected:
size_t DisasmLineMaxLen;
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 570473d85585..d56838ec2019 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -68,7 +68,7 @@ public:
};
int DiagnosticInfoUnsupported::KindID = 0;
-} // namespace
+}
static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 31ae9a3c7760..86d3962b3856 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -198,7 +198,7 @@ namespace AMDGPU {
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex);
} // End namespace AMDGPU
-} // namespace llvm
+} // End llvm namespace
#define AMDGPU_FLAG_REGISTER_LOAD (UINT64_C(1) << 63)
#define AMDGPU_FLAG_REGISTER_STORE (UINT64_C(1) << 62)
diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index e17b41ad5f21..f5e4694e76f6 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -41,5 +41,5 @@ public:
bool IsKernel;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 605ccd0e1361..0779d1d786b2 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -72,6 +72,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
+ IsaVersion(ISAVersion0_0_0),
FrameLowering(TargetFrameLowering::StackGrowsUp,
64 * 16, // Maximum stack alignment (long16)
0),
@@ -109,6 +110,10 @@ unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const {
}
}
+AMDGPU::IsaVersion AMDGPUSubtarget::getIsaVersion() const {
+ return AMDGPU::getIsaVersion(getFeatureBits());
+}
+
bool AMDGPUSubtarget::isVGPRSpillingEnabled(
const SIMachineFunctionInfo *MFI) const {
return MFI->getShaderType() == ShaderType::COMPUTE || EnableVGPRSpilling;
@@ -131,3 +136,4 @@ void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
Policy.OnlyBottomUp = false;
}
}
+
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 0d40d14f8203..30f50eb1d2f3 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -20,6 +20,8 @@
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600ISelLowering.h"
+#include "AMDKernelCodeT.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -48,6 +50,14 @@ public:
FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
};
+ enum {
+ ISAVersion0_0_0,
+ ISAVersion7_0_0,
+ ISAVersion7_0_1,
+ ISAVersion8_0_0,
+ ISAVersion8_0_1
+ };
+
private:
std::string DevName;
bool Is64bit;
@@ -77,6 +87,7 @@ private:
bool CIInsts;
bool FeatureDisable;
int LDSBankCount;
+ unsigned IsaVersion;
AMDGPUFrameLowering FrameLowering;
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
@@ -236,6 +247,8 @@ public:
unsigned getAmdKernelCodeChipID() const;
+ AMDGPU::IsaVersion getIsaVersion() const;
+
bool enableMachineScheduler() const override {
return true;
}
@@ -275,6 +288,13 @@ public:
bool enableSubRegLiveness() const override {
return true;
}
+
+ /// \brief Returns the offset in bytes from the start of the input buffer
+ /// of the first explicit kernel argument.
+ unsigned getExplicitKernelArgOffset() const {
+ return isAmdHsaOS() ? 0 : 36;
+ }
+
};
} // End namespace llvm
diff --git a/lib/Target/AMDGPU/AMDKernelCodeT.h b/lib/Target/AMDGPU/AMDKernelCodeT.h
index eaffb854793c..a9ba60c8cbad 100644
--- a/lib/Target/AMDGPU/AMDKernelCodeT.h
+++ b/lib/Target/AMDGPU/AMDKernelCodeT.h
@@ -12,9 +12,12 @@
#ifndef AMDKERNELCODET_H
#define AMDKERNELCODET_H
+#include "llvm/MC/SubtargetFeature.h"
+
#include <cstddef>
#include <cstdint>
+#include "llvm/Support/Debug.h"
//---------------------------------------------------------------------------//
// AMD Kernel Code, and its dependencies //
//---------------------------------------------------------------------------//
@@ -142,7 +145,7 @@ enum amd_code_property_mask_t {
/// the GPU flat scratch (SH_STATIC_MEM_CONFIG.ELEMENT_SIZE). This
/// is generally DWORD.
///
- /// uSE VALUES FROM THE AMD_ELEMENT_BYTE_SIZE_T ENUM.
+ /// Use values from the amd_element_byte_size_t enum.
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT = 11,
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH = 2,
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE = ((1 << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH) - 1) << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT,
@@ -171,7 +174,11 @@ enum amd_code_property_mask_t {
/// Indicate if code generated has support for debugging.
AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT = 15,
AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH = 1,
- AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT
+ AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT,
+
+ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT = 15,
+ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH = 1,
+ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT
};
/// @brief The hsa_ext_control_directives_t specifies the values for the HSAIL
@@ -369,7 +376,7 @@ typedef struct hsa_ext_control_directives_s {
/// Scratch Wave Offset must be added by the kernel code and moved to
/// SGPRn-4 for use as the FLAT SCRATCH BASE in flat memory instructions.
///
-/// The second SGPR is 32 bit byte size of a single work-item’s scratch
+/// The second SGPR is 32 bit byte size of a single work-item's scratch
/// memory usage. This is directly loaded from the dispatch packet Private
/// Segment Byte Size and rounded up to a multiple of DWORD.
///
@@ -385,7 +392,7 @@ typedef struct hsa_ext_control_directives_s {
///
/// Private Segment Size (enable_sgpr_private_segment_size):
/// Number of User SGPR registers: 1. The 32 bit byte size of a single
-/// work-item’s scratch memory allocation. This is the value from the dispatch
+/// work-item's scratch memory allocation. This is the value from the dispatch
/// packet. Private Segment Byte Size rounded up by CP to a multiple of DWORD.
///
/// \todo [Does CP need to round this to >4 byte alignment?]
@@ -433,7 +440,7 @@ typedef struct hsa_ext_control_directives_s {
/// present
///
/// Work-Group Info (enable_sgpr_workgroup_info):
-/// Number of System SGPR registers: 1. {first_wave, 14’b0000,
+/// Number of System SGPR registers: 1. {first_wave, 14'b0000,
/// ordered_append_term[10:0], threadgroup_size_in_waves[5:0]}
///
/// Private Segment Wave Byte Offset
@@ -499,25 +506,14 @@ typedef struct hsa_ext_control_directives_s {
/// Alternatively scalar loads can be used if the kernarg offset is uniform, as
/// the kernarg segment is constant for the duration of the kernel execution.
///
-typedef struct amd_kernel_code_s {
- /// The AMD major version of the Code Object. Must be the value
- /// AMD_CODE_VERSION_MAJOR.
- amd_code_version32_t amd_code_version_major;
- /// The AMD minor version of the Code Object. Minor versions must be
- /// backward compatible. Must be the value
- /// AMD_CODE_VERSION_MINOR.
- amd_code_version32_t amd_code_version_minor;
-
- /// The byte size of this struct. Must be set to
- /// sizeof(amd_kernel_code_t). Used for backward
- /// compatibility.
- uint32_t struct_byte_size;
-
- /// The target chip instruction set for which code has been
- /// generated. Values are from the E_SC_INSTRUCTION_SET enumeration
- /// in sc/Interface/SCCommon.h.
- uint32_t target_chip;
+typedef struct amd_kernel_code_s {
+ uint32_t amd_kernel_code_version_major;
+ uint32_t amd_kernel_code_version_minor;
+ uint16_t amd_machine_kind;
+ uint16_t amd_machine_version_major;
+ uint16_t amd_machine_version_minor;
+ uint16_t amd_machine_version_stepping;
/// Byte offset (possibly negative) from start of amd_kernel_code_t
/// object to kernel's entry point instruction. The actual code for
@@ -535,10 +531,6 @@ typedef struct amd_kernel_code_s {
/// and size. The offset is from the start (possibly negative) of
/// amd_kernel_code_t object. Set both to 0 if no prefetch
/// information is available.
- ///
- /// \todo ttye 11/15/2013 Is the prefetch definition we want? Did
- /// not make the size a uint64_t as prefetching more than 4GiB seems
- /// excessive.
int64_t kernel_code_prefetch_byte_offset;
uint64_t kernel_code_prefetch_byte_size;
@@ -553,11 +545,11 @@ typedef struct amd_kernel_code_s {
/// Shader program settings for CS. Contains COMPUTE_PGM_RSRC1 and
/// COMPUTE_PGM_RSRC2 registers.
- amd_compute_pgm_resource_register64_t compute_pgm_resource_registers;
+ uint64_t compute_pgm_resource_registers;
/// Code properties. See amd_code_property_mask_t for a full list of
/// properties.
- amd_code_property32_t code_properties;
+ uint32_t code_properties;
/// The amount of memory required for the combined private, spill
/// and arg segments for a work-item in bytes. If
@@ -629,76 +621,21 @@ typedef struct amd_kernel_code_s {
/// The maximum byte alignment of variables used by the kernel in
/// the specified memory segment. Expressed as a power of two. Must
/// be at least HSA_POWERTWO_16.
- hsa_powertwo8_t kernarg_segment_alignment;
- hsa_powertwo8_t group_segment_alignment;
- hsa_powertwo8_t private_segment_alignment;
-
- uint8_t reserved3;
-
- /// Type of code object.
- hsa_ext_code_kind32_t code_type;
-
- /// Reserved for code properties if any are defined in the future.
- /// There are currently no code properties so this field must be 0.
- uint32_t reserved4;
+ uint8_t kernarg_segment_alignment;
+ uint8_t group_segment_alignment;
+ uint8_t private_segment_alignment;
/// Wavefront size expressed as a power of two. Must be a power of 2
/// in range 1..64 inclusive. Used to support runtime query that
/// obtains wavefront size, which may be used by application to
/// allocated dynamic group memory and set the dispatch work-group
/// size.
- hsa_powertwo8_t wavefront_size;
-
- /// The optimization level specified when the kernel was
- /// finalized.
- uint8_t optimization_level;
-
- /// The HSAIL profile defines which features are used. This
- /// information is from the HSAIL version directive. If this
- /// amd_kernel_code_t is not generated from an HSAIL compilation
- /// unit then must be 0.
- hsa_ext_brig_profile8_t hsail_profile;
-
- /// The HSAIL machine model gives the address sizes used by the
- /// code. This information is from the HSAIL version directive. If
- /// not generated from an HSAIL compilation unit then must still
- /// indicate for what machine mode the code is generated.
- hsa_ext_brig_machine_model8_t hsail_machine_model;
-
- /// The HSAIL major version. This information is from the HSAIL
- /// version directive. If this amd_kernel_code_t is not
- /// generated from an HSAIL compilation unit then must be 0.
- uint32_t hsail_version_major;
-
- /// The HSAIL minor version. This information is from the HSAIL
- /// version directive. If this amd_kernel_code_t is not
- /// generated from an HSAIL compilation unit then must be 0.
- uint32_t hsail_version_minor;
-
- /// Reserved for HSAIL target options if any are defined in the
- /// future. There are currently no target options so this field
- /// must be 0.
- uint16_t reserved5;
-
- /// Reserved. Must be 0.
- uint16_t reserved6;
-
- /// The values should be the actually values used by the finalizer
- /// in generating the code. This may be the union of values
- /// specified as finalizer arguments and explicit HSAIL control
- /// directives. If the finalizer chooses to ignore a control
- /// directive, and not generate constrained code, then the control
- /// directive should not be marked as enabled even though it was
- /// present in the HSAIL or finalizer argument. The values are
- /// intended to reflect the constraints that the code actually
- /// requires to correctly execute, not the values that were
- /// actually specified at finalize time.
- hsa_ext_control_directives_t control_directive;
-
- /// The code can immediately follow the amd_kernel_code_t, or can
- /// come after subsequent amd_kernel_code_t structs when there are
- /// multiple kernels in the compilation unit.
+ uint8_t wavefront_size;
+ int32_t call_convention;
+ uint8_t reserved3[12];
+ uint64_t runtime_loader_kernel_symbol;
+ uint64_t control_directives[16];
} amd_kernel_code_t;
#endif // AMDKERNELCODET_H
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 80081d40d089..2018983bc306 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -8,6 +8,9 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/AMDGPUTargetStreamer.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
@@ -314,6 +317,13 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
/// }
+private:
+ bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
+ bool ParseDirectiveHSACodeObjectVersion();
+ bool ParseDirectiveHSACodeObjectISA();
+ bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
+ bool ParseDirectiveAMDKernelCodeT();
+
public:
AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &_Parser,
const MCInstrInfo &MII,
@@ -329,6 +339,11 @@ public:
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
+ AMDGPUTargetStreamer &getTargetStreamer() {
+ MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
+ return static_cast<AMDGPUTargetStreamer &>(TS);
+ }
+
unsigned getForcedEncodingSize() const {
return ForcedEncodingSize;
}
@@ -403,7 +418,7 @@ struct OptionalOperand {
bool (*ConvertResult)(int64_t&);
};
-} // namespace
+}
static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
if (IsVgpr) {
@@ -581,7 +596,304 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
llvm_unreachable("Implement any new match types added!");
}
+bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
+ uint32_t &Minor) {
+ if (getLexer().isNot(AsmToken::Integer))
+ return TokError("invalid major version");
+
+ Major = getLexer().getTok().getIntVal();
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("minor version number required, comma expected");
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Integer))
+ return TokError("invalid minor version");
+
+ Minor = getLexer().getTok().getIntVal();
+ Lex();
+
+ return false;
+}
+
+bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
+
+ uint32_t Major;
+ uint32_t Minor;
+
+ if (ParseDirectiveMajorMinor(Major, Minor))
+ return true;
+
+ getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
+ return false;
+}
+
+bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
+
+ uint32_t Major;
+ uint32_t Minor;
+ uint32_t Stepping;
+ StringRef VendorName;
+ StringRef ArchName;
+
+ // If this directive has no arguments, then use the ISA version for the
+ // targeted GPU.
+ if (getLexer().is(AsmToken::EndOfStatement)) {
+ AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(STI.getFeatureBits());
+ getTargetStreamer().EmitDirectiveHSACodeObjectISA(Isa.Major, Isa.Minor,
+ Isa.Stepping,
+ "AMD", "AMDGPU");
+ return false;
+ }
+
+
+ if (ParseDirectiveMajorMinor(Major, Minor))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("stepping version number required, comma expected");
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Integer))
+ return TokError("invalid stepping version");
+
+ Stepping = getLexer().getTok().getIntVal();
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("vendor name required, comma expected");
+ Lex();
+
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("invalid vendor name");
+
+ VendorName = getLexer().getTok().getStringContents();
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("arch name required, comma expected");
+ Lex();
+
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("invalid arch name");
+
+ ArchName = getLexer().getTok().getStringContents();
+ Lex();
+
+ getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
+ VendorName, ArchName);
+ return false;
+}
+
+bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
+ amd_kernel_code_t &Header) {
+
+ if (getLexer().isNot(AsmToken::Equal))
+ return TokError("expected '='");
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Integer))
+ return TokError("amd_kernel_code_t values must be integers");
+
+ uint64_t Value = getLexer().getTok().getIntVal();
+ Lex();
+
+ if (ID == "kernel_code_version_major")
+ Header.amd_kernel_code_version_major = Value;
+ else if (ID == "kernel_code_version_minor")
+ Header.amd_kernel_code_version_minor = Value;
+ else if (ID == "machine_kind")
+ Header.amd_machine_kind = Value;
+ else if (ID == "machine_version_major")
+ Header.amd_machine_version_major = Value;
+ else if (ID == "machine_version_minor")
+ Header.amd_machine_version_minor = Value;
+ else if (ID == "machine_version_stepping")
+ Header.amd_machine_version_stepping = Value;
+ else if (ID == "kernel_code_entry_byte_offset")
+ Header.kernel_code_entry_byte_offset = Value;
+ else if (ID == "kernel_code_prefetch_byte_size")
+ Header.kernel_code_prefetch_byte_size = Value;
+ else if (ID == "max_scratch_backing_memory_byte_size")
+ Header.max_scratch_backing_memory_byte_size = Value;
+ else if (ID == "compute_pgm_rsrc1_vgprs")
+ Header.compute_pgm_resource_registers |= S_00B848_VGPRS(Value);
+ else if (ID == "compute_pgm_rsrc1_sgprs")
+ Header.compute_pgm_resource_registers |= S_00B848_SGPRS(Value);
+ else if (ID == "compute_pgm_rsrc1_priority")
+ Header.compute_pgm_resource_registers |= S_00B848_PRIORITY(Value);
+ else if (ID == "compute_pgm_rsrc1_float_mode")
+ Header.compute_pgm_resource_registers |= S_00B848_FLOAT_MODE(Value);
+ else if (ID == "compute_pgm_rsrc1_priv")
+ Header.compute_pgm_resource_registers |= S_00B848_PRIV(Value);
+ else if (ID == "compute_pgm_rsrc1_dx10_clamp")
+ Header.compute_pgm_resource_registers |= S_00B848_DX10_CLAMP(Value);
+ else if (ID == "compute_pgm_rsrc1_debug_mode")
+ Header.compute_pgm_resource_registers |= S_00B848_DEBUG_MODE(Value);
+ else if (ID == "compute_pgm_rsrc1_ieee_mode")
+ Header.compute_pgm_resource_registers |= S_00B848_IEEE_MODE(Value);
+ else if (ID == "compute_pgm_rsrc2_scratch_en")
+ Header.compute_pgm_resource_registers |= (S_00B84C_SCRATCH_EN(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_user_sgpr")
+ Header.compute_pgm_resource_registers |= (S_00B84C_USER_SGPR(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_tgid_x_en")
+ Header.compute_pgm_resource_registers |= (S_00B84C_TGID_X_EN(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_tgid_y_en")
+ Header.compute_pgm_resource_registers |= (S_00B84C_TGID_Y_EN(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_tgid_z_en")
+ Header.compute_pgm_resource_registers |= (S_00B84C_TGID_Z_EN(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_tg_size_en")
+ Header.compute_pgm_resource_registers |= (S_00B84C_TG_SIZE_EN(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_tidig_comp_cnt")
+ Header.compute_pgm_resource_registers |=
+ (S_00B84C_TIDIG_COMP_CNT(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_excp_en_msb")
+ Header.compute_pgm_resource_registers |=
+ (S_00B84C_EXCP_EN_MSB(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_lds_size")
+ Header.compute_pgm_resource_registers |= (S_00B84C_LDS_SIZE(Value) << 32);
+ else if (ID == "compute_pgm_rsrc2_excp_en")
+ Header.compute_pgm_resource_registers |= (S_00B84C_EXCP_EN(Value) << 32);
+ else if (ID == "compute_pgm_resource_registers")
+ Header.compute_pgm_resource_registers = Value;
+ else if (ID == "enable_sgpr_private_segment_buffer")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT);
+ else if (ID == "enable_sgpr_dispatch_ptr")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT);
+ else if (ID == "enable_sgpr_queue_ptr")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT);
+ else if (ID == "enable_sgpr_kernarg_segment_ptr")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT);
+ else if (ID == "enable_sgpr_dispatch_id")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT);
+ else if (ID == "enable_sgpr_flat_scratch_init")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT);
+ else if (ID == "enable_sgpr_private_segment_size")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT);
+ else if (ID == "enable_sgpr_grid_workgroup_count_x")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT);
+ else if (ID == "enable_sgpr_grid_workgroup_count_y")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT);
+ else if (ID == "enable_sgpr_grid_workgroup_count_z")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT);
+ else if (ID == "enable_ordered_append_gds")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT);
+ else if (ID == "private_element_size")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT);
+ else if (ID == "is_ptr64")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_IS_PTR64_SHIFT);
+ else if (ID == "is_dynamic_callstack")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT);
+ else if (ID == "is_debug_enabled")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT);
+ else if (ID == "is_xnack_enabled")
+ Header.code_properties |=
+ (Value << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT);
+ else if (ID == "workitem_private_segment_byte_size")
+ Header.workitem_private_segment_byte_size = Value;
+ else if (ID == "workgroup_group_segment_byte_size")
+ Header.workgroup_group_segment_byte_size = Value;
+ else if (ID == "gds_segment_byte_size")
+ Header.gds_segment_byte_size = Value;
+ else if (ID == "kernarg_segment_byte_size")
+ Header.kernarg_segment_byte_size = Value;
+ else if (ID == "workgroup_fbarrier_count")
+ Header.workgroup_fbarrier_count = Value;
+ else if (ID == "wavefront_sgpr_count")
+ Header.wavefront_sgpr_count = Value;
+ else if (ID == "workitem_vgpr_count")
+ Header.workitem_vgpr_count = Value;
+ else if (ID == "reserved_vgpr_first")
+ Header.reserved_vgpr_first = Value;
+ else if (ID == "reserved_vgpr_count")
+ Header.reserved_vgpr_count = Value;
+ else if (ID == "reserved_sgpr_first")
+ Header.reserved_sgpr_first = Value;
+ else if (ID == "reserved_sgpr_count")
+ Header.reserved_sgpr_count = Value;
+ else if (ID == "debug_wavefront_private_segment_offset_sgpr")
+ Header.debug_wavefront_private_segment_offset_sgpr = Value;
+ else if (ID == "debug_private_segment_buffer_sgpr")
+ Header.debug_private_segment_buffer_sgpr = Value;
+ else if (ID == "kernarg_segment_alignment")
+ Header.kernarg_segment_alignment = Value;
+ else if (ID == "group_segment_alignment")
+ Header.group_segment_alignment = Value;
+ else if (ID == "private_segment_alignment")
+ Header.private_segment_alignment = Value;
+ else if (ID == "wavefront_size")
+ Header.wavefront_size = Value;
+ else if (ID == "call_convention")
+ Header.call_convention = Value;
+ else if (ID == "runtime_loader_kernel_symbol")
+ Header.runtime_loader_kernel_symbol = Value;
+ else
+ return TokError("amd_kernel_code_t value not recognized.");
+
+ return false;
+}
+
+bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
+
+ amd_kernel_code_t Header;
+ AMDGPU::initDefaultAMDKernelCodeT(Header, STI.getFeatureBits());
+
+ while (true) {
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("amd_kernel_code_t values must begin on a new line");
+
+ // Lex EndOfStatement. This is in a while loop, because lexing a comment
+ // will set the current token to EndOfStatement.
+ while(getLexer().is(AsmToken::EndOfStatement))
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Identifier))
+ return TokError("expected value identifier or .end_amd_kernel_code_t");
+
+ StringRef ID = getLexer().getTok().getIdentifier();
+ Lex();
+
+ if (ID == ".end_amd_kernel_code_t")
+ break;
+
+ if (ParseAMDKernelCodeTValue(ID, Header))
+ return true;
+ }
+
+ getTargetStreamer().EmitAMDKernelCodeT(Header);
+
+ return false;
+}
+
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getString();
+
+ if (IDVal == ".hsa_code_object_version")
+ return ParseDirectiveHSACodeObjectVersion();
+
+ if (IDVal == ".hsa_code_object_isa")
+ return ParseDirectiveHSACodeObjectISA();
+
+ if (IDVal == ".amd_kernel_code_t")
+ return ParseDirectiveAMDKernelCodeT();
+
return true;
}
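
These parser hooks make the textual header round-trip: the block printed by AMDGPUTargetAsmStreamer (see the new MCTargetDesc/AMDGPUTargetStreamer.cpp below) can be fed straight back through the assembler. A hand-written fragment in the accepted syntax might look like the following; the field values are purely illustrative, and any field left out keeps whatever initDefaultAMDKernelCodeT() filled in:

        .hsa_code_object_version 1,0
        .hsa_code_object_isa

        .amd_kernel_code_t
                kernel_code_entry_byte_offset = 256
                compute_pgm_rsrc1_vgprs = 1
                compute_pgm_rsrc1_sgprs = 1
                enable_sgpr_kernarg_segment_ptr = 1
                is_ptr64 = 1
                wavefront_size = 6
        .end_amd_kernel_code_t

Assembling such a file for an amdgcn/amdhsa target should then go through the ELF streamer registered in AMDGPUMCTargetDesc.cpp below, which writes the raw header bytes into .text instead of re-printing the directives.
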
diff --git a/lib/Target/AMDGPU/AsmParser/LLVMBuild.txt b/lib/Target/AMDGPU/AsmParser/LLVMBuild.txt
index 63d44d1e06f1..dab0c6f585af 100644
--- a/lib/Target/AMDGPU/AsmParser/LLVMBuild.txt
+++ b/lib/Target/AMDGPU/AsmParser/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = AMDGPUAsmParser
parent = AMDGPU
-required_libraries = MC MCParser AMDGPUDesc AMDGPUInfo Support
+required_libraries = MC MCParser AMDGPUDesc AMDGPUInfo AMDGPUUtils Support
add_to_library_groups = AMDGPU
diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt
index 3e5ff1f3c6d4..9460bf6b9338 100644
--- a/lib/Target/AMDGPU/CMakeLists.txt
+++ b/lib/Target/AMDGPU/CMakeLists.txt
@@ -62,3 +62,4 @@ add_subdirectory(AsmParser)
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
+add_subdirectory(Utils)
diff --git a/lib/Target/AMDGPU/LLVMBuild.txt b/lib/Target/AMDGPU/LLVMBuild.txt
index c6861df91ed6..38c5489586f1 100644
--- a/lib/Target/AMDGPU/LLVMBuild.txt
+++ b/lib/Target/AMDGPU/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo
+subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo Utils
[component_0]
type = TargetGroup
@@ -29,5 +29,5 @@ has_asmprinter = 1
type = Library
name = AMDGPUCodeGen
parent = AMDGPU
-required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmParser AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo Scalar SelectionDAG Support Target TransformUtils
+required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmParser AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo AMDGPUUtils Scalar SelectionDAG Support Target TransformUtils
add_to_library_groups = AMDGPU
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index 8bed2deef4cd..468563c44982 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -127,11 +127,14 @@ bool AMDGPUAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
namespace {
class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend {
+ bool Is64Bit;
+
public:
- ELFAMDGPUAsmBackend(const Target &T) : AMDGPUAsmBackend(T) { }
+ ELFAMDGPUAsmBackend(const Target &T, bool Is64Bit) :
+ AMDGPUAsmBackend(T), Is64Bit(Is64Bit) { }
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
- return createAMDGPUELFObjectWriter(OS);
+ return createAMDGPUELFObjectWriter(Is64Bit, OS);
}
};
@@ -140,5 +143,8 @@ public:
MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU) {
- return new ELFAMDGPUAsmBackend(T);
+ Triple TargetTriple(TT);
+
+ // Use 64-bit ELF for amdgcn
+ return new ELFAMDGPUAsmBackend(T, TargetTriple.getArch() == Triple::amdgcn);
}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 59f45ff02d88..820f17df8960 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -18,7 +18,7 @@ namespace {
class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter {
public:
- AMDGPUELFObjectWriter();
+ AMDGPUELFObjectWriter(bool Is64Bit);
protected:
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override {
@@ -30,10 +30,11 @@ protected:
} // End anonymous namespace
-AMDGPUELFObjectWriter::AMDGPUELFObjectWriter()
- : MCELFObjectTargetWriter(false, 0, 0, false) { }
+AMDGPUELFObjectWriter::AMDGPUELFObjectWriter(bool Is64Bit)
+ : MCELFObjectTargetWriter(Is64Bit, ELF::ELFOSABI_AMDGPU_HSA,
+ ELF::EM_AMDGPU, false) { }
-MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_pwrite_stream &OS) {
- MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter();
+MCObjectWriter *llvm::createAMDGPUELFObjectWriter(bool Is64Bit, raw_pwrite_stream &OS) {
+ MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter(Is64Bit);
return createELFObjectWriter(MOTW, OS, true);
}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
index fa3b3c3d9489..01021d67ffd9 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
@@ -28,7 +28,7 @@ enum Fixups {
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
};
-} // namespace AMDGPU
-} // namespace llvm
+}
+}
#endif
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index a7d3dd1345f9..7172e4bb9335 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -14,6 +14,7 @@
#include "AMDGPUMCTargetDesc.h"
#include "AMDGPUMCAsmInfo.h"
+#include "AMDGPUTargetStreamer.h"
#include "InstPrinter/AMDGPUInstPrinter.h"
#include "SIDefines.h"
#include "llvm/MC/MCCodeGenInfo.h"
@@ -72,6 +73,19 @@ static MCInstPrinter *createAMDGPUMCInstPrinter(const Triple &T,
return new AMDGPUInstPrinter(MAI, MII, MRI);
}
+static MCTargetStreamer *createAMDGPUAsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new AMDGPUTargetAsmStreamer(S, OS);
+}
+
+static MCTargetStreamer * createAMDGPUObjectTargetStreamer(
+ MCStreamer &S,
+ const MCSubtargetInfo &STI) {
+ return new AMDGPUTargetELFStreamer(S);
+}
+
extern "C" void LLVMInitializeAMDGPUTargetMC() {
for (Target *T : {&TheAMDGPUTarget, &TheGCNTarget}) {
RegisterMCAsmInfo<AMDGPUMCAsmInfo> X(*T);
@@ -84,7 +98,15 @@ extern "C" void LLVMInitializeAMDGPUTargetMC() {
TargetRegistry::RegisterMCAsmBackend(*T, createAMDGPUAsmBackend);
}
+ // R600 specific registration
TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget,
createR600MCCodeEmitter);
+
+ // GCN specific registration
TargetRegistry::RegisterMCCodeEmitter(TheGCNTarget, createSIMCCodeEmitter);
+
+ TargetRegistry::RegisterAsmTargetStreamer(TheGCNTarget,
+ createAMDGPUAsmTargetStreamer);
+ TargetRegistry::RegisterObjectTargetStreamer(TheGCNTarget,
+ createAMDGPUObjectTargetStreamer);
}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
index ac611b862a1a..5d1b86b8c0c2 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -46,8 +46,9 @@ MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU);
-MCObjectWriter *createAMDGPUELFObjectWriter(raw_pwrite_stream &OS);
-} // namespace llvm
+MCObjectWriter *createAMDGPUELFObjectWriter(bool Is64Bit,
+ raw_pwrite_stream &OS);
+} // End llvm namespace
#define GET_REGINFO_ENUM
#include "AMDGPUGenRegisterInfo.inc"
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
new file mode 100644
index 000000000000..09e6cb1f1ffc
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -0,0 +1,297 @@
+//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides AMDGPU specific target streamer methods.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUTargetStreamer.h"
+#include "SIDefines.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/FormattedStream.h"
+
+using namespace llvm;
+
+AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S)
+ : MCTargetStreamer(S) { }
+
+//===----------------------------------------------------------------------===//
+// AMDGPUTargetAsmStreamer
+//===----------------------------------------------------------------------===//
+
+AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS)
+ : AMDGPUTargetStreamer(S), OS(OS) { }
+
+void
+AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
+ uint32_t Minor) {
+ OS << "\t.hsa_code_object_version " <<
+ Twine(Major) << "," << Twine(Minor) << '\n';
+}
+
+void
+AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
+ uint32_t Minor,
+ uint32_t Stepping,
+ StringRef VendorName,
+ StringRef ArchName) {
+ OS << "\t.hsa_code_object_isa " <<
+ Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) <<
+ ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
+
+}
+
+void
+AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
+ uint64_t ComputePgmRsrc2 = (Header.compute_pgm_resource_registers >> 32);
+ bool EnableSGPRPrivateSegmentBuffer = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
+ bool EnableSGPRDispatchPtr = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
+ bool EnableSGPRQueuePtr = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
+ bool EnableSGPRKernargSegmentPtr = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
+ bool EnableSGPRDispatchID = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
+ bool EnableSGPRFlatScratchInit = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
+ bool EnableSGPRPrivateSegmentSize = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
+ bool EnableSGPRGridWorkgroupCountX = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X);
+ bool EnableSGPRGridWorkgroupCountY = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y);
+ bool EnableSGPRGridWorkgroupCountZ = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z);
+ bool EnableOrderedAppendGDS = (Header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS);
+ uint32_t PrivateElementSize = (Header.code_properties &
+ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE) >>
+ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT;
+ bool IsPtr64 = (Header.code_properties & AMD_CODE_PROPERTY_IS_PTR64);
+ bool IsDynamicCallstack = (Header.code_properties &
+ AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK);
+ bool IsDebugEnabled = (Header.code_properties &
+ AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED);
+ bool IsXNackEnabled = (Header.code_properties &
+ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED);
+
+ OS << "\t.amd_kernel_code_t\n" <<
+ "\t\tkernel_code_version_major = " <<
+ Header.amd_kernel_code_version_major << '\n' <<
+ "\t\tkernel_code_version_minor = " <<
+ Header.amd_kernel_code_version_minor << '\n' <<
+ "\t\tmachine_kind = " <<
+ Header.amd_machine_kind << '\n' <<
+ "\t\tmachine_version_major = " <<
+ Header.amd_machine_version_major << '\n' <<
+ "\t\tmachine_version_minor = " <<
+ Header.amd_machine_version_minor << '\n' <<
+ "\t\tmachine_version_stepping = " <<
+ Header.amd_machine_version_stepping << '\n' <<
+ "\t\tkernel_code_entry_byte_offset = " <<
+ Header.kernel_code_entry_byte_offset << '\n' <<
+ "\t\tkernel_code_prefetch_byte_size = " <<
+ Header.kernel_code_prefetch_byte_size << '\n' <<
+ "\t\tmax_scratch_backing_memory_byte_size = " <<
+ Header.max_scratch_backing_memory_byte_size << '\n' <<
+ "\t\tcompute_pgm_rsrc1_vgprs = " <<
+ G_00B848_VGPRS(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc1_sgprs = " <<
+ G_00B848_SGPRS(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc1_priority = " <<
+ G_00B848_PRIORITY(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc1_float_mode = " <<
+ G_00B848_FLOAT_MODE(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc1_priv = " <<
+ G_00B848_PRIV(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc1_dx10_clamp = " <<
+ G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc1_debug_mode = " <<
+ G_00B848_DEBUG_MODE(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc1_ieee_mode = " <<
+ G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_scratch_en = " <<
+ G_00B84C_SCRATCH_EN(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_user_sgpr = " <<
+ G_00B84C_USER_SGPR(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_tgid_x_en = " <<
+ G_00B84C_TGID_X_EN(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_tgid_y_en = " <<
+ G_00B84C_TGID_Y_EN(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_tgid_z_en = " <<
+ G_00B84C_TGID_Z_EN(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_tg_size_en = " <<
+ G_00B84C_TG_SIZE_EN(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_tidig_comp_cnt = " <<
+ G_00B84C_TIDIG_COMP_CNT(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_excp_en_msb = " <<
+ G_00B84C_EXCP_EN_MSB(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_lds_size = " <<
+ G_00B84C_LDS_SIZE(ComputePgmRsrc2) << '\n' <<
+ "\t\tcompute_pgm_rsrc2_excp_en = " <<
+ G_00B84C_EXCP_EN(ComputePgmRsrc2) << '\n' <<
+
+ "\t\tenable_sgpr_private_segment_buffer = " <<
+ EnableSGPRPrivateSegmentBuffer << '\n' <<
+ "\t\tenable_sgpr_dispatch_ptr = " <<
+ EnableSGPRDispatchPtr << '\n' <<
+ "\t\tenable_sgpr_queue_ptr = " <<
+ EnableSGPRQueuePtr << '\n' <<
+ "\t\tenable_sgpr_kernarg_segment_ptr = " <<
+ EnableSGPRKernargSegmentPtr << '\n' <<
+ "\t\tenable_sgpr_dispatch_id = " <<
+ EnableSGPRDispatchID << '\n' <<
+ "\t\tenable_sgpr_flat_scratch_init = " <<
+ EnableSGPRFlatScratchInit << '\n' <<
+ "\t\tenable_sgpr_private_segment_size = " <<
+ EnableSGPRPrivateSegmentSize << '\n' <<
+ "\t\tenable_sgpr_grid_workgroup_count_x = " <<
+ EnableSGPRGridWorkgroupCountX << '\n' <<
+ "\t\tenable_sgpr_grid_workgroup_count_y = " <<
+ EnableSGPRGridWorkgroupCountY << '\n' <<
+ "\t\tenable_sgpr_grid_workgroup_count_z = " <<
+ EnableSGPRGridWorkgroupCountZ << '\n' <<
+ "\t\tenable_ordered_append_gds = " <<
+ EnableOrderedAppendGDS << '\n' <<
+ "\t\tprivate_element_size = " <<
+ PrivateElementSize << '\n' <<
+ "\t\tis_ptr64 = " <<
+ IsPtr64 << '\n' <<
+ "\t\tis_dynamic_callstack = " <<
+ IsDynamicCallstack << '\n' <<
+ "\t\tis_debug_enabled = " <<
+ IsDebugEnabled << '\n' <<
+ "\t\tis_xnack_enabled = " <<
+ IsXNackEnabled << '\n' <<
+ "\t\tworkitem_private_segment_byte_size = " <<
+ Header.workitem_private_segment_byte_size << '\n' <<
+ "\t\tworkgroup_group_segment_byte_size = " <<
+ Header.workgroup_group_segment_byte_size << '\n' <<
+ "\t\tgds_segment_byte_size = " <<
+ Header.gds_segment_byte_size << '\n' <<
+ "\t\tkernarg_segment_byte_size = " <<
+ Header.kernarg_segment_byte_size << '\n' <<
+ "\t\tworkgroup_fbarrier_count = " <<
+ Header.workgroup_fbarrier_count << '\n' <<
+ "\t\twavefront_sgpr_count = " <<
+ Header.wavefront_sgpr_count << '\n' <<
+ "\t\tworkitem_vgpr_count = " <<
+ Header.workitem_vgpr_count << '\n' <<
+ "\t\treserved_vgpr_first = " <<
+ Header.reserved_vgpr_first << '\n' <<
+ "\t\treserved_vgpr_count = " <<
+ Header.reserved_vgpr_count << '\n' <<
+ "\t\treserved_sgpr_first = " <<
+ Header.reserved_sgpr_first << '\n' <<
+ "\t\treserved_sgpr_count = " <<
+ Header.reserved_sgpr_count << '\n' <<
+ "\t\tdebug_wavefront_private_segment_offset_sgpr = " <<
+ Header.debug_wavefront_private_segment_offset_sgpr << '\n' <<
+ "\t\tdebug_private_segment_buffer_sgpr = " <<
+ Header.debug_private_segment_buffer_sgpr << '\n' <<
+ "\t\tkernarg_segment_alignment = " <<
+ (uint32_t)Header.kernarg_segment_alignment << '\n' <<
+ "\t\tgroup_segment_alignment = " <<
+ (uint32_t)Header.group_segment_alignment << '\n' <<
+ "\t\tprivate_segment_alignment = " <<
+ (uint32_t)Header.private_segment_alignment << '\n' <<
+ "\t\twavefront_size = " <<
+ (uint32_t)Header.wavefront_size << '\n' <<
+ "\t\tcall_convention = " <<
+ Header.call_convention << '\n' <<
+ "\t\truntime_loader_kernel_symbol = " <<
+ Header.runtime_loader_kernel_symbol << '\n' <<
+ // TODO: control_directives
+ "\t.end_amd_kernel_code_t\n";
+
+}
+
+//===----------------------------------------------------------------------===//
+// AMDGPUTargetELFStreamer
+//===----------------------------------------------------------------------===//
+
+AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S)
+ : AMDGPUTargetStreamer(S), Streamer(S) { }
+
+MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
+ return static_cast<MCELFStreamer &>(Streamer);
+}
+
+void
+AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
+ uint32_t Minor) {
+ MCStreamer &OS = getStreamer();
+ MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0);
+
+ unsigned NameSZ = 4;
+
+ OS.PushSection();
+ OS.SwitchSection(Note);
+ OS.EmitIntValue(NameSZ, 4); // namesz
+ OS.EmitIntValue(8, 4); // descsz
+ OS.EmitIntValue(NT_AMDGPU_HSA_CODE_OBJECT_VERSION, 4); // type
+ OS.EmitBytes(StringRef("AMD", NameSZ)); // name
+ OS.EmitIntValue(Major, 4); // desc
+ OS.EmitIntValue(Minor, 4);
+ OS.EmitValueToAlignment(4);
+ OS.PopSection();
+}
+
+void
+AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
+ uint32_t Minor,
+ uint32_t Stepping,
+ StringRef VendorName,
+ StringRef ArchName) {
+ MCStreamer &OS = getStreamer();
+ MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0);
+
+ unsigned NameSZ = 4;
+ uint16_t VendorNameSize = VendorName.size() + 1;
+ uint16_t ArchNameSize = ArchName.size() + 1;
+ unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
+ sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
+ VendorNameSize + ArchNameSize;
+
+ OS.PushSection();
+ OS.SwitchSection(Note);
+ OS.EmitIntValue(NameSZ, 4); // namesz
+ OS.EmitIntValue(DescSZ, 4); // descsz
+ OS.EmitIntValue(NT_AMDGPU_HSA_ISA, 4); // type
+ OS.EmitBytes(StringRef("AMD", 4)); // name
+ OS.EmitIntValue(VendorNameSize, 2); // desc
+ OS.EmitIntValue(ArchNameSize, 2);
+ OS.EmitIntValue(Major, 4);
+ OS.EmitIntValue(Minor, 4);
+ OS.EmitIntValue(Stepping, 4);
+ OS.EmitBytes(VendorName);
+ OS.EmitIntValue(0, 1); // NULL terminate VendorName
+ OS.EmitBytes(ArchName);
+ OS.EmitIntValue(0, 1); // NULL terminate ArchName
+ OS.EmitValueToAlignment(4);
+ OS.PopSection();
+}
+
+void
+AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
+
+ MCStreamer &OS = getStreamer();
+ OS.PushSection();
+ OS.SwitchSection(OS.getContext().getObjectFileInfo()->getTextSection());
+ OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
+ OS.PopSection();
+}
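
On the object path the two directive emitters above write standard ELF note records into a .note section instead of text. As a reading aid, here is a hypothetical C view of the bytes produced by EmitDirectiveHSACodeObjectVersion(1, 0); the struct and field names are invented for illustration, but the sizes and values mirror the EmitIntValue/EmitBytes calls in the code above (little-endian, padded to a 4-byte boundary):

    #include <stdint.h>

    // Invented name; describes the record written by the ELF streamer above.
    struct amdgpu_hsa_note_code_object_version {
      uint32_t namesz;   // 4: "AMD" plus the NUL terminator
      uint32_t descsz;   // 8: two 32-bit version words
      uint32_t type;     // 1: NT_AMDGPU_HSA_CODE_OBJECT_VERSION
      char     name[4];  // "AMD\0"
      uint32_t major;    // 1
      uint32_t minor;    // 0
    };
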
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
new file mode 100644
index 000000000000..d37677c6b863
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -0,0 +1,77 @@
+//===-- AMDGPUTargetStreamer.h - AMDGPU Target Streamer --------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDKernelCodeT.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+namespace llvm {
+
+class MCELFStreamer;
+
+class AMDGPUTargetStreamer : public MCTargetStreamer {
+public:
+ AMDGPUTargetStreamer(MCStreamer &S);
+ virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
+ uint32_t Minor) = 0;
+
+ virtual void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor,
+ uint32_t Stepping,
+ StringRef VendorName,
+ StringRef ArchName) = 0;
+
+ virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) = 0;
+};
+
+class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer {
+ formatted_raw_ostream &OS;
+public:
+ AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
+ void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
+ uint32_t Minor) override;
+
+ void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor,
+ uint32_t Stepping, StringRef VendorName,
+ StringRef ArchName) override;
+
+ void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
+};
+
+class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer {
+
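+  // ELF note types emitted into the HSA code object's .note section under the
+  // "AMD" vendor name.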
+ enum NoteType {
+ NT_AMDGPU_HSA_CODE_OBJECT_VERSION = 1,
+ NT_AMDGPU_HSA_HSAIL = 2,
+ NT_AMDGPU_HSA_ISA = 3,
+ NT_AMDGPU_HSA_PRODUCER = 4,
+ NT_AMDGPU_HSA_PRODUCER_OPTIONS = 5,
+ NT_AMDGPU_HSA_EXTENSION = 6,
+ NT_AMDGPU_HSA_HLDEBUG_DEBUG = 101,
+ NT_AMDGPU_HSA_HLDEBUG_TARGET = 102
+ };
+
+ MCStreamer &Streamer;
+
+public:
+ AMDGPUTargetELFStreamer(MCStreamer &S);
+
+ MCELFStreamer &getStreamer();
+
+ void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
+ uint32_t Minor) override;
+
+ void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor,
+ uint32_t Stepping, StringRef VendorName,
+ StringRef ArchName) override;
+
+ void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
+
+};
+
+} // End namespace llvm
+
+#endif
diff --git a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
index 151d0d5f83de..8306a051ff98 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
@@ -5,6 +5,7 @@ add_llvm_library(LLVMAMDGPUDesc
AMDGPUMCCodeEmitter.cpp
AMDGPUMCTargetDesc.cpp
AMDGPUMCAsmInfo.cpp
+ AMDGPUTargetStreamer.cpp
R600MCCodeEmitter.cpp
SIMCCodeEmitter.cpp
)
diff --git a/lib/Target/AMDGPU/Makefile b/lib/Target/AMDGPU/Makefile
index 2e2de5020867..219f34daa24f 100644
--- a/lib/Target/AMDGPU/Makefile
+++ b/lib/Target/AMDGPU/Makefile
@@ -18,6 +18,6 @@ BUILT_SOURCES = AMDGPUGenRegisterInfo.inc AMDGPUGenInstrInfo.inc \
AMDGPUGenIntrinsics.inc AMDGPUGenDFAPacketizer.inc \
AMDGPUGenAsmWriter.inc AMDGPUGenAsmMatcher.inc
-DIRS = AsmParser InstPrinter TargetInfo MCTargetDesc
+DIRS = AsmParser InstPrinter TargetInfo MCTargetDesc Utils
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AMDGPU/Processors.td b/lib/Target/AMDGPU/Processors.td
index c0ffede51999..69efb8b8bc43 100644
--- a/lib/Target/AMDGPU/Processors.td
+++ b/lib/Target/AMDGPU/Processors.td
@@ -104,7 +104,7 @@ def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
//===----------------------------------------------------------------------===//
def : ProcessorModel<"bonaire", SIQuarterSpeedModel,
- [FeatureSeaIslands, FeatureLDSBankCount32]
+ [FeatureSeaIslands, FeatureLDSBankCount32, FeatureISAVersion7_0_0]
>;
def : ProcessorModel<"kabini", SIQuarterSpeedModel,
@@ -112,11 +112,12 @@ def : ProcessorModel<"kabini", SIQuarterSpeedModel,
>;
def : ProcessorModel<"kaveri", SIQuarterSpeedModel,
- [FeatureSeaIslands, FeatureLDSBankCount32]
+ [FeatureSeaIslands, FeatureLDSBankCount32, FeatureISAVersion7_0_0]
>;
def : ProcessorModel<"hawaii", SIFullSpeedModel,
- [FeatureSeaIslands, FeatureFastFMAF32, FeatureLDSBankCount32]
+ [FeatureSeaIslands, FeatureFastFMAF32, FeatureLDSBankCount32,
+ FeatureISAVersion7_0_1]
>;
def : ProcessorModel<"mullins", SIQuarterSpeedModel,
@@ -127,11 +128,13 @@ def : ProcessorModel<"mullins", SIQuarterSpeedModel,
//===----------------------------------------------------------------------===//
def : ProcessorModel<"tonga", SIQuarterSpeedModel,
- [FeatureVolcanicIslands, FeatureSGPRInitBug]
+ [FeatureVolcanicIslands, FeatureSGPRInitBug, FeatureISAVersion8_0_0]
>;
def : ProcessorModel<"iceland", SIQuarterSpeedModel,
- [FeatureVolcanicIslands, FeatureSGPRInitBug]
+ [FeatureVolcanicIslands, FeatureSGPRInitBug, FeatureISAVersion8_0_0]
>;
-def : ProcessorModel<"carrizo", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
+def : ProcessorModel<"carrizo", SIQuarterSpeedModel,
+ [FeatureVolcanicIslands, FeatureISAVersion8_0_1]
+>;
diff --git a/lib/Target/AMDGPU/R600Defines.h b/lib/Target/AMDGPU/R600Defines.h
index 6ff0a2204cfa..51d87eda31d1 100644
--- a/lib/Target/AMDGPU/R600Defines.h
+++ b/lib/Target/AMDGPU/R600Defines.h
@@ -48,7 +48,7 @@ namespace R600_InstFlag {
IS_EXPORT = (1 << 17),
LDS_1A2D = (1 << 18)
};
-} // namespace R600_InstFlag
+}
#define HAS_NATIVE_OPERANDS(Flags) ((Flags) & R600_InstFlag::NATIVE_OPERANDS)
@@ -138,7 +138,7 @@ namespace OpName {
VEC_COUNT
};
-} // namespace OpName
+}
//===----------------------------------------------------------------------===//
// Config register definitions
diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h
index c25287806988..c06d3c4fd309 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/lib/Target/AMDGPU/R600ISelLowering.h
@@ -75,6 +75,6 @@ private:
SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
};
-} // namespace llvm
+} // End namespace llvm
#endif
diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp
index 5ef883cbcadd..855fa9fe45b2 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -697,15 +697,10 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// Most of the following comes from the ARM implementation of AnalyzeBranch
// If the block has no terminators, it just falls into the block after it.
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin())
+ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+ if (I == MBB.end())
return false;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return false;
- --I;
- }
+
// AMDGPU::BRANCH* instructions are only available after isel and are not
// handled
if (isBranch(I->getOpcode()))
diff --git a/lib/Target/AMDGPU/R600InstrInfo.h b/lib/Target/AMDGPU/R600InstrInfo.h
index 9c5f76c882f1..dee4c2b9ae31 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/lib/Target/AMDGPU/R600InstrInfo.h
@@ -298,6 +298,6 @@ int getLDSNoRetOp(uint16_t Opcode);
} //End namespace AMDGPU
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.h b/lib/Target/AMDGPU/R600MachineFunctionInfo.h
index f5556c1e81fc..263561edd30d 100644
--- a/lib/Target/AMDGPU/R600MachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.h
@@ -29,6 +29,6 @@ public:
unsigned StackSize;
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
index a1a1b4043429..0c06ccc736d0 100644
--- a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
+++ b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
@@ -375,7 +375,7 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
return false;
}
-} // namespace
+}
llvm::FunctionPass *llvm::createR600VectorRegMerger(TargetMachine &tm) {
return new R600VectorRegMerger(tm);
diff --git a/lib/Target/AMDGPU/R600TextureIntrinsicsReplacer.cpp b/lib/Target/AMDGPU/R600TextureIntrinsicsReplacer.cpp
index 93bcf680a022..2fc7b02f673f 100644
--- a/lib/Target/AMDGPU/R600TextureIntrinsicsReplacer.cpp
+++ b/lib/Target/AMDGPU/R600TextureIntrinsicsReplacer.cpp
@@ -296,7 +296,7 @@ public:
char R600TextureIntrinsicsReplacer::ID = 0;
-} // namespace
+}
FunctionPass *llvm::createR600TextureIntrinsicsReplacer() {
return new R600TextureIntrinsicsReplacer();
diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h
index f1b4ba1ac07d..4c3263911c40 100644
--- a/lib/Target/AMDGPU/SIDefines.h
+++ b/lib/Target/AMDGPU/SIDefines.h
@@ -39,7 +39,7 @@ enum {
WQM = 1 << 20,
VGPRSpill = 1 << 21
};
-} // namespace SIInstrFlags
+}
namespace llvm {
namespace AMDGPU {
@@ -74,7 +74,7 @@ namespace SIInstrFlags {
P_NORMAL = 1 << 8, // Positive normal
P_INFINITY = 1 << 9 // Positive infinity
};
-} // namespace SIInstrFlags
+}
namespace SISrcMods {
enum {
@@ -100,16 +100,41 @@ namespace SIOutMods {
#define R_00B848_COMPUTE_PGM_RSRC1 0x00B848
#define S_00B028_VGPRS(x) (((x) & 0x3F) << 0)
#define S_00B028_SGPRS(x) (((x) & 0x0F) << 6)
+
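+// For each register field: S_* shifts a value into position, G_* extracts it,
+// and C_* is the AND mask that clears it.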
#define R_00B84C_COMPUTE_PGM_RSRC2 0x00B84C
#define S_00B84C_SCRATCH_EN(x) (((x) & 0x1) << 0)
+#define G_00B84C_SCRATCH_EN(x) (((x) >> 0) & 0x1)
+#define C_00B84C_SCRATCH_EN 0xFFFFFFFE
#define S_00B84C_USER_SGPR(x) (((x) & 0x1F) << 1)
+#define G_00B84C_USER_SGPR(x) (((x) >> 1) & 0x1F)
+#define C_00B84C_USER_SGPR 0xFFFFFFC1
#define S_00B84C_TGID_X_EN(x) (((x) & 0x1) << 7)
+#define G_00B84C_TGID_X_EN(x) (((x) >> 7) & 0x1)
+#define C_00B84C_TGID_X_EN 0xFFFFFF7F
#define S_00B84C_TGID_Y_EN(x) (((x) & 0x1) << 8)
+#define G_00B84C_TGID_Y_EN(x) (((x) >> 8) & 0x1)
+#define C_00B84C_TGID_Y_EN 0xFFFFFEFF
#define S_00B84C_TGID_Z_EN(x) (((x) & 0x1) << 9)
+#define G_00B84C_TGID_Z_EN(x) (((x) >> 9) & 0x1)
+#define C_00B84C_TGID_Z_EN 0xFFFFFDFF
#define S_00B84C_TG_SIZE_EN(x) (((x) & 0x1) << 10)
+#define G_00B84C_TG_SIZE_EN(x) (((x) >> 10) & 0x1)
+#define C_00B84C_TG_SIZE_EN 0xFFFFFBFF
#define S_00B84C_TIDIG_COMP_CNT(x) (((x) & 0x03) << 11)
-
+#define G_00B84C_TIDIG_COMP_CNT(x) (((x) >> 11) & 0x03)
+#define C_00B84C_TIDIG_COMP_CNT 0xFFFFE7FF
+/* CIK */
+#define S_00B84C_EXCP_EN_MSB(x) (((x) & 0x03) << 13)
+#define G_00B84C_EXCP_EN_MSB(x) (((x) >> 13) & 0x03)
+#define C_00B84C_EXCP_EN_MSB 0xFFFF9FFF
+/* */
#define S_00B84C_LDS_SIZE(x) (((x) & 0x1FF) << 15)
+#define G_00B84C_LDS_SIZE(x) (((x) >> 15) & 0x1FF)
+#define C_00B84C_LDS_SIZE 0xFF007FFF
+#define S_00B84C_EXCP_EN(x) (((x) & 0x7F) << 24)
+#define G_00B84C_EXCP_EN(x) (((x) >> 24) & 0x7F)
+#define   C_00B84C_EXCP_EN                                            0x80FFFFFF
+
#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 12d08cf4c7f5..ead1a3743473 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -583,7 +583,8 @@ SDValue SITargetLowering::LowerFormalArguments(
if (VA.isMemLoc()) {
VT = Ins[i].VT;
EVT MemVT = Splits[i].VT;
- const unsigned Offset = 36 + VA.getLocMemOffset();
+ const unsigned Offset = Subtarget->getExplicitKernelArgOffset() +
+ VA.getLocMemOffset();
// The first 36 bytes of the input buffer contains information about
// thread group and global sizes.
SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(),
@@ -2211,8 +2212,9 @@ SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
std::pair<unsigned, const TargetRegisterClass *>
SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
+ const std::string &Constraint_,
MVT VT) const {
+ StringRef Constraint(Constraint_);
if (Constraint == "r") {
switch(VT.SimpleTy) {
default: llvm_unreachable("Unhandled type for 'r' inline asm constraint");
@@ -2232,8 +2234,9 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
if (RC) {
- unsigned Idx = std::atoi(Constraint.substr(2).c_str());
- if (Idx < RC->getNumRegs())
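+      // Parse the index with getAsInteger so a malformed constraint is
+      // rejected instead of being silently treated as register 0, which is
+      // what std::atoi would return.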
+ uint32_t Idx;
+ bool Failed = Constraint.substr(2).getAsInteger(10, Idx);
+ if (!Failed && Idx < RC->getNumRegs())
return std::make_pair(RC->getRegister(Idx), RC);
}
}
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 47bc17823b3f..eb96bd0227b2 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -440,22 +440,22 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
}
-unsigned SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
+int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
const unsigned Opcode = MI.getOpcode();
int NewOpc;
// Try to map original to commuted opcode
NewOpc = AMDGPU::getCommuteRev(Opcode);
- // Check if the commuted (REV) opcode exists on the target.
- if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
- return NewOpc;
+ if (NewOpc != -1)
+ // Check if the commuted (REV) opcode exists on the target.
+ return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
// Try to map commuted to original opcode
NewOpc = AMDGPU::getCommuteOrig(Opcode);
- // Check if the original (non-REV) opcode exists on the target.
- if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
- return NewOpc;
+ if (NewOpc != -1)
+ // Check if the original (non-REV) opcode exists on the target.
+ return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
return Opcode;
}
@@ -771,6 +771,10 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
if (MI->getNumOperands() < 3)
return nullptr;
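+  // commuteOpcode returns -1 when neither the commuted (REV) nor the original
+  // form of this opcode exists on the subtarget; give up rather than emit an
+  // invalid opcode.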
+ int CommutedOpcode = commuteOpcode(*MI);
+ if (CommutedOpcode == -1)
+ return nullptr;
+
int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::src0);
assert(Src0Idx != -1 && "Should always have src0 operand");
@@ -833,7 +837,7 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
}
if (MI)
- MI->setDesc(get(commuteOpcode(*MI)));
+ MI->setDesc(get(CommutedOpcode));
return MI;
}
@@ -2716,8 +2720,13 @@ MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
- if (ST.isAmdHsaOS())
+ if (ST.isAmdHsaOS()) {
RsrcDataFormat |= (1ULL << 56);
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ // Set MTYPE = 2
+ RsrcDataFormat |= (2ULL << 59);
+ }
+
return RsrcDataFormat;
}
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h
index 6fafb945c993..0382272068d2 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -117,7 +117,7 @@ public:
// register. If there is no hardware instruction that can store to \p
// DstRC, then AMDGPU::COPY is returned.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
- unsigned commuteOpcode(const MachineInstr &MI) const;
+ int commuteOpcode(const MachineInstr &MI) const;
MachineInstr *commuteInstruction(MachineInstr *MI,
bool NewMI = false) const override;
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index 93e4ca74ec38..fcb58d5da3b0 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1740,7 +1740,7 @@ multiclass VOP3_VCC_Inst <vop3 op, string opName,
InputModsNoDefault:$src2_modifiers, P.Src2RC64:$src2,
ClampMod:$clamp,
omod:$omod),
- " $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod",
+ "$dst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod",
[(set P.DstVT:$dst,
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
i1:$clamp, i32:$omod)),
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
new file mode 100644
index 000000000000..b76b4007003f
--- /dev/null
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -0,0 +1,60 @@
+//===-- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "AMDGPUBaseInfo.h"
+#include "llvm/MC/SubtargetFeature.h"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "AMDGPUGenSubtargetInfo.inc"
+#undef GET_SUBTARGETINFO_ENUM
+
+namespace llvm {
+namespace AMDGPU {
+
+IsaVersion getIsaVersion(const FeatureBitset &Features) {
+
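+  // The per-processor ISA-version features (set in Processors.td) select the
+  // {Major, Minor, Stepping} triple; unknown targets report 0.0.0.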
+ if (Features.test(FeatureISAVersion7_0_0))
+ return {7, 0, 0};
+
+ if (Features.test(FeatureISAVersion7_0_1))
+ return {7, 0, 1};
+
+ if (Features.test(FeatureISAVersion8_0_0))
+ return {8, 0, 0};
+
+ if (Features.test(FeatureISAVersion8_0_1))
+ return {8, 0, 1};
+
+ return {0, 0, 0};
+}
+
+void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
+ const FeatureBitset &Features) {
+
+ IsaVersion ISA = getIsaVersion(Features);
+
+ memset(&Header, 0, sizeof(Header));
+
+ Header.amd_kernel_code_version_major = 1;
+ Header.amd_kernel_code_version_minor = 0;
+ Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
+ Header.amd_machine_version_major = ISA.Major;
+ Header.amd_machine_version_minor = ISA.Minor;
+ Header.amd_machine_version_stepping = ISA.Stepping;
+ Header.kernel_code_entry_byte_offset = sizeof(Header);
+ // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
+ Header.wavefront_size = 6;
+ // These alignment values are specified in powers of two, so alignment =
+ // 2^n. The minimum alignment is 2^4 = 16.
+ Header.kernarg_segment_alignment = 4;
+ Header.group_segment_alignment = 4;
+ Header.private_segment_alignment = 4;
+}
+
+} // End namespace AMDGPU
+} // End namespace llvm
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
new file mode 100644
index 000000000000..f57028cc5bfd
--- /dev/null
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -0,0 +1,34 @@
+//===-- AMDGPUBaseInfo.h - Top level definitions for AMDGPU -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
+#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
+
+#include "AMDKernelCodeT.h"
+
+namespace llvm {
+
+class FeatureBitset;
+
+namespace AMDGPU {
+
+struct IsaVersion {
+ unsigned Major;
+ unsigned Minor;
+ unsigned Stepping;
+};
+
+IsaVersion getIsaVersion(const FeatureBitset &Features);
+void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
+ const FeatureBitset &Features);
+
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AMDGPU/Utils/CMakeLists.txt b/lib/Target/AMDGPU/Utils/CMakeLists.txt
new file mode 100644
index 000000000000..2c07aeab7dd3
--- /dev/null
+++ b/lib/Target/AMDGPU/Utils/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMAMDGPUUtils
+ AMDGPUBaseInfo.cpp
+ )
diff --git a/lib/Target/AMDGPU/Utils/LLVMBuild.txt b/lib/Target/AMDGPU/Utils/LLVMBuild.txt
new file mode 100644
index 000000000000..dec5360e3bc7
--- /dev/null
+++ b/lib/Target/AMDGPU/Utils/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/AMDGPU/Utils/LLVMBuild.txt ------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AMDGPUUtils
+parent = AMDGPU
+required_libraries = Support
+add_to_library_groups = AMDGPU
diff --git a/lib/Target/AMDGPU/Utils/Makefile b/lib/Target/AMDGPU/Utils/Makefile
new file mode 100644
index 000000000000..1019e726d50e
--- /dev/null
+++ b/lib/Target/AMDGPU/Utils/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/AMDGPU/Utils/Makefile --------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAMDGPUUtils
+
+# Hack: we need to include 'main' AMDGPU target directory to grab private
+# headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common