aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/MCTargetDesc
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/MCTargetDesc')
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp12
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp11
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp73
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h13
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h6
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp8
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h4
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp100
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h7
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp41
12 files changed, 205 insertions, 75 deletions
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index 1f94ab799122..ea6e9038fd1e 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -17,6 +17,7 @@
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/EndianStream.h"
#include "llvm/Support/TargetRegistry.h"
#include "Utils/AMDGPUBaseInfo.h"
@@ -39,8 +40,8 @@ public:
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const override;
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override;
+ void relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const override;
bool mayNeedRelaxation(const MCInst &Inst,
const MCSubtargetInfo &STI) const override;
@@ -53,12 +54,13 @@ public:
} //End anonymous namespace
-void AMDGPUAsmBackend::relaxInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI,
- MCInst &Res) const {
+void AMDGPUAsmBackend::relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const {
+ MCInst Res;
unsigned RelaxedOpcode = AMDGPU::getSOPPWithRelaxation(Inst.getOpcode());
Res.setOpcode(RelaxedOpcode);
Res.addOperand(Inst.getOperand(0));
+ Inst = std::move(Res);
return;
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index d352219a7a98..619fde74e88d 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -6,8 +6,10 @@
//
//===----------------------------------------------------------------------===//
+#include "AMDGPUFixupKinds.h"
#include "AMDGPUMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
@@ -80,6 +82,15 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_AMDGPU_ABS64;
}
+ if (Fixup.getTargetKind() == AMDGPU::fixup_si_sopp_br) {
+ const auto *SymA = Target.getSymA();
+ assert(SymA);
+
+ Ctx.reportError(Fixup.getLoc(),
+ Twine("undefined label '") + SymA->getSymbol().getName() + "'");
+ return ELF::R_AMDGPU_NONE;
+ }
+
llvm_unreachable("unhandled relocation type");
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index f65dc25d7eec..fe063d33ea3e 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -18,6 +18,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -26,6 +27,28 @@
using namespace llvm;
using namespace llvm::AMDGPU;
+static cl::opt<bool> Keep16BitSuffixes(
+ "amdgpu-keep-16-bit-reg-suffixes",
+ cl::desc("Keep .l and .h suffixes in asm for debugging purposes"),
+ cl::init(false),
+ cl::ReallyHidden);
+
+void AMDGPUInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ // FIXME: The current implementation of
+ // AsmParser::parseRegisterOrRegisterNumber in MC implies we either emit this
+ // as an integer or we provide a name which represents a physical register.
+ // For CFI instructions we really want to emit a name for the DWARF register
+ // instead, because there may be multiple DWARF registers corresponding to a
+ // single physical register. One case where this problem manifests is with
+ // wave32/wave64 where using the physical register name is ambiguous: if we
+ // write e.g. `.cfi_undefined v0` we lose information about the wavefront
+ // size which we need to encode the register in the final DWARF. Ideally we
+ // would extend MC to support parsing DWARF register names so we could do
+ // something like `.cfi_undefined dwarf_wave32_v0`. For now we just live with
+ // non-pretty DWARF register names in assembly text.
+ OS << RegNo;
+}
+
void AMDGPUInstPrinter::printInst(const MCInst *MI, uint64_t Address,
StringRef Annot, const MCSubtargetInfo &STI,
raw_ostream &OS) {
@@ -164,10 +187,10 @@ void AMDGPUInstPrinter::printSMRDOffset8(const MCInst *MI, unsigned OpNo,
printU32ImmOperand(MI, OpNo, STI, O);
}
-void AMDGPUInstPrinter::printSMRDOffset20(const MCInst *MI, unsigned OpNo,
+void AMDGPUInstPrinter::printSMEMOffset(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- printU32ImmOperand(MI, OpNo, STI, O);
+ O << formatHex(MI->getOperand(OpNo).getImm());
}
void AMDGPUInstPrinter::printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
@@ -244,6 +267,11 @@ void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
printNamedBit(MI, OpNo, O, "r128");
}
+void AMDGPUInstPrinter::printGFX10A16(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ printNamedBit(MI, OpNo, O, "a16");
+}
+
void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "lwe");
@@ -287,7 +315,6 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
switch (RegNo) {
case AMDGPU::FP_REG:
case AMDGPU::SP_REG:
- case AMDGPU::SCRATCH_WAVE_OFFSET_REG:
case AMDGPU::PRIVATE_RSRC_REG:
llvm_unreachable("pseudo-register should not ever be emitted");
case AMDGPU::SCC:
@@ -297,7 +324,12 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
}
#endif
- O << getRegisterName(RegNo);
+ StringRef RegName(getRegisterName(RegNo));
+ if (!Keep16BitSuffixes)
+ if (!RegName.consume_back(".l"))
+ RegName.consume_back(".h");
+
+ O << RegName;
}
void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
@@ -346,11 +378,21 @@ void AMDGPUInstPrinter::printVINTRPDst(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo, STI, O);
}
+void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ int16_t SImm = static_cast<int16_t>(Imm);
+ if (isInlinableIntLiteral(SImm))
+ O << SImm;
+ else
+ O << formatHex(static_cast<uint64_t>(Imm));
+}
+
void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
const MCSubtargetInfo &STI,
raw_ostream &O) {
int16_t SImm = static_cast<int16_t>(Imm);
- if (SImm >= -16 && SImm <= 64) {
+ if (isInlinableIntLiteral(SImm)) {
O << SImm;
return;
}
@@ -518,7 +560,8 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (Op.isReg()) {
printRegOperand(Op.getReg(), O, MRI);
} else if (Op.isImm()) {
- switch (Desc.OpInfo[OpNo].OperandType) {
+ const uint8_t OpTy = Desc.OpInfo[OpNo].OperandType;
+ switch (OpTy) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
@@ -535,10 +578,12 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
printImmediate64(Op.getImm(), STI, O);
break;
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
- case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
case AMDGPU::OPERAND_REG_IMM_INT16:
+ printImmediateInt16(Op.getImm(), STI, O);
+ break;
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16:
printImmediate16(Op.getImm(), STI, O);
break;
@@ -549,11 +594,19 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
printImmediate32(Op.getImm(), STI, O);
break;
}
+
+ // Deal with 16-bit FP inline immediates not working.
+ if (OpTy == AMDGPU::OPERAND_REG_IMM_V2FP16) {
+ printImmediate16(static_cast<uint16_t>(Op.getImm()), STI, O);
+ break;
+ }
LLVM_FALLTHROUGH;
- case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
- case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+ printImmediateInt16(static_cast<uint16_t>(Op.getImm()), STI, O);
+ break;
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
printImmediateV216(Op.getImm(), STI, O);
break;
case MCOI::OPERAND_UNKNOWN:
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index ba53003e9041..6dfd23ea72e6 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -23,6 +23,7 @@ public:
: MCInstPrinter(MAI, MII, MRI) {}
//Autogenerated by tblgen
+ void printRegName(raw_ostream &OS, unsigned RegNo) const override;
void printInstruction(const MCInst *MI, uint64_t Address,
const MCSubtargetInfo &STI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
@@ -60,7 +61,7 @@ private:
raw_ostream &O);
void printSMRDOffset8(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
- void printSMRDOffset20(const MCInst *MI, unsigned OpNo,
+ void printSMEMOffset(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
@@ -86,6 +87,8 @@ private:
raw_ostream &O);
void printR128A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printGFX10A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printLWE(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printD16(const MCInst *MI, unsigned OpNo,
@@ -102,8 +105,12 @@ private:
raw_ostream &O);
void printVINTRPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printImmediateInt16(uint32_t Imm, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printImmediateIntV216(uint32_t Imm, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printImmediateV216(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI,
@@ -112,6 +119,10 @@ private:
raw_ostream &O);
void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ printOperand(MI, OpNum, STI, O);
+ }
void printOperandAndFPInputMods(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printOperandAndIntInputMods(const MCInst *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 9644e66fda4e..687cfef4559f 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -43,6 +43,9 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT,
WeakRefDirective = ".weakref\t";
//===--- Dwarf Emission Directives -----------------------------------===//
SupportsDebugInformation = true;
+ DwarfRegNumForCFI = true;
+
+ UseIntegratedAssembler = false;
}
bool AMDGPUMCAsmInfo::shouldOmitSectionDirective(StringRef SectionName) const {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
index 62757a707890..d7d8c8181b02 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -51,6 +51,12 @@ public:
return 0;
}
+ virtual unsigned getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return 0;
+ }
+
virtual unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 9507836c64c2..7d3235efc59e 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -61,7 +61,13 @@ static MCRegisterInfo *createAMDGPUMCRegisterInfo(const Triple &TT) {
if (TT.getArch() == Triple::r600)
InitR600MCRegisterInfo(X, 0);
else
- InitAMDGPUMCRegisterInfo(X, 0);
+ InitAMDGPUMCRegisterInfo(X, AMDGPU::PC_REG);
+ return X;
+}
+
+MCRegisterInfo *llvm::createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitAMDGPUMCRegisterInfo(X, AMDGPU::PC_REG, DwarfFlavour);
return X;
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
index 9754d31fee60..b9cdbc6502e5 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -33,6 +33,10 @@ class Target;
class Triple;
class raw_pwrite_stream;
+enum AMDGPUDwarfFlavour { Wave64 = 0, Wave32 = 1 };
+
+MCRegisterInfo *createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour);
+
MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index fef665c2900e..3d202d7960d6 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -43,7 +43,7 @@ using namespace llvm::AMDGPU::HSAMD;
bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) {
HSAMD::Metadata HSAMetadata;
- if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
+ if (HSAMD::fromString(std::string(HSAMetadataString), HSAMetadata))
return false;
return EmitHSAMetadata(HSAMetadata);
@@ -97,6 +97,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: AK = GK_GFX1030; break;
case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
}
@@ -148,6 +149,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
+ case GK_GFX1030: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030;
case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
}
@@ -210,9 +212,9 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
}
void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
- unsigned Align) {
- OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", " << Align
- << '\n';
+ Align Alignment) {
+ OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", "
+ << Alignment.value() << '\n';
}
bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) {
@@ -393,9 +395,9 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
// AMDGPUTargetELFStreamer
//===----------------------------------------------------------------------===//
-AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(
- MCStreamer &S, const MCSubtargetInfo &STI)
- : AMDGPUTargetStreamer(S), Streamer(S) {
+AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S,
+ const MCSubtargetInfo &STI)
+ : AMDGPUTargetStreamer(S), Streamer(S), Os(STI.getTargetTriple().getOS()) {
MCAssembler &MCA = getStreamer().getAssembler();
unsigned EFlags = MCA.getELFHeaderEFlags();
@@ -427,7 +429,7 @@ void AMDGPUTargetELFStreamer::finish() {
if (Blob.empty())
return;
EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type,
- [&](MCELFStreamer &OS) { OS.EmitBytes(Blob); });
+ [&](MCELFStreamer &OS) { OS.emitBytes(Blob); });
}
void AMDGPUTargetELFStreamer::EmitNote(
@@ -438,16 +440,22 @@ void AMDGPUTargetELFStreamer::EmitNote(
auto NameSZ = Name.size() + 1;
+ unsigned NoteFlags = 0;
+ // TODO Apparently, this is currently needed for OpenCL as mentioned in
+ // https://reviews.llvm.org/D74995
+ if (Os == Triple::AMDHSA)
+ NoteFlags = ELF::SHF_ALLOC;
+
S.PushSection();
- S.SwitchSection(Context.getELFSection(
- ElfNote::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
- S.EmitIntValue(NameSZ, 4); // namesz
- S.EmitValue(DescSZ, 4); // descz
- S.EmitIntValue(NoteType, 4); // type
- S.EmitBytes(Name); // name
- S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
+ S.SwitchSection(
+ Context.getELFSection(ElfNote::SectionName, ELF::SHT_NOTE, NoteFlags));
+ S.emitInt32(NameSZ); // namesz
+ S.emitValue(DescSZ, 4); // descz
+ S.emitInt32(NoteType); // type
+ S.emitBytes(Name); // name
+ S.emitValueToAlignment(4, 0, 1, 0); // padding 0
EmitDesc(S); // desc
- S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
+ S.emitValueToAlignment(4, 0, 1, 0); // padding 0
S.PopSection();
}
@@ -458,8 +466,8 @@ void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()),
ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) {
- OS.EmitIntValue(Major, 4);
- OS.EmitIntValue(Minor, 4);
+ OS.emitInt32(Major);
+ OS.emitInt32(Minor);
});
}
@@ -478,15 +486,15 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()),
ElfNote::NT_AMDGPU_HSA_ISA, [&](MCELFStreamer &OS) {
- OS.EmitIntValue(VendorNameSize, 2);
- OS.EmitIntValue(ArchNameSize, 2);
- OS.EmitIntValue(Major, 4);
- OS.EmitIntValue(Minor, 4);
- OS.EmitIntValue(Stepping, 4);
- OS.EmitBytes(VendorName);
- OS.EmitIntValue(0, 1); // NULL terminate VendorName
- OS.EmitBytes(ArchName);
- OS.EmitIntValue(0, 1); // NULL terminte ArchName
+ OS.emitInt16(VendorNameSize);
+ OS.emitInt16(ArchNameSize);
+ OS.emitInt32(Major);
+ OS.emitInt32(Minor);
+ OS.emitInt32(Stepping);
+ OS.emitBytes(VendorName);
+ OS.emitInt8(0); // NULL terminate VendorName
+ OS.emitBytes(ArchName);
+ OS.emitInt8(0); // NULL terminte ArchName
});
}
@@ -495,7 +503,7 @@ AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
MCStreamer &OS = getStreamer();
OS.PushSection();
- OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
+ OS.emitBytes(StringRef((const char*)&Header, sizeof(Header)));
OS.PopSection();
}
@@ -507,9 +515,7 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
}
void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
- unsigned Align) {
- assert(isPowerOf2_32(Align));
-
+ Align Alignment) {
MCSymbolELF *SymbolELF = cast<MCSymbolELF>(Symbol);
SymbolELF->setType(ELF::STT_OBJECT);
@@ -518,7 +524,7 @@ void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
SymbolELF->setExternal(true);
}
- if (SymbolELF->declareCommon(Size, Align, true)) {
+ if (SymbolELF->declareCommon(Size, Alignment.value(), true)) {
report_fatal_error("Symbol: " + Symbol->getName() +
" redeclared as different type");
}
@@ -539,9 +545,9 @@ bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) {
EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_ISA,
[&](MCELFStreamer &OS) {
- OS.EmitLabel(DescBegin);
- OS.EmitBytes(IsaVersionString);
- OS.EmitLabel(DescEnd);
+ OS.emitLabel(DescBegin);
+ OS.emitBytes(IsaVersionString);
+ OS.emitLabel(DescEnd);
});
return true;
}
@@ -566,9 +572,9 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA,
[&](MCELFStreamer &OS) {
- OS.EmitLabel(DescBegin);
- OS.EmitBytes(HSAMetadataString);
- OS.EmitLabel(DescEnd);
+ OS.emitLabel(DescBegin);
+ OS.emitBytes(HSAMetadataString);
+ OS.emitLabel(DescEnd);
});
return true;
}
@@ -590,9 +596,9 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_HSA_METADATA,
[&](MCELFStreamer &OS) {
- OS.EmitLabel(DescBegin);
- OS.EmitBytes(HSAMetadataString);
- OS.EmitLabel(DescEnd);
+ OS.emitLabel(DescBegin);
+ OS.emitBytes(HSAMetadataString);
+ OS.emitLabel(DescEnd);
});
return true;
}
@@ -602,9 +608,9 @@ bool AMDGPUTargetELFStreamer::EmitCodeEnd() {
MCStreamer &OS = getStreamer();
OS.PushSection();
- OS.EmitValueToAlignment(64, Encoded_s_code_end, 4);
+ OS.emitValueToAlignment(64, Encoded_s_code_end, 4);
for (unsigned I = 0; I < 48; ++I)
- OS.EmitIntValue(Encoded_s_code_end, 4);
+ OS.emitInt32(Encoded_s_code_end);
OS.PopSection();
return true;
}
@@ -637,22 +643,22 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT)
KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);
- Streamer.EmitLabel(KernelDescriptorSymbol);
- Streamer.EmitBytes(StringRef(
+ Streamer.emitLabel(KernelDescriptorSymbol);
+ Streamer.emitBytes(StringRef(
(const char*)&(KernelDescriptor),
offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)));
// FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
// expression being created is:
// (start of kernel code) - (start of kernel descriptor)
// It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
- Streamer.EmitValue(MCBinaryExpr::createSub(
+ Streamer.emitValue(MCBinaryExpr::createSub(
MCSymbolRefExpr::create(
KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
MCSymbolRefExpr::create(
KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context),
Context),
sizeof(KernelDescriptor.kernel_code_entry_byte_offset));
- Streamer.EmitBytes(StringRef(
+ Streamer.emitBytes(StringRef(
(const char*)&(KernelDescriptor) +
offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) +
sizeof(KernelDescriptor.kernel_code_entry_byte_offset),
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 683b3e363b9a..a19d4646deb2 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -54,7 +54,7 @@ public:
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) = 0;
virtual void emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
- unsigned Align) = 0;
+ Align Alignment) = 0;
/// \returns True on success, false on failure.
virtual bool EmitISAVersion(StringRef IsaVersionString) = 0;
@@ -110,7 +110,7 @@ public:
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
- void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, unsigned Align) override;
+ void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override;
/// \returns True on success, false on failure.
bool EmitISAVersion(StringRef IsaVersionString) override;
@@ -133,6 +133,7 @@ public:
class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
MCStreamer &Streamer;
+ Triple::OSType Os;
void EmitNote(StringRef Name, const MCExpr *DescSize, unsigned NoteType,
function_ref<void(MCELFStreamer &)> EmitDesc);
@@ -157,7 +158,7 @@ public:
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
- void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, unsigned Align) override;
+ void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override;
/// \returns True on success, false on failure.
bool EmitISAVersion(StringRef IsaVersionString) override;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index 2f1f4e7a0392..f61470573050 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -47,7 +47,7 @@ public:
/// Encode the instruction and write it to the OS.
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ const MCSubtargetInfo &STI) const override;
/// \returns the encoding for an MCOperand.
uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index f8ec3c36f019..2cd6c3a81d2b 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPURegisterInfo.h"
#include "MCTargetDesc/AMDGPUFixupKinds.h"
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -71,6 +70,10 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
+ unsigned getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+
unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
@@ -105,6 +108,11 @@ static uint32_t getIntInlineImmEncoding(IntTy Imm) {
return 0;
}
+static uint32_t getLit16IntEncoding(uint16_t Val, const MCSubtargetInfo &STI) {
+ uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val));
+ return IntImm == 0 ? 255 : IntImm;
+}
+
static uint32_t getLit16Encoding(uint16_t Val, const MCSubtargetInfo &STI) {
uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val));
if (IntImm != 0)
@@ -249,23 +257,27 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
return getLit64Encoding(static_cast<uint64_t>(Imm), STI);
case AMDGPU::OPERAND_REG_IMM_INT16:
- case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+ return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
// FIXME Is this correct? What do inline immediates do on SI for f16 src
// which does not have f16 support?
return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
-
case AMDGPU::OPERAND_REG_IMM_V2INT16:
- case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16: {
if (!isUInt<16>(Imm) && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal])
return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
+ if (OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
+ return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
LLVM_FALLTHROUGH;
+ }
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+ return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
uint16_t Lo16 = static_cast<uint16_t>(Imm);
uint32_t Encoding = getLit16Encoding(Lo16, STI);
@@ -359,6 +371,15 @@ unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
return getMachineOpValue(MI, MO, Fixups, STI);
}
+unsigned SIMCCodeEmitter::getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ auto Offset = MI.getOperand(OpNo).getImm();
+ // VI only supports 20-bit unsigned offsets.
+ assert(!AMDGPU::isVI(STI) || isUInt<20>(Offset));
+ return Offset;
+}
+
unsigned
SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
@@ -419,7 +440,13 @@ SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
// instructions use acc[0:1] modifier bits to distinguish. These bits are
// encoded as a virtual 9th bit of the register for these operands.
if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Reg) ||
- MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(Reg))
+ MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_96RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_160RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_192RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_256RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AGPR_LO16RegClassID).contains(Reg))
Enc |= 512;
return Enc;