diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/MCTargetDesc')
12 files changed, 205 insertions, 75 deletions
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 1f94ab799122..ea6e9038fd1e 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -17,6 +17,7 @@ #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/EndianStream.h" #include "llvm/Support/TargetRegistry.h" #include "Utils/AMDGPUBaseInfo.h" @@ -39,8 +40,8 @@ public: const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const override; - void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, - MCInst &Res) const override; + void relaxInstruction(MCInst &Inst, + const MCSubtargetInfo &STI) const override; bool mayNeedRelaxation(const MCInst &Inst, const MCSubtargetInfo &STI) const override; @@ -53,12 +54,13 @@ public: } //End anonymous namespace -void AMDGPUAsmBackend::relaxInstruction(const MCInst &Inst, - const MCSubtargetInfo &STI, - MCInst &Res) const { +void AMDGPUAsmBackend::relaxInstruction(MCInst &Inst, + const MCSubtargetInfo &STI) const { + MCInst Res; unsigned RelaxedOpcode = AMDGPU::getSOPPWithRelaxation(Inst.getOpcode()); Res.setOpcode(RelaxedOpcode); Res.addOperand(Inst.getOperand(0)); + Inst = std::move(Res); return; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp index d352219a7a98..619fde74e88d 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -6,8 +6,10 @@ // //===----------------------------------------------------------------------===// +#include "AMDGPUFixupKinds.h" #include "AMDGPUMCTargetDesc.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" @@ -80,6 +82,15 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_AMDGPU_ABS64; } + if (Fixup.getTargetKind() == AMDGPU::fixup_si_sopp_br) { + const auto *SymA = Target.getSymA(); + assert(SymA); + + Ctx.reportError(Fixup.getLoc(), + Twine("undefined label '") + SymA->getSymbol().getName() + "'"); + return ELF::R_AMDGPU_NONE; + } + llvm_unreachable("unhandled relocation type"); } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index f65dc25d7eec..fe063d33ea3e 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -18,6 +18,7 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -26,6 +27,28 @@ using namespace llvm; using namespace llvm::AMDGPU; +static cl::opt<bool> Keep16BitSuffixes( + "amdgpu-keep-16-bit-reg-suffixes", + cl::desc("Keep .l and .h suffixes in asm for debugging purposes"), + cl::init(false), + cl::ReallyHidden); + +void AMDGPUInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { + // FIXME: The current implementation of + // AsmParser::parseRegisterOrRegisterNumber in MC implies we either emit this + // as an integer or we provide a name which represents a physical register. + // For CFI instructions we really want to emit a name for the DWARF register + // instead, because there may be multiple DWARF registers corresponding to a + // single physical register. One case where this problem manifests is with + // wave32/wave64 where using the physical register name is ambiguous: if we + // write e.g. `.cfi_undefined v0` we lose information about the wavefront + // size which we need to encode the register in the final DWARF. Ideally we + // would extend MC to support parsing DWARF register names so we could do + // something like `.cfi_undefined dwarf_wave32_v0`. For now we just live with + // non-pretty DWARF register names in assembly text. + OS << RegNo; +} + void AMDGPUInstPrinter::printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &OS) { @@ -164,10 +187,10 @@ void AMDGPUInstPrinter::printSMRDOffset8(const MCInst *MI, unsigned OpNo, printU32ImmOperand(MI, OpNo, STI, O); } -void AMDGPUInstPrinter::printSMRDOffset20(const MCInst *MI, unsigned OpNo, +void AMDGPUInstPrinter::printSMEMOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { - printU32ImmOperand(MI, OpNo, STI, O); + O << formatHex(MI->getOperand(OpNo).getImm()); } void AMDGPUInstPrinter::printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo, @@ -244,6 +267,11 @@ void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo, printNamedBit(MI, OpNo, O, "r128"); } +void AMDGPUInstPrinter::printGFX10A16(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { + printNamedBit(MI, OpNo, O, "a16"); +} + void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { printNamedBit(MI, OpNo, O, "lwe"); @@ -287,7 +315,6 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O, switch (RegNo) { case AMDGPU::FP_REG: case AMDGPU::SP_REG: - case AMDGPU::SCRATCH_WAVE_OFFSET_REG: case AMDGPU::PRIVATE_RSRC_REG: llvm_unreachable("pseudo-register should not ever be emitted"); case AMDGPU::SCC: @@ -297,7 +324,12 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O, } #endif - O << getRegisterName(RegNo); + StringRef RegName(getRegisterName(RegNo)); + if (!Keep16BitSuffixes) + if (!RegName.consume_back(".l")) + RegName.consume_back(".h"); + + O << RegName; } void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo, @@ -346,11 +378,21 @@ void AMDGPUInstPrinter::printVINTRPDst(const MCInst *MI, unsigned OpNo, printOperand(MI, OpNo, STI, O); } +void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm, + const MCSubtargetInfo &STI, + raw_ostream &O) { + int16_t SImm = static_cast<int16_t>(Imm); + if (isInlinableIntLiteral(SImm)) + O << SImm; + else + O << formatHex(static_cast<uint64_t>(Imm)); +} + void AMDGPUInstPrinter::printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI, raw_ostream &O) { int16_t SImm = static_cast<int16_t>(Imm); - if (SImm >= -16 && SImm <= 64) { + if (isInlinableIntLiteral(SImm)) { O << SImm; return; } @@ -518,7 +560,8 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, if (Op.isReg()) { printRegOperand(Op.getReg(), O, MRI); } else if (Op.isImm()) { - switch (Desc.OpInfo[OpNo].OperandType) { + const uint8_t OpTy = Desc.OpInfo[OpNo].OperandType; + switch (OpTy) { case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: case AMDGPU::OPERAND_REG_INLINE_C_INT32: @@ -535,10 +578,12 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, printImmediate64(Op.getImm(), STI, O); break; case AMDGPU::OPERAND_REG_INLINE_C_INT16: - case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_AC_INT16: - case AMDGPU::OPERAND_REG_INLINE_AC_FP16: case AMDGPU::OPERAND_REG_IMM_INT16: + printImmediateInt16(Op.getImm(), STI, O); + break; + case AMDGPU::OPERAND_REG_INLINE_C_FP16: + case AMDGPU::OPERAND_REG_INLINE_AC_FP16: case AMDGPU::OPERAND_REG_IMM_FP16: printImmediate16(Op.getImm(), STI, O); break; @@ -549,11 +594,19 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, printImmediate32(Op.getImm(), STI, O); break; } + + // Deal with 16-bit FP inline immediates not working. + if (OpTy == AMDGPU::OPERAND_REG_IMM_V2FP16) { + printImmediate16(static_cast<uint16_t>(Op.getImm()), STI, O); + break; + } LLVM_FALLTHROUGH; - case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: - case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: + printImmediateInt16(static_cast<uint16_t>(Op.getImm()), STI, O); + break; + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: printImmediateV216(Op.getImm(), STI, O); break; case MCOI::OPERAND_UNKNOWN: diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h index ba53003e9041..6dfd23ea72e6 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -23,6 +23,7 @@ public: : MCInstPrinter(MAI, MII, MRI) {} //Autogenerated by tblgen + void printRegName(raw_ostream &OS, unsigned RegNo) const override; void printInstruction(const MCInst *MI, uint64_t Address, const MCSubtargetInfo &STI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); @@ -60,7 +61,7 @@ private: raw_ostream &O); void printSMRDOffset8(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); - void printSMRDOffset20(const MCInst *MI, unsigned OpNo, + void printSMEMOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); @@ -86,6 +87,8 @@ private: raw_ostream &O); void printR128A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printGFX10A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); void printLWE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printD16(const MCInst *MI, unsigned OpNo, @@ -102,8 +105,12 @@ private: raw_ostream &O); void printVINTRPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printImmediateInt16(uint32_t Imm, const MCSubtargetInfo &STI, + raw_ostream &O); void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI, raw_ostream &O); + void printImmediateIntV216(uint32_t Imm, const MCSubtargetInfo &STI, + raw_ostream &O); void printImmediateV216(uint32_t Imm, const MCSubtargetInfo &STI, raw_ostream &O); void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI, @@ -112,6 +119,10 @@ private: raw_ostream &O); void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { + printOperand(MI, OpNum, STI, O); + } void printOperandAndFPInputMods(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printOperandAndIntInputMods(const MCInst *MI, unsigned OpNo, diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp index 9644e66fda4e..687cfef4559f 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -43,6 +43,9 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT, WeakRefDirective = ".weakref\t"; //===--- Dwarf Emission Directives -----------------------------------===// SupportsDebugInformation = true; + DwarfRegNumForCFI = true; + + UseIntegratedAssembler = false; } bool AMDGPUMCAsmInfo::shouldOmitSectionDirective(StringRef SectionName) const { diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h index 62757a707890..d7d8c8181b02 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h @@ -51,6 +51,12 @@ public: return 0; } + virtual unsigned getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + return 0; + } + virtual unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp index 9507836c64c2..7d3235efc59e 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -61,7 +61,13 @@ static MCRegisterInfo *createAMDGPUMCRegisterInfo(const Triple &TT) { if (TT.getArch() == Triple::r600) InitR600MCRegisterInfo(X, 0); else - InitAMDGPUMCRegisterInfo(X, 0); + InitAMDGPUMCRegisterInfo(X, AMDGPU::PC_REG); + return X; +} + +MCRegisterInfo *llvm::createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitAMDGPUMCRegisterInfo(X, AMDGPU::PC_REG, DwarfFlavour); return X; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h index 9754d31fee60..b9cdbc6502e5 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -33,6 +33,10 @@ class Target; class Triple; class raw_pwrite_stream; +enum AMDGPUDwarfFlavour { Wave64 = 0, Wave32 = 1 }; + +MCRegisterInfo *createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour); + MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index fef665c2900e..3d202d7960d6 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -43,7 +43,7 @@ using namespace llvm::AMDGPU::HSAMD; bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) { HSAMD::Metadata HSAMetadata; - if (HSAMD::fromString(HSAMetadataString, HSAMetadata)) + if (HSAMD::fromString(std::string(HSAMetadataString), HSAMetadata)) return false; return EmitHSAMetadata(HSAMetadata); @@ -97,6 +97,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: AK = GK_GFX1030; break; case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break; } @@ -148,6 +149,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) { case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010; case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011; case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012; + case GK_GFX1030: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030; case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE; } @@ -210,9 +212,9 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, } void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size, - unsigned Align) { - OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", " << Align - << '\n'; + Align Alignment) { + OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", " + << Alignment.value() << '\n'; } bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) { @@ -393,9 +395,9 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( // AMDGPUTargetELFStreamer //===----------------------------------------------------------------------===// -AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer( - MCStreamer &S, const MCSubtargetInfo &STI) - : AMDGPUTargetStreamer(S), Streamer(S) { +AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S, + const MCSubtargetInfo &STI) + : AMDGPUTargetStreamer(S), Streamer(S), Os(STI.getTargetTriple().getOS()) { MCAssembler &MCA = getStreamer().getAssembler(); unsigned EFlags = MCA.getELFHeaderEFlags(); @@ -427,7 +429,7 @@ void AMDGPUTargetELFStreamer::finish() { if (Blob.empty()) return; EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type, - [&](MCELFStreamer &OS) { OS.EmitBytes(Blob); }); + [&](MCELFStreamer &OS) { OS.emitBytes(Blob); }); } void AMDGPUTargetELFStreamer::EmitNote( @@ -438,16 +440,22 @@ void AMDGPUTargetELFStreamer::EmitNote( auto NameSZ = Name.size() + 1; + unsigned NoteFlags = 0; + // TODO Apparently, this is currently needed for OpenCL as mentioned in + // https://reviews.llvm.org/D74995 + if (Os == Triple::AMDHSA) + NoteFlags = ELF::SHF_ALLOC; + S.PushSection(); - S.SwitchSection(Context.getELFSection( - ElfNote::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC)); - S.EmitIntValue(NameSZ, 4); // namesz - S.EmitValue(DescSZ, 4); // descz - S.EmitIntValue(NoteType, 4); // type - S.EmitBytes(Name); // name - S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 + S.SwitchSection( + Context.getELFSection(ElfNote::SectionName, ELF::SHT_NOTE, NoteFlags)); + S.emitInt32(NameSZ); // namesz + S.emitValue(DescSZ, 4); // descz + S.emitInt32(NoteType); // type + S.emitBytes(Name); // name + S.emitValueToAlignment(4, 0, 1, 0); // padding 0 EmitDesc(S); // desc - S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 + S.emitValueToAlignment(4, 0, 1, 0); // padding 0 S.PopSection(); } @@ -458,8 +466,8 @@ void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion( EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()), ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) { - OS.EmitIntValue(Major, 4); - OS.EmitIntValue(Minor, 4); + OS.emitInt32(Major); + OS.emitInt32(Minor); }); } @@ -478,15 +486,15 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major, EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()), ElfNote::NT_AMDGPU_HSA_ISA, [&](MCELFStreamer &OS) { - OS.EmitIntValue(VendorNameSize, 2); - OS.EmitIntValue(ArchNameSize, 2); - OS.EmitIntValue(Major, 4); - OS.EmitIntValue(Minor, 4); - OS.EmitIntValue(Stepping, 4); - OS.EmitBytes(VendorName); - OS.EmitIntValue(0, 1); // NULL terminate VendorName - OS.EmitBytes(ArchName); - OS.EmitIntValue(0, 1); // NULL terminte ArchName + OS.emitInt16(VendorNameSize); + OS.emitInt16(ArchNameSize); + OS.emitInt32(Major); + OS.emitInt32(Minor); + OS.emitInt32(Stepping); + OS.emitBytes(VendorName); + OS.emitInt8(0); // NULL terminate VendorName + OS.emitBytes(ArchName); + OS.emitInt8(0); // NULL terminte ArchName }); } @@ -495,7 +503,7 @@ AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) { MCStreamer &OS = getStreamer(); OS.PushSection(); - OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header))); + OS.emitBytes(StringRef((const char*)&Header, sizeof(Header))); OS.PopSection(); } @@ -507,9 +515,7 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, } void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size, - unsigned Align) { - assert(isPowerOf2_32(Align)); - + Align Alignment) { MCSymbolELF *SymbolELF = cast<MCSymbolELF>(Symbol); SymbolELF->setType(ELF::STT_OBJECT); @@ -518,7 +524,7 @@ void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size, SymbolELF->setExternal(true); } - if (SymbolELF->declareCommon(Size, Align, true)) { + if (SymbolELF->declareCommon(Size, Alignment.value(), true)) { report_fatal_error("Symbol: " + Symbol->getName() + " redeclared as different type"); } @@ -539,9 +545,9 @@ bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) { EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_ISA, [&](MCELFStreamer &OS) { - OS.EmitLabel(DescBegin); - OS.EmitBytes(IsaVersionString); - OS.EmitLabel(DescEnd); + OS.emitLabel(DescBegin); + OS.emitBytes(IsaVersionString); + OS.emitLabel(DescEnd); }); return true; } @@ -566,9 +572,9 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc, EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA, [&](MCELFStreamer &OS) { - OS.EmitLabel(DescBegin); - OS.EmitBytes(HSAMetadataString); - OS.EmitLabel(DescEnd); + OS.emitLabel(DescBegin); + OS.emitBytes(HSAMetadataString); + OS.emitLabel(DescEnd); }); return true; } @@ -590,9 +596,9 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata( EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_HSA_METADATA, [&](MCELFStreamer &OS) { - OS.EmitLabel(DescBegin); - OS.EmitBytes(HSAMetadataString); - OS.EmitLabel(DescEnd); + OS.emitLabel(DescBegin); + OS.emitBytes(HSAMetadataString); + OS.emitLabel(DescEnd); }); return true; } @@ -602,9 +608,9 @@ bool AMDGPUTargetELFStreamer::EmitCodeEnd() { MCStreamer &OS = getStreamer(); OS.PushSection(); - OS.EmitValueToAlignment(64, Encoded_s_code_end, 4); + OS.emitValueToAlignment(64, Encoded_s_code_end, 4); for (unsigned I = 0; I < 48; ++I) - OS.EmitIntValue(Encoded_s_code_end, 4); + OS.emitInt32(Encoded_s_code_end); OS.PopSection(); return true; } @@ -637,22 +643,22 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT) KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED); - Streamer.EmitLabel(KernelDescriptorSymbol); - Streamer.EmitBytes(StringRef( + Streamer.emitLabel(KernelDescriptorSymbol); + Streamer.emitBytes(StringRef( (const char*)&(KernelDescriptor), offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset))); // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The // expression being created is: // (start of kernel code) - (start of kernel descriptor) // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64. - Streamer.EmitValue(MCBinaryExpr::createSub( + Streamer.emitValue(MCBinaryExpr::createSub( MCSymbolRefExpr::create( KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context), MCSymbolRefExpr::create( KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context), Context), sizeof(KernelDescriptor.kernel_code_entry_byte_offset)); - Streamer.EmitBytes(StringRef( + Streamer.emitBytes(StringRef( (const char*)&(KernelDescriptor) + offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) + sizeof(KernelDescriptor.kernel_code_entry_byte_offset), diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index 683b3e363b9a..a19d4646deb2 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -54,7 +54,7 @@ public: virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) = 0; virtual void emitAMDGPULDS(MCSymbol *Symbol, unsigned Size, - unsigned Align) = 0; + Align Alignment) = 0; /// \returns True on success, false on failure. virtual bool EmitISAVersion(StringRef IsaVersionString) = 0; @@ -110,7 +110,7 @@ public: void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override; - void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, unsigned Align) override; + void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override; /// \returns True on success, false on failure. bool EmitISAVersion(StringRef IsaVersionString) override; @@ -133,6 +133,7 @@ public: class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { MCStreamer &Streamer; + Triple::OSType Os; void EmitNote(StringRef Name, const MCExpr *DescSize, unsigned NoteType, function_ref<void(MCELFStreamer &)> EmitDesc); @@ -157,7 +158,7 @@ public: void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override; - void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, unsigned Align) override; + void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override; /// \returns True on success, false on failure. bool EmitISAVersion(StringRef IsaVersionString) override; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp index 2f1f4e7a0392..f61470573050 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp @@ -47,7 +47,7 @@ public: /// Encode the instruction and write it to the OS. void encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; + const MCSubtargetInfo &STI) const override; /// \returns the encoding for an MCOperand. uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp index f8ec3c36f019..2cd6c3a81d2b 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPURegisterInfo.h" #include "MCTargetDesc/AMDGPUFixupKinds.h" #include "MCTargetDesc/AMDGPUMCCodeEmitter.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" @@ -71,6 +70,10 @@ public: SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const override; + unsigned getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const override; + unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const override; @@ -105,6 +108,11 @@ static uint32_t getIntInlineImmEncoding(IntTy Imm) { return 0; } +static uint32_t getLit16IntEncoding(uint16_t Val, const MCSubtargetInfo &STI) { + uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val)); + return IntImm == 0 ? 255 : IntImm; +} + static uint32_t getLit16Encoding(uint16_t Val, const MCSubtargetInfo &STI) { uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val)); if (IntImm != 0) @@ -249,23 +257,27 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO, return getLit64Encoding(static_cast<uint64_t>(Imm), STI); case AMDGPU::OPERAND_REG_IMM_INT16: - case AMDGPU::OPERAND_REG_IMM_FP16: case AMDGPU::OPERAND_REG_INLINE_C_INT16: - case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_AC_INT16: + return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI); + case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_AC_FP16: // FIXME Is this correct? What do inline immediates do on SI for f16 src // which does not have f16 support? return getLit16Encoding(static_cast<uint16_t>(Imm), STI); - case AMDGPU::OPERAND_REG_IMM_V2INT16: - case AMDGPU::OPERAND_REG_IMM_V2FP16: + case AMDGPU::OPERAND_REG_IMM_V2FP16: { if (!isUInt<16>(Imm) && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]) return getLit32Encoding(static_cast<uint32_t>(Imm), STI); + if (OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) + return getLit16Encoding(static_cast<uint16_t>(Imm), STI); LLVM_FALLTHROUGH; + } case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: - case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: + return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI); + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { uint16_t Lo16 = static_cast<uint16_t>(Imm); uint32_t Encoding = getLit16Encoding(Lo16, STI); @@ -359,6 +371,15 @@ unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, return getMachineOpValue(MI, MO, Fixups, STI); } +unsigned SIMCCodeEmitter::getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + auto Offset = MI.getOperand(OpNo).getImm(); + // VI only supports 20-bit unsigned offsets. + assert(!AMDGPU::isVI(STI) || isUInt<20>(Offset)); + return Offset; +} + unsigned SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, @@ -419,7 +440,13 @@ SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo, // instructions use acc[0:1] modifier bits to distinguish. These bits are // encoded as a virtual 9th bit of the register for these operands. if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Reg) || - MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(Reg)) + MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(Reg) || + MRI.getRegClass(AMDGPU::AReg_96RegClassID).contains(Reg) || + MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(Reg) || + MRI.getRegClass(AMDGPU::AReg_160RegClassID).contains(Reg) || + MRI.getRegClass(AMDGPU::AReg_192RegClassID).contains(Reg) || + MRI.getRegClass(AMDGPU::AReg_256RegClassID).contains(Reg) || + MRI.getRegClass(AMDGPU::AGPR_LO16RegClassID).contains(Reg)) Enc |= 512; return Enc; |