Diffstat (limited to 'llvm/lib/Target/X86/MCTargetDesc')
19 files changed, 2135 insertions, 738 deletions
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp index 675a9c377b12d..0134b4efce727 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp @@ -56,7 +56,7 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, uint64_t Address, if (MI->getOpcode() == X86::CALLpcrel32 && (STI.getFeatureBits()[X86::Mode64Bit])) { OS << "\tcallq\t"; - printPCRelImm(MI, 0, OS); + printPCRelImm(MI, Address, 0, OS); } // data16 and data32 both have the same encoding of 0x66. While data32 is // valid only in 16 bit systems, data16 is valid in the rest. @@ -68,8 +68,7 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, uint64_t Address, OS << "\tdata32"; } // Try to print any aliases first. - else if (!printAliasInstr(MI, OS) && - !printVecCompareInstr(MI, OS)) + else if (!printAliasInstr(MI, Address, OS) && !printVecCompareInstr(MI, OS)) printInstruction(MI, Address, OS); // Next always print the annotation. diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h index 3d5d384dc4a01..51ddae61d2510 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h @@ -30,9 +30,10 @@ public: // Autogenerated by tblgen, returns true if we successfully printed an // alias. - bool printAliasInstr(const MCInst *MI, raw_ostream &OS); - void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, - unsigned PrintMethodIdx, raw_ostream &O); + bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &OS); + void printCustomAliasOperand(const MCInst *MI, uint64_t Address, + unsigned OpIdx, unsigned PrintMethodIdx, + raw_ostream &O); // Autogenerated by tblgen. 
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &OS); @@ -46,13 +47,6 @@ public: void printU8Imm(const MCInst *MI, unsigned Op, raw_ostream &OS); void printSTiRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS); - void printanymem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printMemReference(MI, OpNo, O); - } - void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printMemReference(MI, OpNo, O); - } - void printbytemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { printMemReference(MI, OpNo, O); } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index dffda5217675b..bf3b6bcb5463f 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -12,7 +12,9 @@ #include "llvm/BinaryFormat/ELF.h" #include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -60,10 +62,9 @@ public: else if (BranchType == "indirect") addKind(X86::AlignBranchIndirect); else { - report_fatal_error( - "'-x86-align-branch 'The branches's type is combination of jcc, " - "fused, jmp, call, ret, indirect.(plus separated)", - false); + errs() << "invalid argument " << BranchType.str() + << " to -x86-align-branch=; each element must be one of: fused, " "jcc, jmp, call, ret, indirect (plus separated)\n"; } } } @@ -85,13 +86,14 @@ cl::opt<unsigned> X86AlignBranchBoundary( cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch( "x86-align-branch", - cl::desc("Specify types of branches to align (plus separated list of " "types).
The branches's types are combination of jcc, fused, " - "jmp, call, ret, indirect."), - cl::value_desc("jcc indicates conditional jumps, fused indicates fused " - "conditional jumps, jmp indicates unconditional jumps, call " - "indicates direct and indirect calls, ret indicates rets, " - "indirect indicates indirect jumps."), + cl::desc( + "Specify types of branches to align (plus separated list of types):" + "\njcc indicates conditional jumps" + "\nfused indicates fused conditional jumps" + "\njmp indicates direct unconditional jumps" + "\ncall indicates direct and indirect calls" + "\nret indicates rets" + "\nindirect indicates indirect unconditional jumps"), cl::location(X86AlignBranchKindLoc)); cl::opt<bool> X86AlignBranchWithin32BBoundaries( @@ -102,6 +104,18 @@ cl::opt<bool> X86AlignBranchWithin32BBoundaries( "assumptions about labels corresponding to particular instructions, " "and should be used with caution.")); +cl::opt<unsigned> X86PadMaxPrefixSize( + "x86-pad-max-prefix-size", cl::init(0), + cl::desc("Maximum number of prefixes to use for padding")); + +cl::opt<bool> X86PadForAlign( + "x86-pad-for-align", cl::init(true), cl::Hidden, + cl::desc("Pad previous instructions to implement align directives")); + +cl::opt<bool> X86PadForBranchAlign( + "x86-pad-for-branch-align", cl::init(true), cl::Hidden, + cl::desc("Pad previous instructions to implement branch alignment")); + class X86ELFObjectWriter : public MCELFObjectTargetWriter { public: X86ELFObjectWriter(bool is64Bit, uint8_t OSABI, uint16_t EMachine, @@ -114,14 +128,18 @@ class X86AsmBackend : public MCAsmBackend { std::unique_ptr<const MCInstrInfo> MCII; X86AlignBranchKind AlignBranchType; Align AlignBoundary; + unsigned TargetPrefixMax = 0; - bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const; - - bool needAlign(MCObjectStreamer &OS) const; - bool needAlignInst(const MCInst &Inst) const; - MCBoundaryAlignFragment * - getOrCreateBoundaryAlignFragment(MCObjectStreamer &OS) const; MCInst PrevInst; + MCBoundaryAlignFragment *PendingBA = nullptr; + std::pair<MCFragment *, size_t> PrevInstPosition; + bool CanPadInst; + + uint8_t determinePaddingPrefix(const MCInst &Inst) const; + bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const; + bool needAlign(const MCInst &Inst) const; + bool canPadBranches(MCObjectStreamer &OS) const; + bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const; public: X86AsmBackend(const Target &T, const MCSubtargetInfo &STI) @@ -142,11 +160,14 @@ public: AlignBoundary = assumeAligned(X86AlignBranchBoundary); if (X86AlignBranch.getNumOccurrences()) AlignBranchType = X86AlignBranchKindLoc; + if (X86PadMaxPrefixSize.getNumOccurrences()) + TargetPrefixMax = X86PadMaxPrefixSize; } bool allowAutoPadding() const override; - void alignBranchesBegin(MCObjectStreamer &OS, const MCInst &Inst) override; - void alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) override; + bool allowEnhancedRelaxation() const override; + void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst) override; + void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override; unsigned getNumFixupKinds() const override { return X86::NumTargetFixupKinds; @@ -155,7 +176,7 @@ public: Optional<MCFixupKind> getFixupKind(StringRef Name) const override; const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; - + bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target) override; @@ -171,22 +192,34 @@ public: const MCRelaxableFragment 
*DF, const MCAsmLayout &Layout) const override; - void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, - MCInst &Res) const override; + void relaxInstruction(MCInst &Inst, + const MCSubtargetInfo &STI) const override; + + bool padInstructionViaRelaxation(MCRelaxableFragment &RF, + MCCodeEmitter &Emitter, + unsigned &RemainingSize) const; + + bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter, + unsigned &RemainingSize) const; + + bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter, + unsigned &RemainingSize) const; + + void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override; bool writeNopData(raw_ostream &OS, uint64_t Count) const override; }; } // end anonymous namespace -static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool is16BitMode) { +static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) { unsigned Op = Inst.getOpcode(); switch (Op) { default: return Op; case X86::JCC_1: - return (is16BitMode) ? X86::JCC_2 : X86::JCC_4; + return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4; case X86::JMP_1: - return (is16BitMode) ? X86::JMP_2 : X86::JMP_4; + return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4; } } @@ -275,11 +308,11 @@ static unsigned getRelaxedOpcodeArith(const MCInst &Inst) { } } -static unsigned getRelaxedOpcode(const MCInst &Inst, bool is16BitMode) { +static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) { unsigned R = getRelaxedOpcodeArith(Inst); if (R != Inst.getOpcode()) return R; - return getRelaxedOpcodeBranch(Inst, is16BitMode); + return getRelaxedOpcodeBranch(Inst, Is16BitMode); } static X86::CondCode getCondFromBranch(const MCInst &MI, @@ -316,6 +349,11 @@ static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) { return (BaseReg == X86::RIP); } +/// Check if the instruction is a prefix. +static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) { + return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags); +} + /// Check if the instruction is valid as the first instruction in macro fusion. static bool isFirstMacroFusibleInst(const MCInst &Inst, const MCInstrInfo &MCII) { @@ -327,6 +365,69 @@ static bool isFirstMacroFusibleInst(const MCInst &Inst, return FIK != X86::FirstMacroFusionInstKind::Invalid; } +/// X86 can reduce the bytes of NOP by padding instructions with prefixes to +/// get a better performance in some cases. Here, we determine which prefix is +/// the most suitable. +/// +/// If the instruction has a segment override prefix, use the existing one. +/// If the target is 64-bit, use CS. +/// If the target is 32-bit, +/// - If the instruction has an ESP/EBP base register, use SS. +/// - Otherwise use DS. +uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const { + assert((STI.hasFeature(X86::Mode32Bit) || STI.hasFeature(X86::Mode64Bit)) && + "Prefixes can be added only in 32-bit or 64-bit mode."); + const MCInstrDesc &Desc = MCII->get(Inst.getOpcode()); + uint64_t TSFlags = Desc.TSFlags; + + // Determine where the memory operand starts, if present. + int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); + if (MemoryOperand != -1) + MemoryOperand += X86II::getOperandBias(Desc); + + unsigned SegmentReg = 0; + if (MemoryOperand >= 0) { + // Check for explicit segment override on memory operand.
+ SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg(); + } + + switch (TSFlags & X86II::FormMask) { + default: + break; + case X86II::RawFrmDstSrc: { + // Check segment override opcode prefix as needed (not for %ds). + if (Inst.getOperand(2).getReg() != X86::DS) + SegmentReg = Inst.getOperand(2).getReg(); + break; + } + case X86II::RawFrmSrc: { + // Check segment override opcode prefix as needed (not for %ds). + if (Inst.getOperand(1).getReg() != X86::DS) + SegmentReg = Inst.getOperand(1).getReg(); + break; + } + case X86II::RawFrmMemOffs: { + // Check segment override opcode prefix as needed. + SegmentReg = Inst.getOperand(1).getReg(); + break; + } + } + + if (SegmentReg != 0) + return X86::getSegmentOverridePrefixForReg(SegmentReg); + + if (STI.hasFeature(X86::Mode64Bit)) + return X86::CS_Encoding; + + if (MemoryOperand >= 0) { + unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg; + unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg(); + if (BaseReg == X86::ESP || BaseReg == X86::EBP) + return X86::SS_Encoding; + } + return X86::DS_Encoding; +} + /// Check if the two instructions will be macro-fused on the target cpu. bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const { const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode()); @@ -355,19 +456,122 @@ static bool hasVariantSymbol(const MCInst &MI) { } bool X86AsmBackend::allowAutoPadding() const { - return (AlignBoundary != Align::None() && - AlignBranchType != X86::AlignBranchNone); + return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone); +} + +bool X86AsmBackend::allowEnhancedRelaxation() const { + return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign; +} + +/// X86 has certain instructions which enable interrupts exactly one +/// instruction *after* the instruction which stores to SS. Return true if the +/// given instruction has such an interrupt delay slot. +static bool hasInterruptDelaySlot(const MCInst &Inst) { + switch (Inst.getOpcode()) { + case X86::POPSS16: + case X86::POPSS32: + case X86::STI: + return true; + + case X86::MOV16sr: + case X86::MOV32sr: + case X86::MOV64sr: + case X86::MOV16sm: + if (Inst.getOperand(0).getReg() == X86::SS) + return true; + break; + } + return false; +} + +/// Check if the instruction to be emitted is right after any data. +static bool +isRightAfterData(MCFragment *CurrentFragment, + const std::pair<MCFragment *, size_t> &PrevInstPosition) { + MCFragment *F = CurrentFragment; + // Empty data fragments may be created to prevent further data being + // added into the previous fragment; we need to skip them since they + // have no contents. + for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode()) + if (cast<MCDataFragment>(F)->getContents().size() != 0) + break; + + // Since data is always emitted into a DataFragment, our check strategy is + // simple here. + // - If the fragment is a DataFragment + // - If it's not the fragment where the previous instruction is, + // returns true. + // - If it's the fragment holding the previous instruction but its + // size changed since the previous instruction was emitted into + // it, returns true. + // - Otherwise returns false. + // - If the fragment is not a DataFragment, returns false. + if (auto *DF = dyn_cast_or_null<MCDataFragment>(F)) + return DF != PrevInstPosition.first || + DF->getContents().size() != PrevInstPosition.second; + + return false; +} + +/// \returns the fragment size if it has instructions, otherwise returns 0.
+static size_t getSizeForInstFragment(const MCFragment *F) { + if (!F || !F->hasInstructions()) + return 0; + // MCEncodedFragmentWithContents being templated makes this tricky. + switch (F->getKind()) { + default: + llvm_unreachable("Unknown fragment with instructions!"); + case MCFragment::FT_Data: + return cast<MCDataFragment>(*F).getContents().size(); + case MCFragment::FT_Relaxable: + return cast<MCRelaxableFragment>(*F).getContents().size(); + case MCFragment::FT_CompactEncodedInst: + return cast<MCCompactEncodedInstFragment>(*F).getContents().size(); + } +} + +/// Return true if we can insert NOP or prefixes automatically before the +/// instruction to be emitted. +bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const { + if (hasVariantSymbol(Inst)) + // Linker may rewrite the instruction with variant symbol operand (e.g. + // TLSCALL). + return false; + + if (hasInterruptDelaySlot(PrevInst)) + // If this instruction follows an interrupt enabling instruction with a one + // instruction delay, inserting a nop would change behavior. + return false; + + if (isPrefix(PrevInst, *MCII)) + // If this instruction follows a prefix, inserting a nop/prefix would change + // semantics. + return false; + + if (isPrefix(Inst, *MCII)) + // If this instruction is a prefix, inserting a prefix would change + // semantics. + return false; + + if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition)) + // If this instruction follows any data, there is no clear + // instruction boundary, and inserting a nop/prefix would change semantics. + return false; + + return true; } -bool X86AsmBackend::needAlign(MCObjectStreamer &OS) const { +bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const { if (!OS.getAllowAutoPadding()) return false; assert(allowAutoPadding() && "incorrect initialization!"); - MCAssembler &Assembler = OS.getAssembler(); - MCSection *Sec = OS.getCurrentSectionOnly(); + // We only pad in the text section. + if (!OS.getCurrentSectionOnly()->getKind().isText()) + return false; + // TODO: we currently don't deal with Bundle cases. - if (Assembler.isBundlingEnabled() && Sec->isBundleLocked()) + if (OS.getAssembler().isBundlingEnabled()) return false; // Branches only need to be aligned in 32-bit or 64-bit mode. @@ -377,59 +581,42 @@ bool X86AsmBackend::needAlign(MCObjectStreamer &OS) const { return true; } -/// Check if the instruction operand needs to be aligned. Padding is disabled -/// before intruction which may be rewritten by linker(e.g. TLSCALL). -bool X86AsmBackend::needAlignInst(const MCInst &Inst) const { - // Linker may rewrite the instruction with variant symbol operand. - if (hasVariantSymbol(Inst)) - return false; - - const MCInstrDesc &InstDesc = MCII->get(Inst.getOpcode()); - return (InstDesc.isConditionalBranch() && +/// Check if the instruction operand needs to be aligned.
+bool X86AsmBackend::needAlign(const MCInst &Inst) const { + const MCInstrDesc &Desc = MCII->get(Inst.getOpcode()); + return (Desc.isConditionalBranch() && (AlignBranchType & X86::AlignBranchJcc)) || - (InstDesc.isUnconditionalBranch() && + (Desc.isUnconditionalBranch() && (AlignBranchType & X86::AlignBranchJmp)) || - (InstDesc.isCall() && - (AlignBranchType & X86::AlignBranchCall)) || - (InstDesc.isReturn() && - (AlignBranchType & X86::AlignBranchRet)) || - (InstDesc.isIndirectBranch() && + (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) || + (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) || + (Desc.isIndirectBranch() && (AlignBranchType & X86::AlignBranchIndirect)); } -static bool canReuseBoundaryAlignFragment(const MCBoundaryAlignFragment &F) { - // If a MCBoundaryAlignFragment has not been used to emit NOP,we can reuse it. - return !F.canEmitNops(); -} +/// Insert BoundaryAlignFragment before instructions to align branches. +void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS, + const MCInst &Inst) { + CanPadInst = canPadInst(Inst, OS); -MCBoundaryAlignFragment * -X86AsmBackend::getOrCreateBoundaryAlignFragment(MCObjectStreamer &OS) const { - auto *F = dyn_cast_or_null<MCBoundaryAlignFragment>(OS.getCurrentFragment()); - if (!F || !canReuseBoundaryAlignFragment(*F)) { - F = new MCBoundaryAlignFragment(AlignBoundary); - OS.insert(F); - } - return F; -} + if (!canPadBranches(OS)) + return; + + if (!isMacroFused(PrevInst, Inst)) + // Macro fusion did not actually happen; clear the pending fragment. + PendingBA = nullptr; -/// Insert MCBoundaryAlignFragment before instructions to align branches. -void X86AsmBackend::alignBranchesBegin(MCObjectStreamer &OS, - const MCInst &Inst) { - if (!needAlign(OS)) + if (!CanPadInst) return; - MCFragment *CF = OS.getCurrentFragment(); - bool NeedAlignFused = AlignBranchType & X86::AlignBranchFused; - if (NeedAlignFused && isMacroFused(PrevInst, Inst) && CF) { + if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) { // Macro fusion actually happens and there is no other fragment inserted - // after the previous instruction. NOP can be emitted in PF to align fused - // jcc. - if (auto *PF = - dyn_cast_or_null<MCBoundaryAlignFragment>(CF->getPrevNode())) { - const_cast<MCBoundaryAlignFragment *>(PF)->setEmitNops(true); - const_cast<MCBoundaryAlignFragment *>(PF)->setFused(true); - } - } else if (needAlignInst(Inst)) { + // after the previous instruction. + // + // Do nothing here since we already inserted a BoundaryAlign fragment when + // we met the first instruction in the fused pair and we'll tie them + // together in emitInstructionEnd. + // // Note: When there is at least one fragment, such as MCAlignFragment, // inserted after the previous instruction, e.g. // @@ -441,34 +628,41 @@ void X86AsmBackend::alignBranchesBegin(MCObjectStreamer &OS, // // We will treat the JCC as a unfused branch although it may be fused // with the CMP. - auto *F = getOrCreateBoundaryAlignFragment(OS); - F->setEmitNops(true); - F->setFused(false); - } else if (NeedAlignFused && isFirstMacroFusibleInst(Inst, *MCII)) { - // We don't know if macro fusion happens until the reaching the next - // instruction, so a place holder is put here if necessary.
- getOrCreateBoundaryAlignFragment(OS); + return; } - PrevInst = Inst; + if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) && + isFirstMacroFusibleInst(Inst, *MCII))) { + // If we meet an unfused branch or the first instruction in a fusible pair, + // insert a BoundaryAlign fragment. + OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary)); + } } -/// Insert a MCBoundaryAlignFragment to mark the end of the branch to be aligned -/// if necessary. -void X86AsmBackend::alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) { - if (!needAlign(OS)) +/// Set the last fragment to be aligned for the BoundaryAlignFragment. +void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) { + PrevInst = Inst; + MCFragment *CF = OS.getCurrentFragment(); + PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF)); + if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF)) + F->setAllowAutoPadding(CanPadInst); + + if (!canPadBranches(OS)) return; - // If the branch is emitted into a MCRelaxableFragment, we can determine the - // size of the branch easily in MCAssembler::relaxBoundaryAlign. When the - // branch is fused, the fused branch(macro fusion pair) must be emitted into - // two fragments. Or when the branch is unfused, the branch must be emitted - // into one fragment. The MCRelaxableFragment naturally marks the end of the - // fused or unfused branch. - // Otherwise, we need to insert a MCBoundaryAlignFragment to mark the end of - // the branch. This MCBoundaryAlignFragment may be reused to emit NOP to align - // other branch. - if (needAlignInst(Inst) && !isa<MCRelaxableFragment>(OS.getCurrentFragment())) - OS.insert(new MCBoundaryAlignFragment(AlignBoundary)); + + if (!needAlign(Inst) || !PendingBA) + return; + + // Tie the aligned instructions into the pending BoundaryAlign. + PendingBA->setLastFragment(CF); + PendingBA = nullptr; + + // We need to ensure that further data isn't added to the current + // DataFragment, so that we can get the size of instructions later in + // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty + // DataFragment. + if (isa_and_nonnull<MCDataFragment>(CF)) + OS.insert(new MCDataFragment()); // Update the maximum alignment on the current section if necessary. MCSection *Sec = OS.getCurrentSectionOnly(); @@ -478,13 +672,23 @@ void X86AsmBackend::alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) { Optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const { if (STI.getTargetTriple().isOSBinFormatELF()) { + unsigned Type; if (STI.getTargetTriple().getArch() == Triple::x86_64) { - if (Name == "R_X86_64_NONE") - return FK_NONE; + Type = llvm::StringSwitch<unsigned>(Name) +#define ELF_RELOC(X, Y) .Case(#X, Y) +#include "llvm/BinaryFormat/ELFRelocs/x86_64.def" +#undef ELF_RELOC + .Default(-1u); } else { - if (Name == "R_386_NONE") - return FK_NONE; + Type = llvm::StringSwitch<unsigned>(Name) +#define ELF_RELOC(X, Y) .Case(#X, Y) +#include "llvm/BinaryFormat/ELFRelocs/i386.def" +#undef ELF_RELOC + .Default(-1u); } + if (Type == -1u) + return None; + return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type); } return MCAsmBackend::getFixupKind(Name); } @@ -502,6 +706,11 @@ const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, }; + // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They + // do not require any extra processing.
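Aside (annotation, not part of the patch): the "literal relocation" machinery above boils down to biasing an ELF relocation type by a fixed constant. getFixupKind maps a name such as R_X86_64_64 to FirstLiteralRelocationKind plus the ELF r_type, and getRelocType in the X86ELFObjectWriter hunk further down subtracts the bias again. A minimal sketch of that round trip; the bias value here is assumed for illustration only (the real constant lives in llvm/MC/MCFixup.h):

    constexpr unsigned FirstLiteralRelocationKind = 256; // assumed value, illustration only
    constexpr unsigned R_X86_64_64 = 1;                  // ELF r_type of the 64-bit absolute reloc

    // What getFixupKind effectively does for a recognized R_* name.
    constexpr unsigned toLiteralFixupKind(unsigned ELFType) {
      return FirstLiteralRelocationKind + ELFType;
    }
    // What getRelocType does to recover the r_type when writing the object file.
    constexpr unsigned toELFType(unsigned FixupKind) {
      return FixupKind - FirstLiteralRelocationKind;
    }

    static_assert(toELFType(toLiteralFixupKind(R_X86_64_64)) == R_X86_64_64,
                  "a .reloc fixup kind round-trips through the bias");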
+ if (Kind >= FirstLiteralRelocationKind) + return MCAsmBackend::getFixupKindInfo(FK_NONE); + if (Kind < FirstTargetFixupKind) return MCAsmBackend::getFixupKindInfo(Kind); @@ -514,7 +723,7 @@ const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const { bool X86AsmBackend::shouldForceRelocation(const MCAssembler &, const MCFixup &Fixup, const MCValue &) { - return Fixup.getKind() == FK_NONE; + return Fixup.getKind() >= FirstLiteralRelocationKind; } static unsigned getFixupKindSize(unsigned Kind) { @@ -556,7 +765,10 @@ void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, MutableArrayRef<char> Data, uint64_t Value, bool IsResolved, const MCSubtargetInfo *STI) const { - unsigned Size = getFixupKindSize(Fixup.getKind()); + unsigned Kind = Fixup.getKind(); + if (Kind >= FirstLiteralRelocationKind) + return; + unsigned Size = getFixupKindSize(Kind); assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!"); @@ -613,12 +825,11 @@ bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, // FIXME: Can tblgen help at all here to verify there aren't other instructions // we can relax? -void X86AsmBackend::relaxInstruction(const MCInst &Inst, - const MCSubtargetInfo &STI, - MCInst &Res) const { +void X86AsmBackend::relaxInstruction(MCInst &Inst, + const MCSubtargetInfo &STI) const { // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel. - bool is16BitMode = STI.getFeatureBits()[X86::Mode16Bit]; - unsigned RelaxedOp = getRelaxedOpcode(Inst, is16BitMode); + bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit]; + unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode); if (RelaxedOp == Inst.getOpcode()) { SmallString<256> Tmp; @@ -628,8 +839,232 @@ void X86AsmBackend::relaxInstruction(const MCInst &Inst, report_fatal_error("unexpected instruction to relax: " + OS.str()); } - Res = Inst; - Res.setOpcode(RelaxedOp); + Inst.setOpcode(RelaxedOp); +} + +/// Return true if this instruction has been fully relaxed into its most +/// general available form. +static bool isFullyRelaxed(const MCRelaxableFragment &RF) { + auto &Inst = RF.getInst(); + auto &STI = *RF.getSubtargetInfo(); + bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit]; + return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode(); +} + +bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF, + MCCodeEmitter &Emitter, + unsigned &RemainingSize) const { + if (!RF.getAllowAutoPadding()) + return false; + // If the instruction isn't fully relaxed, shifting it around might require a + // larger value for one of the fixups than can be encoded. The outer loop + // will also catch this before moving to the next instruction, but we need to + // prevent padding this single instruction as well. + if (!isFullyRelaxed(RF)) + return false; + + const unsigned OldSize = RF.getContents().size(); + if (OldSize == 15) + return false; + + const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize); + const unsigned RemainingPrefixSize = [&]() -> unsigned { + SmallString<15> Code; + raw_svector_ostream VecOS(Code); + Emitter.emitPrefix(RF.getInst(), VecOS, STI); + assert(Code.size() < 15 && "The number of prefixes must be less than 15."); + + // TODO: It turns out we need a decent amount of plumbing for the target + // specific bits to determine the number of prefixes it's safe to add. Various + // targets (older chips mostly, but also Atom family) encounter decoder + // stalls with too many prefixes.
For testing purposes, we set the value + // externally for the moment. + unsigned ExistingPrefixSize = Code.size(); + if (TargetPrefixMax <= ExistingPrefixSize) + return 0; + return TargetPrefixMax - ExistingPrefixSize; + }(); + const unsigned PrefixBytesToAdd = + std::min(MaxPossiblePad, RemainingPrefixSize); + if (PrefixBytesToAdd == 0) + return false; + + const uint8_t Prefix = determinePaddingPrefix(RF.getInst()); + + SmallString<256> Code; + Code.append(PrefixBytesToAdd, Prefix); + Code.append(RF.getContents().begin(), RF.getContents().end()); + RF.getContents() = Code; + + // Adjust the fixups for the change in offsets + for (auto &F : RF.getFixups()) { + F.setOffset(F.getOffset() + PrefixBytesToAdd); + } + + RemainingSize -= PrefixBytesToAdd; + return true; +} + +bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF, + MCCodeEmitter &Emitter, + unsigned &RemainingSize) const { + if (isFullyRelaxed(RF)) + // TODO: There are lots of other tricks we could apply for increasing + // encoding size without impacting performance. + return false; + + MCInst Relaxed = RF.getInst(); + relaxInstruction(Relaxed, *RF.getSubtargetInfo()); + + SmallVector<MCFixup, 4> Fixups; + SmallString<15> Code; + raw_svector_ostream VecOS(Code); + Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo()); + const unsigned OldSize = RF.getContents().size(); + const unsigned NewSize = Code.size(); + assert(NewSize >= OldSize && "size decrease during relaxation?"); + unsigned Delta = NewSize - OldSize; + if (Delta > RemainingSize) + return false; + RF.setInst(Relaxed); + RF.getContents() = Code; + RF.getFixups() = Fixups; + RemainingSize -= Delta; + return true; +} + +bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF, + MCCodeEmitter &Emitter, + unsigned &RemainingSize) const { + bool Changed = false; + if (RemainingSize != 0) + Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize); + if (RemainingSize != 0) + Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize); + return Changed; +} + +void X86AsmBackend::finishLayout(MCAssembler const &Asm, + MCAsmLayout &Layout) const { + // See if we can further relax some instructions to cut down on the number of + // nop bytes required for code alignment. The actual win is in reducing + // instruction count, not number of bytes. Modern X86-64 can easily end up + // decode limited. It is often better to reduce the number of instructions + // (i.e. eliminate nops) even at the cost of increasing the size and + // complexity of others. 
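Aside (annotation, not part of the patch): a concrete picture of the trade described in the comment above. Suppose a two-byte backward jump is followed by an align directive that would otherwise need three bytes of nops; relaxing the jump to its five-byte rel32 form absorbs the padding, so the aligned fragment starts at the same offset with one fewer instruction to decode. The encodings are ordinary x86-64 bytes, written out as a compilable illustration:

    #include <cstdint>

    // Before finishLayout: short jmp (2 bytes) plus a 3-byte nop emitted for
    // the align directive.
    static const uint8_t Before[] = {0xeb, 0xf3,              // jmp -13 (rel8)
                                     0x0f, 0x1f, 0x00};       // nopl (%rax)

    // After finishLayout: the jump is relaxed to its rel32 form and the nop
    // vanishes; the next fragment still begins at the same aligned offset.
    static const uint8_t After[] = {0xe9, 0xf0, 0xff, 0xff, 0xff}; // jmp -16 (rel32)

    static_assert(sizeof(Before) == sizeof(After),
                  "same layout footprint, one fewer instruction to decode");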
+ if (!X86PadForAlign && !X86PadForBranchAlign) + return; + + DenseSet<MCFragment *> LabeledFragments; + for (const MCSymbol &S : Asm.symbols()) + LabeledFragments.insert(S.getFragment(false)); + + for (MCSection &Sec : Asm) { + if (!Sec.getKind().isText()) + continue; + + SmallVector<MCRelaxableFragment *, 4> Relaxable; + for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) { + MCFragment &F = *I; + + if (LabeledFragments.count(&F)) + Relaxable.clear(); + + if (F.getKind() == MCFragment::FT_Data || + F.getKind() == MCFragment::FT_CompactEncodedInst) + // Skip and ignore + continue; + + if (F.getKind() == MCFragment::FT_Relaxable) { + auto &RF = cast<MCRelaxableFragment>(*I); + Relaxable.push_back(&RF); + continue; + } + + auto canHandle = [](MCFragment &F) -> bool { + switch (F.getKind()) { + default: + return false; + case MCFragment::FT_Align: + return X86PadForAlign; + case MCFragment::FT_BoundaryAlign: + return X86PadForBranchAlign; + } + }; + // For any unhandled kind, assume we can't change layout. + if (!canHandle(F)) { + Relaxable.clear(); + continue; + } + +#ifndef NDEBUG + const uint64_t OrigOffset = Layout.getFragmentOffset(&F); +#endif + const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F); + + // To keep the effects local, prefer to relax instructions closest to + // the align directive. This is purely about human understandability + // of the resulting code. If we later find a reason to expand + // particular instructions over others, we can adjust. + MCFragment *FirstChangedFragment = nullptr; + unsigned RemainingSize = OrigSize; + while (!Relaxable.empty() && RemainingSize != 0) { + auto &RF = *Relaxable.pop_back_val(); + // Give the backend a chance to play any tricks it wishes to increase + // the encoding size of the given instruction. Target independent code + // will try further relaxation, but targets may play further tricks. + if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize)) + FirstChangedFragment = &RF; + + // If we have an instruction which hasn't been fully relaxed, we can't + // skip past it and insert bytes before it. Changing its starting + // offset might require a larger negative offset than it can encode. + // We don't need to worry about larger positive offsets as none of the + // possible offsets between this and our align are visible, and the + // ones afterwards aren't changing. + if (!isFullyRelaxed(RF)) + break; + } + Relaxable.clear(); + + if (FirstChangedFragment) { + // Make sure the offsets for any fragments in the affected range get + // updated. Note that this (conservatively) invalidates the offsets of + // those following, but this is not required. + Layout.invalidateFragmentsFrom(FirstChangedFragment); + } + + // BoundaryAlign explicitly tracks its size (unlike align). + if (F.getKind() == MCFragment::FT_BoundaryAlign) + cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize); + +#ifndef NDEBUG + const uint64_t FinalOffset = Layout.getFragmentOffset(&F); + const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F); + assert(OrigOffset + OrigSize == FinalOffset + FinalSize && + "can't move start of next fragment!"); + assert(FinalSize == RemainingSize && "inconsistent size computation?"); +#endif + + // If we're looking at a boundary align, make sure we don't try to pad + // its target instructions for some following directive. Doing so would + // break the alignment of the current boundary align.
+ if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) { + const MCFragment *LastFragment = BF->getLastFragment(); + if (!LastFragment) + continue; + while (&*I != LastFragment) + ++I; + } + } + } + + // The layout is done. Mark every fragment as valid. + for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) { + MCSection &Section = *Layout.getSectionOrder()[i]; + Layout.getFragmentOffset(&*Section.getFragmentList().rbegin()); + Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin()); + } } /// Write a sequence of optimal nops to the output, covering \p Count @@ -661,7 +1096,7 @@ bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const { // This CPU doesn't support long nops. If needed add more. // FIXME: We could generated something better than plain 0x90. - if (!STI.getFeatureBits()[X86::FeatureNOPL]) { + if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit)) { for (uint64_t i = 0; i < Count; ++i) OS << '\x90'; return true; @@ -670,7 +1105,7 @@ bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const { // 15-bytes is the longest single NOP instruction, but 10-bytes is // commonly the longest that can be efficiently decoded. uint64_t MaxNopLength = 10; - if (STI.getFeatureBits()[X86::ProcIntelSLM]) + if (STI.getFeatureBits()[X86::FeatureFast7ByteNOP]) MaxNopLength = 7; else if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP]) MaxNopLength = 15; @@ -811,6 +1246,7 @@ class DarwinX86AsmBackend : public X86AsmBackend { enum { CU_NUM_SAVED_REGS = 6 }; mutable unsigned SavedRegs[CU_NUM_SAVED_REGS]; + Triple TT; bool Is64Bit; unsigned OffsetSize; ///< Offset of a "push" instruction. @@ -838,10 +1274,140 @@ protected: return 1; } +private: + /// Get the compact unwind number for a given register. The number + /// corresponds to the enum lists in compact_unwind_encoding.h. + int getCompactUnwindRegNum(unsigned Reg) const { + static const MCPhysReg CU32BitRegs[7] = { + X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 + }; + static const MCPhysReg CU64BitRegs[] = { + X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 + }; + const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs; + for (int Idx = 1; *CURegs; ++CURegs, ++Idx) + if (*CURegs == Reg) + return Idx; + + return -1; + } + + /// Return the registers encoded for a compact encoding with a frame + /// pointer. + uint32_t encodeCompactUnwindRegistersWithFrame() const { + // Encode the registers in the order they were saved --- 3-bits per + // register. The list of saved registers is assumed to be in reverse + // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS. + uint32_t RegEnc = 0; + for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) { + unsigned Reg = SavedRegs[i]; + if (Reg == 0) break; + + int CURegNum = getCompactUnwindRegNum(Reg); + if (CURegNum == -1) return ~0U; + + // Encode the 3-bit register number in order, skipping over 3-bits for + // each register. + RegEnc |= (CURegNum & 0x7) << (Idx++ * 3); + } + + assert((RegEnc & 0x3FFFF) == RegEnc && + "Invalid compact register encoding!"); + return RegEnc; + } + + /// Create the permutation encoding used with frameless stacks. It is + /// passed the number of registers to be saved and an array of the registers + /// saved. + uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const { + // The saved registers are numbered from 1 to 6. 
In order to encode the + // order in which they were saved, we re-number them according to their + // place in the register order. The re-numbering is relative to the last + // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in + // that order: + // + // Orig Re-Num + // ---- ------ + // 6 6 + // 2 2 + // 4 3 + // 5 3 + // + for (unsigned i = 0; i < RegCount; ++i) { + int CUReg = getCompactUnwindRegNum(SavedRegs[i]); + if (CUReg == -1) return ~0U; + SavedRegs[i] = CUReg; + } + + // Reverse the list. + std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]); + + uint32_t RenumRegs[CU_NUM_SAVED_REGS]; + for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){ + unsigned Countless = 0; + for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j) + if (SavedRegs[j] < SavedRegs[i]) + ++Countless; + + RenumRegs[i] = SavedRegs[i] - Countless - 1; + } + + // Take the renumbered values and encode them into a 10-bit number. + uint32_t permutationEncoding = 0; + switch (RegCount) { + case 6: + permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] + + 6 * RenumRegs[2] + 2 * RenumRegs[3] + + RenumRegs[4]; + break; + case 5: + permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2] + + 6 * RenumRegs[3] + 2 * RenumRegs[4] + + RenumRegs[5]; + break; + case 4: + permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3] + + 3 * RenumRegs[4] + RenumRegs[5]; + break; + case 3: + permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4] + + RenumRegs[5]; + break; + case 2: + permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5]; + break; + case 1: + permutationEncoding |= RenumRegs[5]; + break; + } + + assert((permutationEncoding & 0x3FF) == permutationEncoding && + "Invalid compact register encoding!"); + return permutationEncoding; + } + +public: + DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) + : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()), + Is64Bit(TT.isArch64Bit()) { + memset(SavedRegs, 0, sizeof(SavedRegs)); + OffsetSize = Is64Bit ? 8 : 4; + MoveInstrSize = Is64Bit ? 3 : 2; + StackDivide = Is64Bit ? 8 : 4; + } + + std::unique_ptr<MCObjectTargetWriter> + createObjectTargetWriter() const override { + uint32_t CPUType = cantFail(MachO::getCPUType(TT)); + uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT)); + return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType); + } + /// Implementation of algorithm to generate the compact unwind encoding /// for the CFI instructions. uint32_t - generateCompactUnwindEncodingImpl(ArrayRef<MCCFIInstruction> Instrs) const { + generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const override { if (Instrs.empty()) return 0; // Reset the saved registers. @@ -904,7 +1470,7 @@ protected: // L0: // .cfi_def_cfa_offset 80 // - StackSize = std::abs(Inst.getOffset()) / StackDivide; + StackSize = Inst.getOffset() / StackDivide; ++NumDefCFAOffsets; break; } @@ -991,168 +1557,6 @@ protected: return CompactUnwindEncoding; } - -private: - /// Get the compact unwind number for a given register. The number - /// corresponds to the enum lists in compact_unwind_encoding.h. - int getCompactUnwindRegNum(unsigned Reg) const { - static const MCPhysReg CU32BitRegs[7] = { - X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 - }; - static const MCPhysReg CU64BitRegs[] = { - X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 - }; - const MCPhysReg *CURegs = Is64Bit ? 
CU64BitRegs : CU32BitRegs; - for (int Idx = 1; *CURegs; ++CURegs, ++Idx) - if (*CURegs == Reg) - return Idx; - - return -1; - } - - /// Return the registers encoded for a compact encoding with a frame - /// pointer. - uint32_t encodeCompactUnwindRegistersWithFrame() const { - // Encode the registers in the order they were saved --- 3-bits per - // register. The list of saved registers is assumed to be in reverse - // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS. - uint32_t RegEnc = 0; - for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) { - unsigned Reg = SavedRegs[i]; - if (Reg == 0) break; - - int CURegNum = getCompactUnwindRegNum(Reg); - if (CURegNum == -1) return ~0U; - - // Encode the 3-bit register number in order, skipping over 3-bits for - // each register. - RegEnc |= (CURegNum & 0x7) << (Idx++ * 3); - } - - assert((RegEnc & 0x3FFFF) == RegEnc && - "Invalid compact register encoding!"); - return RegEnc; - } - - /// Create the permutation encoding used with frameless stacks. It is - /// passed the number of registers to be saved and an array of the registers - /// saved. - uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const { - // The saved registers are numbered from 1 to 6. In order to encode the - // order in which they were saved, we re-number them according to their - // place in the register order. The re-numbering is relative to the last - // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in - // that order: - // - // Orig Re-Num - // ---- ------ - // 6 6 - // 2 2 - // 4 3 - // 5 3 - // - for (unsigned i = 0; i < RegCount; ++i) { - int CUReg = getCompactUnwindRegNum(SavedRegs[i]); - if (CUReg == -1) return ~0U; - SavedRegs[i] = CUReg; - } - - // Reverse the list. - std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]); - - uint32_t RenumRegs[CU_NUM_SAVED_REGS]; - for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){ - unsigned Countless = 0; - for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j) - if (SavedRegs[j] < SavedRegs[i]) - ++Countless; - - RenumRegs[i] = SavedRegs[i] - Countless - 1; - } - - // Take the renumbered values and encode them into a 10-bit number. - uint32_t permutationEncoding = 0; - switch (RegCount) { - case 6: - permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] - + 6 * RenumRegs[2] + 2 * RenumRegs[3] - + RenumRegs[4]; - break; - case 5: - permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2] - + 6 * RenumRegs[3] + 2 * RenumRegs[4] - + RenumRegs[5]; - break; - case 4: - permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3] - + 3 * RenumRegs[4] + RenumRegs[5]; - break; - case 3: - permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4] - + RenumRegs[5]; - break; - case 2: - permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5]; - break; - case 1: - permutationEncoding |= RenumRegs[5]; - break; - } - - assert((permutationEncoding & 0x3FF) == permutationEncoding && - "Invalid compact register encoding!"); - return permutationEncoding; - } - -public: - DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, bool Is64Bit) - : X86AsmBackend(T, STI), MRI(MRI), Is64Bit(Is64Bit) { - memset(SavedRegs, 0, sizeof(SavedRegs)); - OffsetSize = Is64Bit ? 8 : 4; - MoveInstrSize = Is64Bit ? 3 : 2; - StackDivide = Is64Bit ? 
8 : 4; - } -}; - -class DarwinX86_32AsmBackend : public DarwinX86AsmBackend { -public: - DarwinX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) - : DarwinX86AsmBackend(T, MRI, STI, false) {} - - std::unique_ptr<MCObjectTargetWriter> - createObjectTargetWriter() const override { - return createX86MachObjectWriter(/*Is64Bit=*/false, - MachO::CPU_TYPE_I386, - MachO::CPU_SUBTYPE_I386_ALL); - } - - /// Generate the compact unwind encoding for the CFI instructions. - uint32_t generateCompactUnwindEncoding( - ArrayRef<MCCFIInstruction> Instrs) const override { - return generateCompactUnwindEncodingImpl(Instrs); - } -}; - -class DarwinX86_64AsmBackend : public DarwinX86AsmBackend { - const MachO::CPUSubTypeX86 Subtype; -public: - DarwinX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MachO::CPUSubTypeX86 st) - : DarwinX86AsmBackend(T, MRI, STI, true), Subtype(st) {} - - std::unique_ptr<MCObjectTargetWriter> - createObjectTargetWriter() const override { - return createX86MachObjectWriter(/*Is64Bit=*/true, MachO::CPU_TYPE_X86_64, - Subtype); - } - - /// Generate the compact unwind encoding for the CFI instructions. - uint32_t generateCompactUnwindEncoding( - ArrayRef<MCCFIInstruction> Instrs) const override { - return generateCompactUnwindEncodingImpl(Instrs); - } }; } // end anonymous namespace @@ -1163,7 +1567,7 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, const MCTargetOptions &Options) { const Triple &TheTriple = STI.getTargetTriple(); if (TheTriple.isOSBinFormatMachO()) - return new DarwinX86_32AsmBackend(T, MRI, STI); + return new DarwinX86AsmBackend(T, MRI, STI); if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) return new WindowsX86AsmBackend(T, false, STI); @@ -1181,13 +1585,8 @@ MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI, const MCTargetOptions &Options) { const Triple &TheTriple = STI.getTargetTriple(); - if (TheTriple.isOSBinFormatMachO()) { - MachO::CPUSubTypeX86 CS = - StringSwitch<MachO::CPUSubTypeX86>(TheTriple.getArchName()) - .Case("x86_64h", MachO::CPU_SUBTYPE_X86_64_H) - .Default(MachO::CPU_SUBTYPE_X86_64_ALL); - return new DarwinX86_64AsmBackend(T, MRI, STI, CS); - } + if (TheTriple.isOSBinFormatMachO()) + return new DarwinX86AsmBackend(T, MRI, STI); if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) return new WindowsX86AsmBackend(T, true, STI); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index a4f8dd669e1e5..79f07d3c7792a 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -91,7 +91,7 @@ namespace X86 { COND_G = 15, LAST_VALID_COND = COND_G, - // Artificial condition codes. These are used by AnalyzeBranch + // Artificial condition codes. These are used by analyzeBranch // to indicate a block terminated with two conditional branches that together // form a compound condition. They occur in code using FCMP_OEQ or FCMP_UNE, // which can't be represented on x86 with a single condition. These @@ -356,6 +356,39 @@ namespace X86 { AlignBranchRet = 1U << 4, AlignBranchIndirect = 1U << 5 }; + + /// Defines the encoding values for segment override prefix. 
+ enum EncodingOfSegmentOverridePrefix : uint8_t { + CS_Encoding = 0x2E, + DS_Encoding = 0x3E, + ES_Encoding = 0x26, + FS_Encoding = 0x64, + GS_Encoding = 0x65, + SS_Encoding = 0x36 + }; + + /// Given a segment register, return the encoding of the segment override + /// prefix for it. + inline EncodingOfSegmentOverridePrefix + getSegmentOverridePrefixForReg(unsigned Reg) { + switch (Reg) { + default: + llvm_unreachable("Unknown segment register!"); + case X86::CS: + return CS_Encoding; + case X86::DS: + return DS_Encoding; + case X86::ES: + return ES_Encoding; + case X86::FS: + return FS_Encoding; + case X86::GS: + return GS_Encoding; + case X86::SS: + return SS_Encoding; + } + } + } // end namespace X86; /// X86II - This namespace holds all of the target specific flags that @@ -581,90 +614,107 @@ namespace X86II { /// in the lower 4 bits of the opcode. AddCCFrm = 9, + /// PrefixByte - This form is used for instructions that represent a prefix + /// byte like data16 or rep. + PrefixByte = 10, + /// MRM[0-7][rm] - These forms are used to represent instructions that use /// a Mod/RM byte, and use the middle field to hold extended opcode /// information. In the intel manual these are represented as /0, /1, ... /// + // Instructions operate on a register Reg/Opcode operand not the r/m field. + MRMr0 = 21, + + /// MRMSrcMem - But force to use the SIB field. + MRMSrcMemFSIB = 22, + + /// MRMDestMem - But force to use the SIB field. + MRMDestMemFSIB = 23, + /// MRMDestMem - This form is used for instructions that use the Mod/RM byte /// to specify a destination, which in this case is memory. /// - MRMDestMem = 32, + MRMDestMem = 24, /// MRMSrcMem - This form is used for instructions that use the Mod/RM byte /// to specify a source, which in this case is memory. /// - MRMSrcMem = 33, + MRMSrcMem = 25, /// MRMSrcMem4VOp3 - This form is used for instructions that encode /// operand 3 with VEX.VVVV and load from memory. /// - MRMSrcMem4VOp3 = 34, + MRMSrcMem4VOp3 = 26, /// MRMSrcMemOp4 - This form is used for instructions that use the Mod/RM /// byte to specify the fourth source, which in this case is memory. /// - MRMSrcMemOp4 = 35, + MRMSrcMemOp4 = 27, /// MRMSrcMemCC - This form is used for instructions that use the Mod/RM /// byte to specify the operands and also encodes a condition code. /// - MRMSrcMemCC = 36, + MRMSrcMemCC = 28, /// MRMXm - This form is used for instructions that use the Mod/RM byte /// to specify a memory source, but doesn't use the middle field. And has /// a condition code. /// - MRMXmCC = 38, + MRMXmCC = 30, /// MRMXm - This form is used for instructions that use the Mod/RM byte /// to specify a memory source, but doesn't use the middle field. /// - MRMXm = 39, + MRMXm = 31, // Next, instructions that operate on a memory r/m operand... - MRM0m = 40, MRM1m = 41, MRM2m = 42, MRM3m = 43, // Format /0 /1 /2 /3 - MRM4m = 44, MRM5m = 45, MRM6m = 46, MRM7m = 47, // Format /4 /5 /6 /7 + MRM0m = 32, MRM1m = 33, MRM2m = 34, MRM3m = 35, // Format /0 /1 /2 /3 + MRM4m = 36, MRM5m = 37, MRM6m = 38, MRM7m = 39, // Format /4 /5 /6 /7 /// MRMDestReg - This form is used for instructions that use the Mod/RM byte /// to specify a destination, which in this case is a register. /// - MRMDestReg = 48, + MRMDestReg = 40, /// MRMSrcReg - This form is used for instructions that use the Mod/RM byte /// to specify a source, which in this case is a register. 
/// - MRMSrcReg = 49, + MRMSrcReg = 41, /// MRMSrcReg4VOp3 - This form is used for instructions that encode /// operand 3 with VEX.VVVV and do not load from memory. /// - MRMSrcReg4VOp3 = 50, + MRMSrcReg4VOp3 = 42, /// MRMSrcRegOp4 - This form is used for instructions that use the Mod/RM /// byte to specify the fourth source, which in this case is a register. /// - MRMSrcRegOp4 = 51, + MRMSrcRegOp4 = 43, /// MRMSrcRegCC - This form is used for instructions that use the Mod/RM /// byte to specify the operands and also encodes a condition code /// - MRMSrcRegCC = 52, + MRMSrcRegCC = 44, /// MRMXCCr - This form is used for instructions that use the Mod/RM byte /// to specify a register source, but doesn't use the middle field. And has /// a condition code. /// - MRMXrCC = 54, + MRMXrCC = 46, /// MRMXr - This form is used for instructions that use the Mod/RM byte /// to specify a register source, but doesn't use the middle field. /// - MRMXr = 55, + MRMXr = 47, // Instructions that operate on a register r/m operand... - MRM0r = 56, MRM1r = 57, MRM2r = 58, MRM3r = 59, // Format /0 /1 /2 /3 - MRM4r = 60, MRM5r = 61, MRM6r = 62, MRM7r = 63, // Format /4 /5 /6 /7 + MRM0r = 48, MRM1r = 49, MRM2r = 50, MRM3r = 51, // Format /0 /1 /2 /3 + MRM4r = 52, MRM5r = 53, MRM6r = 54, MRM7r = 55, // Format /4 /5 /6 /7 + + // Instructions that have mod=11 and an opcode but ignore r/m. + MRM0X = 56, MRM1X = 57, MRM2X = 58, MRM3X = 59, // Format /0 /1 /2 /3 + MRM4X = 60, MRM5X = 61, MRM6X = 62, MRM7X = 63, // Format /4 /5 /6 /7 /// MRM_XX - A mod/rm byte of exactly 0xXX. MRM_C0 = 64, MRM_C1 = 65, MRM_C2 = 66, MRM_C3 = 67, @@ -900,6 +950,16 @@ namespace X86II { NOTRACK = 1ULL << NoTrackShift }; + /// \returns true if the instruction with the given opcode is a prefix. + inline bool isPrefix(uint64_t TSFlags) { + return (TSFlags & X86II::FormMask) == PrefixByte; + } + + /// \returns true if the instruction with the given opcode is a pseudo. + inline bool isPseudo(uint64_t TSFlags) { + return (TSFlags & X86II::FormMask) == Pseudo; + } + /// \returns the "base" X86 opcode for the specified machine /// instruction. inline uint8_t getBaseOpcodeFor(uint64_t TSFlags) { @@ -1028,10 +1088,13 @@ namespace X86II { case X86II::RawFrmDst: case X86II::RawFrmDstSrc: case X86II::AddCCFrm: + case X86II::PrefixByte: return -1; case X86II::MRMDestMem: + case X86II::MRMDestMemFSIB: return 0; case X86II::MRMSrcMem: + case X86II::MRMSrcMemFSIB: // Start from 1, skip any registers encoded in VEX_VVVV or I8IMM, or a // mask register.
return 1 + HasVEX_4V + HasEVEX_K; @@ -1051,12 +1114,18 @@ namespace X86II { case X86II::MRMSrcRegOp4: case X86II::MRMSrcRegCC: case X86II::MRMXrCC: + case X86II::MRMr0: case X86II::MRMXr: case X86II::MRM0r: case X86II::MRM1r: case X86II::MRM2r: case X86II::MRM3r: case X86II::MRM4r: case X86II::MRM5r: case X86II::MRM6r: case X86II::MRM7r: return -1; + case X86II::MRM0X: case X86II::MRM1X: + case X86II::MRM2X: case X86II::MRM3X: + case X86II::MRM4X: case X86II::MRM5X: + case X86II::MRM6X: case X86II::MRM7X: + return -1; case X86II::MRMXmCC: case X86II::MRMXm: case X86II::MRM0m: case X86II::MRM1m: diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index bd009da60851e..292dd17e2f51c 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -317,8 +317,10 @@ static unsigned getRelocType32(MCContext &Ctx, unsigned X86ELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { - MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant(); MCFixupKind Kind = Fixup.getKind(); + if (Kind >= FirstLiteralRelocationKind) + return Kind - FirstLiteralRelocationKind; + MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant(); X86_64RelType Type = getType64(Kind, Modifier, IsPCRel); if (getEMachine() == ELF::EM_X86_64) return getRelocType64(Ctx, Fixup.getLoc(), Modifier, Type, IsPCRel, Kind); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp index 73b1969b4e822..b51011e2c52fc 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp @@ -15,7 +15,7 @@ #include "X86ATTInstPrinter.h" #include "X86BaseInfo.h" #include "X86MCTargetDesc.h" -#include "Utils/X86ShuffleDecode.h" +#include "X86ShuffleDecode.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/raw_ostream.h" @@ -199,6 +199,40 @@ using namespace llvm; CASE_AVX512_INS_COMMON(Inst##SD, Z, m_Int) \ CASE_AVX512_INS_COMMON(Inst##SS, Z, m_Int) +#define CASE_FMA4(Inst, suf) \ + CASE_AVX_INS_COMMON(Inst, 4, suf) \ + CASE_AVX_INS_COMMON(Inst, 4Y, suf) + +#define CASE_FMA4_PACKED_RR(Inst) \ + CASE_FMA4(Inst##PD, rr) \ + CASE_FMA4(Inst##PS, rr) + +#define CASE_FMA4_PACKED_RM(Inst) \ + CASE_FMA4(Inst##PD, rm) \ + CASE_FMA4(Inst##PS, rm) + +#define CASE_FMA4_PACKED_MR(Inst) \ + CASE_FMA4(Inst##PD, mr) \ + CASE_FMA4(Inst##PS, mr) + +#define CASE_FMA4_SCALAR_RR(Inst) \ + CASE_AVX_INS_COMMON(Inst##SD4, , rr) \ + CASE_AVX_INS_COMMON(Inst##SS4, , rr) \ + CASE_AVX_INS_COMMON(Inst##SD4, , rr_Int) \ + CASE_AVX_INS_COMMON(Inst##SS4, , rr_Int) + +#define CASE_FMA4_SCALAR_RM(Inst) \ + CASE_AVX_INS_COMMON(Inst##SD4, , rm) \ + CASE_AVX_INS_COMMON(Inst##SS4, , rm) \ + CASE_AVX_INS_COMMON(Inst##SD4, , rm_Int) \ + CASE_AVX_INS_COMMON(Inst##SS4, , rm_Int) + +#define CASE_FMA4_SCALAR_MR(Inst) \ + CASE_AVX_INS_COMMON(Inst##SD4, , mr) \ + CASE_AVX_INS_COMMON(Inst##SS4, , mr) \ + CASE_AVX_INS_COMMON(Inst##SD4, , mr_Int) \ + CASE_AVX_INS_COMMON(Inst##SS4, , mr_Int) + static unsigned getVectorRegSize(unsigned RegNo) { if (X86::ZMM0 <= RegNo && RegNo <= X86::ZMM31) return 512; @@ -247,14 +281,15 @@ static void printMasking(raw_ostream &OS, const MCInst *MI, OS << " {z}"; } -static bool printFMA3Comments(const MCInst *MI, raw_ostream &OS) { +static bool printFMAComments(const MCInst *MI, raw_ostream &OS, 
+ const MCInstrInfo &MCII) { const char *Mul1Name = nullptr, *Mul2Name = nullptr, *AccName = nullptr; unsigned NumOperands = MI->getNumOperands(); bool RegForm = false; bool Negate = false; StringRef AccStr = "+"; - // The operands for FMA instructions without rounding fall into two forms. + // The operands for FMA3 instructions without rounding fall into two forms: // dest, src1, src2, src3 // dest, src1, mask, src2, src3 // Where src3 is either a register or 5 memory address operands. So to find @@ -262,9 +297,112 @@ static bool printFMA3Comments(const MCInst *MI, raw_ostream &OS) { // index from the end by taking into account memory vs register form when // finding src2. + // The operands for FMA4 instructions: + // dest, src1, src2, src3 + // Where src2 OR src3 are either a register or 5 memory address operands. So + // to find dest and src1 we can index from the front, src2 (reg/mem) follows + // and then src3 (reg) will be at the end. + switch (MI->getOpcode()) { default: return false; + + CASE_FMA4_PACKED_RR(FMADD) + CASE_FMA4_SCALAR_RR(FMADD) + AccName = getRegName(MI->getOperand(NumOperands - 1).getReg()); + LLVM_FALLTHROUGH; + CASE_FMA4_PACKED_RM(FMADD) + CASE_FMA4_SCALAR_RM(FMADD) + Mul2Name = getRegName(MI->getOperand(2).getReg()); + Mul1Name = getRegName(MI->getOperand(1).getReg()); + break; + CASE_FMA4_PACKED_MR(FMADD) + CASE_FMA4_SCALAR_MR(FMADD) + AccName = getRegName(MI->getOperand(NumOperands - 1).getReg()); + Mul1Name = getRegName(MI->getOperand(1).getReg()); + break; + + CASE_FMA4_PACKED_RR(FMSUB) + CASE_FMA4_SCALAR_RR(FMSUB) + AccName = getRegName(MI->getOperand(NumOperands - 1).getReg()); + LLVM_FALLTHROUGH; + CASE_FMA4_PACKED_RM(FMSUB) + CASE_FMA4_SCALAR_RM(FMSUB) + Mul2Name = getRegName(MI->getOperand(2).getReg()); + Mul1Name = getRegName(MI->getOperand(1).getReg()); + AccStr = "-"; + break; + CASE_FMA4_PACKED_MR(FMSUB) + CASE_FMA4_SCALAR_MR(FMSUB) + AccName = getRegName(MI->getOperand(NumOperands - 1).getReg()); + Mul1Name = getRegName(MI->getOperand(1).getReg()); + AccStr = "-"; + break; + + CASE_FMA4_PACKED_RR(FNMADD) + CASE_FMA4_SCALAR_RR(FNMADD) + AccName = getRegName(MI->getOperand(NumOperands - 1).getReg()); + LLVM_FALLTHROUGH; + CASE_FMA4_PACKED_RM(FNMADD) + CASE_FMA4_SCALAR_RM(FNMADD) + Mul2Name = getRegName(MI->getOperand(2).getReg()); + Mul1Name = getRegName(MI->getOperand(1).getReg()); + Negate = true; + break; + CASE_FMA4_PACKED_MR(FNMADD) + CASE_FMA4_SCALAR_MR(FNMADD) + AccName = getRegName(MI->getOperand(NumOperands - 1).getReg()); + Mul1Name = getRegName(MI->getOperand(1).getReg()); + Negate = true; + break; + + CASE_FMA4_PACKED_RR(FNMSUB) + CASE_FMA4_SCALAR_RR(FNMSUB) + AccName = getRegName(MI->getOperand(NumOperands - 1).getReg()); + LLVM_FALLTHROUGH; + CASE_FMA4_PACKED_RM(FNMSUB) + CASE_FMA4_SCALAR_RM(FNMSUB) + Mul2Name = getRegName(MI->getOperand(2).getReg()); + Mul1Name = getRegName(MI->getOperand(1).getReg()); + AccStr = "-"; + Negate = true; + break; + CASE_FMA4_PACKED_MR(FNMSUB) + CASE_FMA4_SCALAR_MR(FNMSUB) + AccName = getRegName(MI->getOperand(NumOperands - 1).getReg()); + Mul1Name = getRegName(MI->getOperand(1).getReg()); + AccStr = "-"; + Negate = true; + break; + + CASE_FMA4_PACKED_RR(FMADDSUB) + AccName = getRegName(MI->getOperand(NumOperands - 1).getReg()); + LLVM_FALLTHROUGH; + CASE_FMA4_PACKED_RM(FMADDSUB) + Mul2Name = getRegName(MI->getOperand(2).getReg()); + Mul1Name = getRegName(MI->getOperand(1).getReg()); + AccStr = "+/-"; + break; + CASE_FMA4_PACKED_MR(FMADDSUB) + AccName = getRegName(MI->getOperand(NumOperands - 1).getReg()); 
+ Mul1Name = getRegName(MI->getOperand(1).getReg()); + AccStr = "+/-"; + break; + + CASE_FMA4_PACKED_RR(FMSUBADD) + AccName = getRegName(MI->getOperand(NumOperands - 1).getReg()); + LLVM_FALLTHROUGH; + CASE_FMA4_PACKED_RM(FMSUBADD) + Mul2Name = getRegName(MI->getOperand(2).getReg()); + Mul1Name = getRegName(MI->getOperand(1).getReg()); + AccStr = "-/+"; + break; + CASE_FMA4_PACKED_MR(FMSUBADD) + AccName = getRegName(MI->getOperand(NumOperands - 1).getReg()); + Mul1Name = getRegName(MI->getOperand(1).getReg()); + AccStr = "-/+"; + break; + CASE_FMA_PACKED_REG(FMADD132) CASE_FMA_SCALAR_REG(FMADD132) Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); @@ -476,8 +614,9 @@ static bool printFMA3Comments(const MCInst *MI, raw_ostream &OS) { if (!Mul2Name) Mul2Name = "mem"; if (!AccName) AccName = "mem"; - OS << DestName << " = "; - // TODO: Print masking information? + OS << DestName; + printMasking(OS, MI, MCII); + OS << " = "; if (Negate) OS << '-'; @@ -504,7 +643,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, unsigned NumOperands = MI->getNumOperands(); bool RegForm = false; - if (printFMA3Comments(MI, OS)) + if (printFMAComments(MI, OS, MCII)) return true; switch (MI->getOpcode()) { @@ -669,14 +808,14 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::PSLLDQri: case X86::VPSLLDQri: case X86::VPSLLDQYri: - case X86::VPSLLDQZ128rr: - case X86::VPSLLDQZ256rr: - case X86::VPSLLDQZrr: + case X86::VPSLLDQZ128ri: + case X86::VPSLLDQZ256ri: + case X86::VPSLLDQZri: Src1Name = getRegName(MI->getOperand(1).getReg()); LLVM_FALLTHROUGH; - case X86::VPSLLDQZ128rm: - case X86::VPSLLDQZ256rm: - case X86::VPSLLDQZrm: + case X86::VPSLLDQZ128mi: + case X86::VPSLLDQZ256mi: + case X86::VPSLLDQZmi: DestName = getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(NumOperands - 1).isImm()) DecodePSLLDQMask(getRegOperandNumElts(MI, 8, 0), @@ -687,14 +826,14 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::PSRLDQri: case X86::VPSRLDQri: case X86::VPSRLDQYri: - case X86::VPSRLDQZ128rr: - case X86::VPSRLDQZ256rr: - case X86::VPSRLDQZrr: + case X86::VPSRLDQZ128ri: + case X86::VPSRLDQZ256ri: + case X86::VPSRLDQZri: Src1Name = getRegName(MI->getOperand(1).getReg()); LLVM_FALLTHROUGH; - case X86::VPSRLDQZ128rm: - case X86::VPSRLDQZ256rm: - case X86::VPSRLDQZrm: + case X86::VPSRLDQZ128mi: + case X86::VPSRLDQZ256mi: + case X86::VPSRLDQZmi: DestName = getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(NumOperands - 1).isImm()) DecodePSRLDQMask(getRegOperandNumElts(MI, 8, 0), @@ -1178,28 +1317,28 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeSubVectorBroadcast(16, 8, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; - CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z128, r) + CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z128, rr) Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); LLVM_FALLTHROUGH; - CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z128, m) + CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z128, rm) DecodeSubVectorBroadcast(4, 2, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; - CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, r) - CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, r) + CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, rr) + CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, rr) Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); LLVM_FALLTHROUGH; - CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, m) - 
CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, m) + CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, rm) + CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, rm) DecodeSubVectorBroadcast(8, 2, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; - CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, r) - CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, r) + CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, rr) + CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, rr) Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); LLVM_FALLTHROUGH; - CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, m) - CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, m) + CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, rm) + CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, rm) DecodeSubVectorBroadcast(16, 2, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp index a215550769765..33d70fdb12142 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp @@ -13,6 +13,7 @@ #include "X86InstPrinterCommon.h" #include "X86BaseInfo.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" @@ -287,16 +288,23 @@ void X86InstPrinterCommon::printRoundingControl(const MCInst *MI, unsigned Op, } } -/// printPCRelImm - This is used to print an immediate value that ends up -/// being encoded as a pc-relative value (e.g. for jumps and calls). In -/// Intel-style these print slightly differently than normal immediates. -/// for example, a $ is not emitted. -void X86InstPrinterCommon::printPCRelImm(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { +/// value (e.g. for jumps and calls). In Intel-style these print slightly +/// differently than normal immediates. For example, a $ is not emitted. +/// +/// \p Address The address of the next instruction. +/// \see MCInstPrinter::printInst +void X86InstPrinterCommon::printPCRelImm(const MCInst *MI, uint64_t Address, + unsigned OpNo, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isImm()) - O << formatImm(Op.getImm()); - else { + if (Op.isImm()) { + if (PrintBranchImmAsAddress) { + uint64_t Target = Address + Op.getImm(); + if (MAI.getCodePointerSize() == 4) + Target &= 0xffffffff; + O << formatHex(Target); + } else + O << formatImm(Op.getImm()); + } else { assert(Op.isExpr() && "unknown pcrel immediate operand"); // If a symbolic branch target was added as a constant expression then print // that address in hex. 
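// A sketch of the target computation introduced above (sketchPCRelTarget
// is a hypothetical name). Per the new doc comment, Address is the
// address of the next instruction -- x86 PC-relative immediates are
// end-relative -- so no instruction-size term appears here:
static uint64_t sketchPCRelTarget(uint64_t Address, int64_t Imm,
                                  unsigned CodePointerSize) {
  uint64_t Target = Address + Imm;
  if (CodePointerSize == 4)
    Target &= 0xffffffff; // wrap within a 32-bit address space
  return Target;
}
// e.g. Address 0x1000, Imm -8 prints 0xff8 in both modes; Address 0x10,
// Imm -0x20 wraps to 0xfffffff0 when the code pointer size is 4.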
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h index 8e28f24b619a9..bb12ede3b7292 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h @@ -29,7 +29,9 @@ public: void printVPCMPMnemonic(const MCInst *MI, raw_ostream &OS); void printCMPMnemonic(const MCInst *MI, bool IsVCmp, raw_ostream &OS); void printRoundingControl(const MCInst *MI, unsigned Op, raw_ostream &O); - void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printPCRelImm(const MCInst *MI, uint64_t Address, unsigned OpNo, + raw_ostream &O); + protected: void printInstFlags(const MCInst *MI, raw_ostream &O); void printOptionalSegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp index f4bb0fbf62cd7..d1eb4d09851dd 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp @@ -45,8 +45,7 @@ void X86IntelInstPrinter::printInst(const MCInst *MI, uint64_t Address, if (MI->getOpcode() == X86::DATA16_PREFIX && STI.getFeatureBits()[X86::Mode16Bit]) { OS << "\tdata32"; - } else if (!printAliasInstr(MI, OS) && - !printVecCompareInstr(MI, OS)) + } else if (!printAliasInstr(MI, Address, OS) && !printVecCompareInstr(MI, OS)) printInstruction(MI, Address, OS); // Next always print the annotation. diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h index b409b20cbea88..82baf611df038 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h @@ -31,9 +31,10 @@ public: // Autogenerated by tblgen, returns true if we successfully printed an // alias. - bool printAliasInstr(const MCInst *MI, raw_ostream &OS); - void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, - unsigned PrintMethodIdx, raw_ostream &O); + bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &OS); + void printCustomAliasOperand(const MCInst *MI, uint64_t Address, + unsigned OpIdx, unsigned PrintMethodIdx, + raw_ostream &O); // Autogenerated by tblgen. void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O); @@ -47,14 +48,6 @@ public: void printU8Imm(const MCInst *MI, unsigned Op, raw_ostream &O); void printSTiRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS); - void printanymem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printMemReference(MI, OpNo, O); - } - - void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printMemReference(MI, OpNo, O); - } - void printbytemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { O << "byte ptr "; printMemReference(MI, OpNo, O); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index d986c829d98eb..c294da6baffa7 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -71,8 +71,6 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) { // (actually, must, since otherwise the non-extern relocations we produce // overwhelm ld64's tiny little mind and it fails). 
DwarfFDESymbolsUseAbsDiff = true; - - UseIntegratedAssembler = true; } X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple) @@ -102,10 +100,6 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { // Exceptions handling ExceptionsType = ExceptionHandling::DwarfCFI; - - // Always enable the integrated assembler by default. - // Clang also enabled it when the OS is Solaris but that is redundant here. - UseIntegratedAssembler = true; } const MCExpr * @@ -141,8 +135,16 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) { TextAlignFillValue = 0x90; AllowAtInName = true; +} - UseIntegratedAssembler = true; +void X86MCAsmInfoMicrosoftMASM::anchor() { } + +X86MCAsmInfoMicrosoftMASM::X86MCAsmInfoMicrosoftMASM(const Triple &Triple) + : X86MCAsmInfoMicrosoft(Triple) { + DollarIsPC = true; + SeparatorString = "\n"; + CommentString = ";"; + AllowSymbolAtNameStart = true; } void X86MCAsmInfoGNUCOFF::anchor() { } @@ -164,6 +166,4 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) { TextAlignFillValue = 0x90; AllowAtInName = true; - - UseIntegratedAssembler = true; } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h index b2369647a40f2..ce8e84fb96b9b 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h @@ -13,7 +13,6 @@ #ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCASMINFO_H #define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCASMINFO_H -#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmInfoCOFF.h" #include "llvm/MC/MCAsmInfoDarwin.h" #include "llvm/MC/MCAsmInfoELF.h" @@ -49,6 +48,13 @@ public: explicit X86MCAsmInfoMicrosoft(const Triple &Triple); }; +class X86MCAsmInfoMicrosoftMASM : public X86MCAsmInfoMicrosoft { + void anchor() override; + +public: + explicit X86MCAsmInfoMicrosoftMASM(const Triple &Triple); +}; + class X86MCAsmInfoGNUCOFF : public MCAsmInfoGNUCOFF { void anchor() override; diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 54a293702bd0f..7dea0760a8310 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -55,83 +55,64 @@ public: const MCSubtargetInfo &STI) const override; private: - unsigned getX86RegNum(const MCOperand &MO) const { - return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()) & 0x7; - } + unsigned getX86RegNum(const MCOperand &MO) const; - unsigned getX86RegEncoding(const MCInst &MI, unsigned OpNum) const { - return Ctx.getRegisterInfo()->getEncodingValue( - MI.getOperand(OpNum).getReg()); - } + unsigned getX86RegEncoding(const MCInst &MI, unsigned OpNum) const; /// \param MI a single low-level machine instruction. /// \param OpNum the operand #. /// \returns true if the OpNumth operand of MI require a bit to be set in /// REX prefix. - bool isREXExtendedReg(const MCInst &MI, unsigned OpNum) const { - return (getX86RegEncoding(MI, OpNum) >> 3) & 1; - } - - void emitByte(uint8_t C, unsigned &CurByte, raw_ostream &OS) const { - OS << (char)C; - ++CurByte; - } - - void emitConstant(uint64_t Val, unsigned Size, unsigned &CurByte, - raw_ostream &OS) const { - // Output the constant in little endian byte order. 
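// The inline helper being removed here (and re-added below as a static
// function) writes integers least-significant byte first. A sketch of the
// same loop, under a hypothetical name:
static void sketchEmitLE(uint64_t Val, unsigned Size, llvm::raw_ostream &OS) {
  for (unsigned I = 0; I != Size; ++I) {
    OS << static_cast<char>(Val & 0xff); // low byte first
    Val >>= 8;
  }
}
// e.g. sketchEmitLE(0x12345678, 4, OS) emits the bytes 78 56 34 12.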
- for (unsigned i = 0; i != Size; ++i) { - emitByte(Val & 255, CurByte, OS); - Val >>= 8; - } - } + bool isREXExtendedReg(const MCInst &MI, unsigned OpNum) const; void emitImmediate(const MCOperand &Disp, SMLoc Loc, unsigned ImmSize, - MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS, + MCFixupKind FixupKind, uint64_t StartByte, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, int ImmOffset = 0) const; - static uint8_t modRMByte(unsigned Mod, unsigned RegOpcode, unsigned RM) { - assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!"); - return RM | (RegOpcode << 3) | (Mod << 6); - } - void emitRegModRMByte(const MCOperand &ModRMReg, unsigned RegOpcodeFld, - unsigned &CurByte, raw_ostream &OS) const { - emitByte(modRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg)), CurByte, OS); - } + raw_ostream &OS) const; void emitSIBByte(unsigned SS, unsigned Index, unsigned Base, - unsigned &CurByte, raw_ostream &OS) const { - // SIB byte is in the same format as the modRMByte. - emitByte(modRMByte(SS, Index, Base), CurByte, OS); - } + raw_ostream &OS) const; void emitMemModRMByte(const MCInst &MI, unsigned Op, unsigned RegOpcodeField, - uint64_t TSFlags, bool Rex, unsigned &CurByte, + uint64_t TSFlags, bool HasREX, uint64_t StartByte, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; + const MCSubtargetInfo &STI, + bool ForceSIB = false) const; - void emitPrefixImpl(uint64_t TSFlags, unsigned &CurOp, unsigned &CurByte, - bool &Rex, const MCInst &MI, const MCInstrDesc &Desc, - const MCSubtargetInfo &STI, raw_ostream &OS) const; + bool emitPrefixImpl(unsigned &CurOp, const MCInst &MI, + const MCSubtargetInfo &STI, raw_ostream &OS) const; - void emitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand, - const MCInst &MI, const MCInstrDesc &Desc, + void emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, raw_ostream &OS) const; - void emitSegmentOverridePrefix(unsigned &CurByte, unsigned SegOperand, - const MCInst &MI, raw_ostream &OS) const; + void emitSegmentOverridePrefix(unsigned SegOperand, const MCInst &MI, + raw_ostream &OS) const; - bool emitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand, - const MCInst &MI, const MCInstrDesc &Desc, + bool emitOpcodePrefix(int MemOperand, const MCInst &MI, const MCSubtargetInfo &STI, raw_ostream &OS) const; - uint8_t determineREXPrefix(const MCInst &MI, uint64_t TSFlags, int MemOperand, - const MCInstrDesc &Desc) const; + bool emitREXPrefix(int MemOperand, const MCInst &MI, raw_ostream &OS) const; }; } // end anonymous namespace +static uint8_t modRMByte(unsigned Mod, unsigned RegOpcode, unsigned RM) { + assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!"); + return RM | (RegOpcode << 3) | (Mod << 6); +} + +static void emitByte(uint8_t C, raw_ostream &OS) { OS << static_cast<char>(C); } + +static void emitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) { + // Output the constant in little endian byte order. + for (unsigned i = 0; i != Size; ++i) { + emitByte(Val & 255, OS); + Val >>= 8; + } +} + /// \returns true if this signed displacement fits in a 8-bit sign-extended /// field. 
static bool isDisp8(int Value) { return Value == (int8_t)Value; } @@ -275,7 +256,8 @@ static bool hasSecRelSymbolRef(const MCExpr *Expr) { static bool isPCRel32Branch(const MCInst &MI, const MCInstrInfo &MCII) { unsigned Opcode = MI.getOpcode(); const MCInstrDesc &Desc = MCII.get(Opcode); - if ((Opcode != X86::CALL64pcrel32 && Opcode != X86::JMP_4) || + if ((Opcode != X86::CALL64pcrel32 && Opcode != X86::JMP_4 && + Opcode != X86::JCC_4) || getImmFixupKind(Desc.TSFlags) != FK_PCRel_4) return false; @@ -288,9 +270,27 @@ static bool isPCRel32Branch(const MCInst &MI, const MCInstrInfo &MCII) { return Ref && Ref->getKind() == MCSymbolRefExpr::VK_None; } +unsigned X86MCCodeEmitter::getX86RegNum(const MCOperand &MO) const { + return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()) & 0x7; +} + +unsigned X86MCCodeEmitter::getX86RegEncoding(const MCInst &MI, + unsigned OpNum) const { + return Ctx.getRegisterInfo()->getEncodingValue(MI.getOperand(OpNum).getReg()); +} + +/// \param MI a single low-level machine instruction. +/// \param OpNum the operand #. +/// \returns true if the OpNumth operand of MI require a bit to be set in +/// REX prefix. +bool X86MCCodeEmitter::isREXExtendedReg(const MCInst &MI, + unsigned OpNum) const { + return (getX86RegEncoding(MI, OpNum) >> 3) & 1; +} + void X86MCCodeEmitter::emitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size, MCFixupKind FixupKind, - unsigned &CurByte, raw_ostream &OS, + uint64_t StartByte, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const { const MCExpr *Expr = nullptr; @@ -299,7 +299,7 @@ void X86MCCodeEmitter::emitImmediate(const MCOperand &DispOp, SMLoc Loc, // relocation, emit it now. if (FixupKind != FK_PCRel_1 && FixupKind != FK_PCRel_2 && FixupKind != FK_PCRel_4) { - emitConstant(DispOp.getImm() + ImmOffset, Size, CurByte, OS); + emitConstant(DispOp.getImm() + ImmOffset, Size, OS); return; } Expr = MCConstantExpr::create(DispOp.getImm(), Ctx); @@ -322,7 +322,7 @@ void X86MCCodeEmitter::emitImmediate(const MCOperand &DispOp, SMLoc Loc, } if (Kind == GOT_Normal) - ImmOffset = CurByte; + ImmOffset = static_cast<int>(OS.tell() - StartByte); } else if (Expr->getKind() == MCExpr::SymbolRef) { if (hasSecRelSymbolRef(Expr)) { FixupKind = MCFixupKind(FK_SecRel_4); @@ -361,16 +361,30 @@ void X86MCCodeEmitter::emitImmediate(const MCOperand &DispOp, SMLoc Loc, Ctx); // Emit a symbolic constant as a fixup and 4 zeros. - Fixups.push_back(MCFixup::create(CurByte, Expr, FixupKind, Loc)); - emitConstant(0, Size, CurByte, OS); + Fixups.push_back(MCFixup::create(static_cast<uint32_t>(OS.tell() - StartByte), + Expr, FixupKind, Loc)); + emitConstant(0, Size, OS); +} + +void X86MCCodeEmitter::emitRegModRMByte(const MCOperand &ModRMReg, + unsigned RegOpcodeFld, + raw_ostream &OS) const { + emitByte(modRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg)), OS); +} + +void X86MCCodeEmitter::emitSIBByte(unsigned SS, unsigned Index, unsigned Base, + raw_ostream &OS) const { + // SIB byte is in the same format as the modRMByte. 
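// Two details worth noting in the refactor above. First, ModRM and SIB
// share one bit layout, so a single packing helper serves both:
//   byte = rm/base | (regop/index << 3) | (mod/ss << 6)
// e.g. modRMByte(3, 2, 1) == 0xD1, a register-direct form. Second, with
// the threaded CurByte counter gone, fixup offsets are recovered from the
// stream position instead (hypothetical helper name):
static uint32_t sketchFixupOffset(const llvm::raw_ostream &OS,
                                  uint64_t StartByte) {
  return static_cast<uint32_t>(OS.tell() - StartByte);
}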
+ emitByte(modRMByte(SS, Index, Base), OS); } void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, unsigned RegOpcodeField, - uint64_t TSFlags, bool Rex, - unsigned &CurByte, raw_ostream &OS, + uint64_t TSFlags, bool HasREX, + uint64_t StartByte, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { + const MCSubtargetInfo &STI, + bool ForceSIB) const { const MCOperand &Disp = MI.getOperand(Op + X86::AddrDisp); const MCOperand &Base = MI.getOperand(Op + X86::AddrBaseReg); const MCOperand &Scale = MI.getOperand(Op + X86::AddrScaleAmt); @@ -383,8 +397,9 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, BaseReg == X86::EIP) { // [disp32+rIP] in X86-64 mode assert(STI.hasFeature(X86::Mode64Bit) && "Rip-relative addressing requires 64-bit mode"); - assert(IndexReg.getReg() == 0 && "Invalid rip-relative address"); - emitByte(modRMByte(0, RegOpcodeField, 5), CurByte, OS); + assert(IndexReg.getReg() == 0 && !ForceSIB && + "Invalid rip-relative address"); + emitByte(modRMByte(0, RegOpcodeField, 5), OS); unsigned Opcode = MI.getOpcode(); // movq loads are handled with a special relocation form which allows the @@ -395,7 +410,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, default: return X86::reloc_riprel_4byte; case X86::MOV64rm: - assert(Rex); + assert(HasREX); return X86::reloc_riprel_4byte_movq_load; case X86::CALL64m: case X86::JMP64m: @@ -409,8 +424,8 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, case X86::SBB64rm: case X86::SUB64rm: case X86::XOR64rm: - return Rex ? X86::reloc_riprel_4byte_relax_rex - : X86::reloc_riprel_4byte_relax; + return HasREX ? X86::reloc_riprel_4byte_relax_rex + : X86::reloc_riprel_4byte_relax; } }(); @@ -425,7 +440,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, ? X86II::getSizeOfImm(TSFlags) : 0; - emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind), CurByte, OS, + emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind), StartByte, OS, Fixups, -ImmSize); return; } @@ -472,23 +487,23 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, if (Disp.isImm() && isDisp8(Disp.getImm())) { if (Disp.getImm() == 0 && RMfield != 6) { // There is no displacement; just the register. - emitByte(modRMByte(0, RegOpcodeField, RMfield), CurByte, OS); + emitByte(modRMByte(0, RegOpcodeField, RMfield), OS); return; } // Use the [REG]+disp8 form, including for [BP] which cannot be encoded. - emitByte(modRMByte(1, RegOpcodeField, RMfield), CurByte, OS); - emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups); + emitByte(modRMByte(1, RegOpcodeField, RMfield), OS); + emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, StartByte, OS, Fixups); return; } // This is the [REG]+disp16 case. - emitByte(modRMByte(2, RegOpcodeField, RMfield), CurByte, OS); + emitByte(modRMByte(2, RegOpcodeField, RMfield), OS); } else { // There is no BaseReg; this is the plain [disp16] case. - emitByte(modRMByte(0, RegOpcodeField, 6), CurByte, OS); + emitByte(modRMByte(0, RegOpcodeField, 6), OS); } // Emit 16-bit displacement for plain disp16 or [REG]+disp16 cases. - emitImmediate(Disp, MI.getLoc(), 2, FK_Data_2, CurByte, OS, Fixups); + emitImmediate(Disp, MI.getLoc(), 2, FK_Data_2, StartByte, OS, Fixups); return; } @@ -498,7 +513,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, // 2-7) and absolute references. if ( // The SIB byte must be used if there is an index register. 
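// RIP-relative operands always encode as ModRM.mod == 0b00 with
// ModRM.rm == 0b101 plus a 4-byte displacement; what varies is only the
// relocation that displacement gets. A condensed sketch of the selection
// above, showing a subset of the listed opcodes (hypothetical name):
static unsigned sketchRipRelReloc(unsigned Opcode, bool HasREX) {
  switch (Opcode) {
  default:
    return X86::reloc_riprel_4byte;
  case X86::MOV64rm: // special form that lets the linker rewrite the load;
                     // REX is mandatory here, hence the assert above
    return X86::reloc_riprel_4byte_movq_load;
  case X86::ADD64rm: // one of the relaxable load/RMW forms listed above
    return HasREX ? X86::reloc_riprel_4byte_relax_rex
                  : X86::reloc_riprel_4byte_relax;
  }
}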
- IndexReg.getReg() == 0 && + !ForceSIB && IndexReg.getReg() == 0 && // The SIB byte must be used if the base is ESP/RSP/R12, all of which // encode to an R/M value of 4, which indicates that a SIB byte is // present. @@ -508,8 +523,8 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, (!STI.hasFeature(X86::Mode64Bit) || BaseReg != 0)) { if (BaseReg == 0) { // [disp32] in X86-32 mode - emitByte(modRMByte(0, RegOpcodeField, 5), CurByte, OS); - emitImmediate(Disp, MI.getLoc(), 4, FK_Data_4, CurByte, OS, Fixups); + emitByte(modRMByte(0, RegOpcodeField, 5), OS); + emitImmediate(Disp, MI.getLoc(), 4, FK_Data_4, StartByte, OS, Fixups); return; } @@ -519,7 +534,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, // by emitting a displacement of 0 below. if (BaseRegNo != N86::EBP) { if (Disp.isImm() && Disp.getImm() == 0) { - emitByte(modRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS); + emitByte(modRMByte(0, RegOpcodeField, BaseRegNo), OS); return; } @@ -530,7 +545,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, // This is exclusively used by call *a@tlscall(base). The relocation // (R_386_TLSCALL or R_X86_64_TLSCALL) applies to the beginning. Fixups.push_back(MCFixup::create(0, Sym, FK_NONE, MI.getLoc())); - emitByte(modRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS); + emitByte(modRMByte(0, RegOpcodeField, BaseRegNo), OS); return; } } @@ -539,27 +554,27 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, // Otherwise, if the displacement fits in a byte, encode as [REG+disp8]. if (Disp.isImm()) { if (!HasEVEX && isDisp8(Disp.getImm())) { - emitByte(modRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS); - emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups); + emitByte(modRMByte(1, RegOpcodeField, BaseRegNo), OS); + emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, StartByte, OS, Fixups); return; } // Try EVEX compressed 8-bit displacement first; if failed, fall back to // 32-bit displacement. int CDisp8 = 0; if (HasEVEX && isCDisp8(TSFlags, Disp.getImm(), CDisp8)) { - emitByte(modRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS); - emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups, + emitByte(modRMByte(1, RegOpcodeField, BaseRegNo), OS); + emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, StartByte, OS, Fixups, CDisp8 - Disp.getImm()); return; } } // Otherwise, emit the most general non-SIB encoding: [REG+disp32] - emitByte(modRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS); + emitByte(modRMByte(2, RegOpcodeField, BaseRegNo), OS); unsigned Opcode = MI.getOpcode(); unsigned FixupKind = Opcode == X86::MOV32rm ? X86::reloc_signed_4byte_relax : X86::reloc_signed_4byte; - emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind), CurByte, OS, + emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind), StartByte, OS, Fixups); return; } @@ -575,30 +590,30 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, if (BaseReg == 0) { // If there is no base register, we emit the special case SIB byte with // MOD=0, BASE=5, to JUST get the index, scale, and displacement. - emitByte(modRMByte(0, RegOpcodeField, 4), CurByte, OS); + emitByte(modRMByte(0, RegOpcodeField, 4), OS); ForceDisp32 = true; } else if (!Disp.isImm()) { // Emit the normal disp32 encoding. 
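// The EVEX path above stores a *compressed* 8-bit displacement: the byte
// displacement divided by a memory element width N (the real isCDisp8
// derives N from TSFlags). A sketch of the check under that assumption;
// it also explains the ImmOffset = CDisp8 - Disp argument, which makes
// emitImmediate write Disp + ImmOffset == CDisp8 into the byte:
static bool sketchIsCDisp8(int64_t Disp, int64_t N, int &CDisp8) {
  if (N == 0 || Disp % N != 0)
    return false;
  int64_t Scaled = Disp / N;
  if (Scaled != (int8_t)Scaled)
    return false; // does not fit even after compression
  CDisp8 = (int)Scaled;
  return true;
}
// e.g. Disp 256 with N == 64 compresses to CDisp8 == 4.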
- emitByte(modRMByte(2, RegOpcodeField, 4), CurByte, OS); + emitByte(modRMByte(2, RegOpcodeField, 4), OS); ForceDisp32 = true; } else if (Disp.getImm() == 0 && // Base reg can't be anything that ends up with '5' as the base // reg, it is the magic [*] nomenclature that indicates no base. BaseRegNo != N86::EBP) { // Emit no displacement ModR/M byte - emitByte(modRMByte(0, RegOpcodeField, 4), CurByte, OS); + emitByte(modRMByte(0, RegOpcodeField, 4), OS); } else if (!HasEVEX && isDisp8(Disp.getImm())) { // Emit the disp8 encoding. - emitByte(modRMByte(1, RegOpcodeField, 4), CurByte, OS); + emitByte(modRMByte(1, RegOpcodeField, 4), OS); ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP } else if (HasEVEX && isCDisp8(TSFlags, Disp.getImm(), CDisp8)) { // Emit the disp8 encoding. - emitByte(modRMByte(1, RegOpcodeField, 4), CurByte, OS); + emitByte(modRMByte(1, RegOpcodeField, 4), OS); ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP ImmOffset = CDisp8 - Disp.getImm(); } else { // Emit the normal disp32 encoding. - emitByte(modRMByte(2, RegOpcodeField, 4), CurByte, OS); + emitByte(modRMByte(2, RegOpcodeField, 4), OS); } // Calculate what the SS field value should be... @@ -613,77 +628,78 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, IndexRegNo = getX86RegNum(IndexReg); else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5) IndexRegNo = 4; - emitSIBByte(SS, IndexRegNo, 5, CurByte, OS); + emitSIBByte(SS, IndexRegNo, 5, OS); } else { unsigned IndexRegNo; if (IndexReg.getReg()) IndexRegNo = getX86RegNum(IndexReg); else IndexRegNo = 4; // For example [ESP+1*<noreg>+4] - emitSIBByte(SS, IndexRegNo, getX86RegNum(Base), CurByte, OS); + emitSIBByte(SS, IndexRegNo, getX86RegNum(Base), OS); } // Do we need to output a displacement? if (ForceDisp8) - emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups, + emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, StartByte, OS, Fixups, ImmOffset); else if (ForceDisp32 || Disp.getImm() != 0) emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte), - CurByte, OS, Fixups); + StartByte, OS, Fixups); } -void X86MCCodeEmitter::emitPrefixImpl(uint64_t TSFlags, unsigned &CurOp, - unsigned &CurByte, bool &Rex, - const MCInst &MI, const MCInstrDesc &Desc, - const MCSubtargetInfo &STI, - raw_ostream &OS) const { +/// Emit all instruction prefixes. +/// +/// \returns true if REX prefix is used, otherwise returns false. +bool X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, const MCInst &MI, + const MCSubtargetInfo &STI, + raw_ostream &OS) const { + uint64_t TSFlags = MCII.get(MI.getOpcode()).TSFlags; // Determine where the memory operand starts, if present. int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); - if (MemoryOperand != -1) - MemoryOperand += CurOp; - // Emit segment override opcode prefix as needed. - if (MemoryOperand >= 0) - emitSegmentOverridePrefix(CurByte, MemoryOperand + X86::AddrSegmentReg, MI, - OS); + if (MemoryOperand != -1) { + MemoryOperand += CurOp; + emitSegmentOverridePrefix(MemoryOperand + X86::AddrSegmentReg, MI, OS); + } // Emit the repeat opcode prefix as needed. unsigned Flags = MI.getFlags(); if (TSFlags & X86II::REP || Flags & X86::IP_HAS_REPEAT) - emitByte(0xF3, CurByte, OS); + emitByte(0xF3, OS); if (Flags & X86::IP_HAS_REPEAT_NE) - emitByte(0xF2, CurByte, OS); + emitByte(0xF2, OS); // Emit the address size opcode prefix as needed. 
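// The SIB byte emitted above reuses the ModRM packing, with SS holding
// log2 of the scale amount, index 4 meaning "no index", and base 5 under
// mod 0 meaning "no base, disp32 only". A sketch (hypothetical helper,
// assuming llvm/Support/MathExtras.h):
static uint8_t sketchSIB(unsigned ScaleAmt, unsigned IndexRegNo,
                         unsigned BaseRegNo) {
  unsigned SS = llvm::Log2_32(ScaleAmt); // 1,2,4,8 -> 0,1,2,3
  return BaseRegNo | (IndexRegNo << 3) | (SS << 6);
}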
- bool need_address_override; + bool NeedAddressOverride; uint64_t AdSize = TSFlags & X86II::AdSizeMask; if ((STI.hasFeature(X86::Mode16Bit) && AdSize == X86II::AdSize32) || (STI.hasFeature(X86::Mode32Bit) && AdSize == X86II::AdSize16) || (STI.hasFeature(X86::Mode64Bit) && AdSize == X86II::AdSize32)) { - need_address_override = true; + NeedAddressOverride = true; } else if (MemoryOperand < 0) { - need_address_override = false; + NeedAddressOverride = false; } else if (STI.hasFeature(X86::Mode64Bit)) { assert(!is16BitMemOperand(MI, MemoryOperand, STI)); - need_address_override = is32BitMemOperand(MI, MemoryOperand); + NeedAddressOverride = is32BitMemOperand(MI, MemoryOperand); } else if (STI.hasFeature(X86::Mode32Bit)) { assert(!is64BitMemOperand(MI, MemoryOperand)); - need_address_override = is16BitMemOperand(MI, MemoryOperand, STI); + NeedAddressOverride = is16BitMemOperand(MI, MemoryOperand, STI); } else { assert(STI.hasFeature(X86::Mode16Bit)); assert(!is64BitMemOperand(MI, MemoryOperand)); - need_address_override = !is16BitMemOperand(MI, MemoryOperand, STI); + NeedAddressOverride = !is16BitMemOperand(MI, MemoryOperand, STI); } - if (need_address_override) - emitByte(0x67, CurByte, OS); + if (NeedAddressOverride) + emitByte(0x67, OS); // Encoding type for this instruction. uint64_t Encoding = TSFlags & X86II::EncodingMask; - if (Encoding == 0) - Rex = emitOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, STI, OS); + bool HasREX = false; + if (Encoding) + emitVEXOpcodePrefix(MemoryOperand, MI, OS); else - emitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS); + HasREX = emitOpcodePrefix(MemoryOperand, MI, STI, OS); uint64_t Form = TSFlags & X86II::FormMask; switch (Form) { @@ -697,11 +713,11 @@ void X86MCCodeEmitter::emitPrefixImpl(uint64_t TSFlags, unsigned &CurOp, "SI and DI register sizes do not match"); // Emit segment override opcode prefix as needed (not for %ds). if (MI.getOperand(2).getReg() != X86::DS) - emitSegmentOverridePrefix(CurByte, 2, MI, OS); + emitSegmentOverridePrefix(2, MI, OS); // Emit AdSize prefix as needed. if ((!STI.hasFeature(X86::Mode32Bit) && siReg == X86::ESI) || (STI.hasFeature(X86::Mode32Bit) && siReg == X86::SI)) - emitByte(0x67, CurByte, OS); + emitByte(0x67, OS); CurOp += 3; // Consume operands. break; } @@ -709,11 +725,11 @@ void X86MCCodeEmitter::emitPrefixImpl(uint64_t TSFlags, unsigned &CurOp, unsigned siReg = MI.getOperand(0).getReg(); // Emit segment override opcode prefix as needed (not for %ds). if (MI.getOperand(1).getReg() != X86::DS) - emitSegmentOverridePrefix(CurByte, 1, MI, OS); + emitSegmentOverridePrefix(1, MI, OS); // Emit AdSize prefix as needed. if ((!STI.hasFeature(X86::Mode32Bit) && siReg == X86::ESI) || (STI.hasFeature(X86::Mode32Bit) && siReg == X86::SI)) - emitByte(0x67, CurByte, OS); + emitByte(0x67, OS); CurOp += 2; // Consume operands. break; } @@ -722,24 +738,26 @@ void X86MCCodeEmitter::emitPrefixImpl(uint64_t TSFlags, unsigned &CurOp, // Emit AdSize prefix as needed. if ((!STI.hasFeature(X86::Mode32Bit) && siReg == X86::EDI) || (STI.hasFeature(X86::Mode32Bit) && siReg == X86::DI)) - emitByte(0x67, CurByte, OS); + emitByte(0x67, OS); ++CurOp; // Consume operand. break; } case X86II::RawFrmMemOffs: { // Emit segment override opcode prefix as needed. - emitSegmentOverridePrefix(CurByte, 1, MI, OS); + emitSegmentOverridePrefix(1, MI, OS); break; } } + + return HasREX; } -/// emitVEXOpcodePrefix - AVX instructions are encoded using a opcode prefix -/// called VEX. 
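// A condensed restatement of the address-size decision above, applying
// after the explicit AdSize16/32 TSFlags overrides have been handled: the
// 0x67 prefix is needed exactly when a memory operand does not use the
// mode's native address width (hypothetical helper name):
static bool sketchNeedsAdSizeOverride(bool In64Bit, bool In32Bit,
                                      bool Is32BitMem, bool Is16BitMem) {
  if (In64Bit)
    return Is32BitMem; // 64-bit code, 32-bit address
  if (In32Bit)
    return Is16BitMem; // 32-bit code, 16-bit address
  return !Is16BitMem;  // 16-bit code, anything wider
}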
-void X86MCCodeEmitter::emitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, - int MemOperand, const MCInst &MI, - const MCInstrDesc &Desc, +/// AVX instructions are encoded using a opcode prefix called VEX. +void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, raw_ostream &OS) const { + const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); + uint64_t TSFlags = Desc.TSFlags; + assert(!(TSFlags & X86II::LOCK) && "Can't have LOCK VEX."); uint64_t Encoding = TSFlags & X86II::EncodingMask; @@ -868,8 +886,11 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, switch (TSFlags & X86II::FormMask) { default: llvm_unreachable("Unexpected form in emitVEXOpcodePrefix!"); + case X86II::MRM_C0: case X86II::RawFrm: + case X86II::PrefixByte: break; + case X86II::MRMDestMemFSIB: case X86II::MRMDestMem: { // MRMDestMem instructions forms: // MemAddr, src1(ModR/M) @@ -900,6 +921,7 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, EVEX_R2 = ~(RegEnc >> 4) & 1; break; } + case X86II::MRMSrcMemFSIB: case X86II::MRMSrcMem: { // MRMSrcMem instructions forms: // src1(ModR/M), MemAddr @@ -1081,6 +1103,15 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, EncodeRC = true; break; } + case X86II::MRMr0: { + // MRMr0 instructions forms: + // 11:rrr:000 + // dst(ModR/M) + unsigned RegEnc = getX86RegEncoding(MI, CurOp++); + VEX_R = ~(RegEnc >> 3) & 1; + EVEX_R2 = ~(RegEnc >> 4) & 1; + break; + } case X86II::MRM0r: case X86II::MRM1r: case X86II::MRM2r: @@ -1127,15 +1158,15 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // Can we use the 2 byte VEX prefix? if (!(MI.getFlags() & X86::IP_USE_VEX3) && Encoding == X86II::VEX && VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) { - emitByte(0xC5, CurByte, OS); - emitByte(LastByte | (VEX_R << 7), CurByte, OS); + emitByte(0xC5, OS); + emitByte(LastByte | (VEX_R << 7), OS); return; } // 3 byte VEX prefix - emitByte(Encoding == X86II::XOP ? 0x8F : 0xC4, CurByte, OS); - emitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS); - emitByte(LastByte | (VEX_W << 7), CurByte, OS); + emitByte(Encoding == X86II::XOP ? 0x8F : 0xC4, OS); + emitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, OS); + emitByte(LastByte | (VEX_W << 7), OS); } else { assert(Encoding == X86II::EVEX && "unknown encoding!"); // EVEX opcode prefix can have 4 bytes @@ -1146,144 +1177,137 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, assert((VEX_5M & 0x3) == VEX_5M && "More than 2 significant bits in VEX.m-mmmm fields for EVEX!"); - emitByte(0x62, CurByte, OS); + emitByte(0x62, OS); emitByte((VEX_R << 7) | (VEX_X << 6) | (VEX_B << 5) | (EVEX_R2 << 4) | VEX_5M, - CurByte, OS); - emitByte((VEX_W << 7) | (VEX_4V << 3) | (EVEX_U << 2) | VEX_PP, CurByte, OS); + emitByte((VEX_W << 7) | (VEX_4V << 3) | (EVEX_U << 2) | VEX_PP, OS); if (EncodeRC) emitByte((EVEX_z << 7) | (EVEX_rc << 5) | (EVEX_b << 4) | (EVEX_V2 << 3) | EVEX_aaa, - CurByte, OS); + OS); else emitByte((EVEX_z << 7) | (EVEX_L2 << 6) | (VEX_L << 5) | (EVEX_b << 4) | (EVEX_V2 << 3) | EVEX_aaa, - CurByte, OS); + OS); } } -/// Determine if the MCInst has to be encoded with a X86-64 REX prefix which -/// specifies 1) 64-bit instructions, 2) non-default operand size, and 3) use -/// of X86-64 extended registers. 
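// The 2-byte VEX form (C5) chosen above is only legal when every field it
// cannot carry holds its default value: opcode map 0F (m-mmmm == 1),
// VEX.W == 0, and the inverted X/B bits set, i.e. no extended index/base
// registers. The condition, restated as a sketch:
static bool sketchCanUseVex2(bool ForceVex3, bool IsVex, bool InvB,
                             bool InvX, bool W, unsigned MMMMM) {
  return !ForceVex3 && IsVex && InvB && InvX && !W && MMMMM == 1;
}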
-uint8_t X86MCCodeEmitter::determineREXPrefix(const MCInst &MI, uint64_t TSFlags, - int MemOperand, - const MCInstrDesc &Desc) const { - uint8_t REX = 0; - bool UsesHighByteReg = false; - - if (TSFlags & X86II::REX_W) - REX |= 1 << 3; // set REX.W +/// Emit REX prefix which specifies +/// 1) 64-bit instructions, +/// 2) non-default operand size, and +/// 3) use of X86-64 extended registers. +/// +/// \returns true if REX prefix is used, otherwise returns false. +bool X86MCCodeEmitter::emitREXPrefix(int MemOperand, const MCInst &MI, + raw_ostream &OS) const { + uint8_t REX = [&, MemOperand]() { + uint8_t REX = 0; + bool UsesHighByteReg = false; + + const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); + uint64_t TSFlags = Desc.TSFlags; + + if (TSFlags & X86II::REX_W) + REX |= 1 << 3; // set REX.W + + if (MI.getNumOperands() == 0) + return REX; + + unsigned NumOps = MI.getNumOperands(); + unsigned CurOp = X86II::getOperandBias(Desc); + + // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. + for (unsigned i = CurOp; i != NumOps; ++i) { + const MCOperand &MO = MI.getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH) + UsesHighByteReg = true; + if (X86II::isX86_64NonExtLowByteReg(Reg)) + // FIXME: The caller of determineREXPrefix slaps this prefix onto + // anything that returns non-zero. + REX |= 0x40; // REX fixed encoding prefix + } - if (MI.getNumOperands() == 0) + switch (TSFlags & X86II::FormMask) { + case X86II::AddRegFrm: + REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B + break; + case X86II::MRMSrcReg: + case X86II::MRMSrcRegCC: + REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R + REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B + break; + case X86II::MRMSrcMem: + case X86II::MRMSrcMemCC: + REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R + REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B + REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X + CurOp += X86::AddrNumOperands; + break; + case X86II::MRMDestReg: + REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B + REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R + break; + case X86II::MRMDestMem: + REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B + REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X + CurOp += X86::AddrNumOperands; + REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R + break; + case X86II::MRMXmCC: + case X86II::MRMXm: + case X86II::MRM0m: + case X86II::MRM1m: + case X86II::MRM2m: + case X86II::MRM3m: + case X86II::MRM4m: + case X86II::MRM5m: + case X86II::MRM6m: + case X86II::MRM7m: + REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B + REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X + break; + case X86II::MRMXrCC: + case X86II::MRMXr: + case X86II::MRM0r: + case X86II::MRM1r: + case X86II::MRM2r: + case X86II::MRM3r: + case X86II::MRM4r: + case X86II::MRM5r: + case X86II::MRM6r: + case X86II::MRM7r: + REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B + break; + case X86II::MRMr0: + REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R + break; + case X86II::MRMDestMemFSIB: + llvm_unreachable("FSIB format never need REX prefix!"); + } + if (REX && UsesHighByteReg) + report_fatal_error( + "Cannot encode high byte register in REX-prefixed instruction"); return REX; + }(); - unsigned NumOps = MI.getNumOperands(); - unsigned CurOp = 
X86II::getOperandBias(Desc); - - // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. - for (unsigned i = CurOp; i != NumOps; ++i) { - const MCOperand &MO = MI.getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH) - UsesHighByteReg = true; - if (X86II::isX86_64NonExtLowByteReg(Reg)) - // FIXME: The caller of determineREXPrefix slaps this prefix onto anything - // that returns non-zero. - REX |= 0x40; // REX fixed encoding prefix - } - - switch (TSFlags & X86II::FormMask) { - case X86II::AddRegFrm: - REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B - break; - case X86II::MRMSrcReg: - case X86II::MRMSrcRegCC: - REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R - REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B - break; - case X86II::MRMSrcMem: - case X86II::MRMSrcMemCC: - REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R - REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B - REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X - CurOp += X86::AddrNumOperands; - break; - case X86II::MRMDestReg: - REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B - REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R - break; - case X86II::MRMDestMem: - REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B - REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X - CurOp += X86::AddrNumOperands; - REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R - break; - case X86II::MRMXmCC: - case X86II::MRMXm: - case X86II::MRM0m: - case X86II::MRM1m: - case X86II::MRM2m: - case X86II::MRM3m: - case X86II::MRM4m: - case X86II::MRM5m: - case X86II::MRM6m: - case X86II::MRM7m: - REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B - REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X - break; - case X86II::MRMXrCC: - case X86II::MRMXr: - case X86II::MRM0r: - case X86II::MRM1r: - case X86II::MRM2r: - case X86II::MRM3r: - case X86II::MRM4r: - case X86II::MRM5r: - case X86II::MRM6r: - case X86II::MRM7r: - REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B - break; - } - if (REX && UsesHighByteReg) - report_fatal_error( - "Cannot encode high byte register in REX-prefixed instruction"); + if (!REX) + return false; - return REX; + emitByte(0x40 | REX, OS); + return true; } /// Emit segment override opcode prefix as needed. -void X86MCCodeEmitter::emitSegmentOverridePrefix(unsigned &CurByte, - unsigned SegOperand, +void X86MCCodeEmitter::emitSegmentOverridePrefix(unsigned SegOperand, const MCInst &MI, raw_ostream &OS) const { // Check for explicit segment override on memory operand. - switch (MI.getOperand(SegOperand).getReg()) { - default: - llvm_unreachable("Unknown segment register!"); - case 0: - break; - case X86::CS: - emitByte(0x2E, CurByte, OS); - break; - case X86::SS: - emitByte(0x36, CurByte, OS); - break; - case X86::DS: - emitByte(0x3E, CurByte, OS); - break; - case X86::ES: - emitByte(0x26, CurByte, OS); - break; - case X86::FS: - emitByte(0x64, CurByte, OS); - break; - case X86::GS: - emitByte(0x65, CurByte, OS); - break; - } + if (unsigned Reg = MI.getOperand(SegOperand).getReg()) + emitByte(X86::getSegmentOverridePrefixForReg(Reg), OS); } /// Emit all instruction prefixes prior to the opcode. 
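// The switch removed above is the entire mapping now provided by
// X86::getSegmentOverridePrefixForReg; a sketch of it, for reference:
static uint8_t sketchSegOverridePrefix(unsigned Reg) {
  switch (Reg) {
  case X86::CS: return 0x2E;
  case X86::SS: return 0x36;
  case X86::DS: return 0x3E;
  case X86::ES: return 0x26;
  case X86::FS: return 0x64;
  case X86::GS: return 0x65;
  }
  llvm_unreachable("unknown segment register");
}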
@@ -1291,48 +1315,44 @@ void X86MCCodeEmitter::emitSegmentOverridePrefix(unsigned &CurByte, /// \param MemOperand the operand # of the start of a memory operand if present. /// If not present, it is -1. /// -/// \returns true if a REX prefix was used. -bool X86MCCodeEmitter::emitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, - int MemOperand, const MCInst &MI, - const MCInstrDesc &Desc, +/// \returns true if REX prefix is used, otherwise returns false. +bool X86MCCodeEmitter::emitOpcodePrefix(int MemOperand, const MCInst &MI, const MCSubtargetInfo &STI, raw_ostream &OS) const { - bool Ret = false; + const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); + uint64_t TSFlags = Desc.TSFlags; + // Emit the operand size opcode prefix as needed. if ((TSFlags & X86II::OpSizeMask) == (STI.hasFeature(X86::Mode16Bit) ? X86II::OpSize32 : X86II::OpSize16)) - emitByte(0x66, CurByte, OS); + emitByte(0x66, OS); // Emit the LOCK opcode prefix. if (TSFlags & X86II::LOCK || MI.getFlags() & X86::IP_HAS_LOCK) - emitByte(0xF0, CurByte, OS); + emitByte(0xF0, OS); // Emit the NOTRACK opcode prefix. if (TSFlags & X86II::NOTRACK || MI.getFlags() & X86::IP_HAS_NOTRACK) - emitByte(0x3E, CurByte, OS); + emitByte(0x3E, OS); switch (TSFlags & X86II::OpPrefixMask) { case X86II::PD: // 66 - emitByte(0x66, CurByte, OS); + emitByte(0x66, OS); break; case X86II::XS: // F3 - emitByte(0xF3, CurByte, OS); + emitByte(0xF3, OS); break; case X86II::XD: // F2 - emitByte(0xF2, CurByte, OS); + emitByte(0xF2, OS); break; } // Handle REX prefix. - // FIXME: Can this come before F2 etc to simplify emission? - if (STI.hasFeature(X86::Mode64Bit)) { - if (uint8_t REX = determineREXPrefix(MI, TSFlags, MemOperand, Desc)) { - emitByte(0x40 | REX, CurByte, OS); - Ret = true; - } - } else { - assert(!(TSFlags & X86II::REX_W) && "REX.W requires 64bit mode."); - } + assert((STI.hasFeature(X86::Mode64Bit) || !(TSFlags & X86II::REX_W)) && + "REX.W requires 64bit mode."); + bool HasREX = STI.hasFeature(X86::Mode64Bit) + ? emitREXPrefix(MemOperand, MI, OS) + : false; // 0x0F escape code must be emitted just before the opcode. switch (TSFlags & X86II::OpMapMask) { @@ -1340,19 +1360,20 @@ bool X86MCCodeEmitter::emitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, case X86II::T8: // 0F 38 case X86II::TA: // 0F 3A case X86II::ThreeDNow: // 0F 0F, second 0F emitted by caller. - emitByte(0x0F, CurByte, OS); + emitByte(0x0F, OS); break; } switch (TSFlags & X86II::OpMapMask) { case X86II::T8: // 0F 38 - emitByte(0x38, CurByte, OS); + emitByte(0x38, OS); break; case X86II::TA: // 0F 3A - emitByte(0x3A, CurByte, OS); + emitByte(0x3A, OS); break; } - return Ret; + + return HasREX; } void X86MCCodeEmitter::emitPrefix(const MCInst &MI, raw_ostream &OS, @@ -1362,16 +1383,12 @@ void X86MCCodeEmitter::emitPrefix(const MCInst &MI, raw_ostream &OS, uint64_t TSFlags = Desc.TSFlags; // Pseudo instructions don't get encoded. - if ((TSFlags & X86II::FormMask) == X86II::Pseudo) + if (X86II::isPseudo(TSFlags)) return; unsigned CurOp = X86II::getOperandBias(Desc); - // Keep track of the current byte being emitted. - unsigned CurByte = 0; - - bool Rex = false; - emitPrefixImpl(TSFlags, CurOp, CurByte, Rex, MI, Desc, STI, OS); + emitPrefixImpl(CurOp, MI, STI, OS); } void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, @@ -1382,17 +1399,15 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, uint64_t TSFlags = Desc.TSFlags; // Pseudo instructions don't get encoded. 
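// Prefixes in emitOpcodePrefix above go out in a fixed order ahead of the
// opcode:
//   [66 opsize] [F0 lock] [3E notrack] [66|F3|F2 mandatory] [REX]
//   [0F escape] [38|3A map] opcode
// The operand-size test is mode-relative -- 0x66 flips between 16- and
// 32-bit operand size -- which the first branch reduces to:
static bool sketchNeedsOpSize(uint64_t TSFlags, bool In16BitMode) {
  uint64_t OpSize = TSFlags & X86II::OpSizeMask;
  return OpSize == (In16BitMode ? X86II::OpSize32 : X86II::OpSize16);
}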
- if ((TSFlags & X86II::FormMask) == X86II::Pseudo) + if (X86II::isPseudo(TSFlags)) return; unsigned NumOps = Desc.getNumOperands(); unsigned CurOp = X86II::getOperandBias(Desc); - // Keep track of the current byte being emitted. - unsigned CurByte = 0; + uint64_t StartByte = OS.tell(); - bool Rex = false; - emitPrefixImpl(TSFlags, CurOp, CurByte, Rex, MI, Desc, STI, OS); + bool HasREX = emitPrefixImpl(CurOp, MI, STI, OS); // It uses the VEX.VVVV field? bool HasVEX_4V = TSFlags & X86II::VEX_4V; @@ -1422,7 +1437,8 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::RawFrmDstSrc: case X86II::RawFrmSrc: case X86II::RawFrmDst: - emitByte(BaseOpcode, CurByte, OS); + case X86II::PrefixByte: + emitByte(BaseOpcode, OS); break; case X86II::AddCCFrm: { // This will be added to the opcode in the fallthrough. @@ -1431,47 +1447,47 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, --NumOps; // Drop the operand from the end. LLVM_FALLTHROUGH; case X86II::RawFrm: - emitByte(BaseOpcode + OpcodeOffset, CurByte, OS); + emitByte(BaseOpcode + OpcodeOffset, OS); if (!STI.hasFeature(X86::Mode64Bit) || !isPCRel32Branch(MI, MCII)) break; const MCOperand &Op = MI.getOperand(CurOp++); emitImmediate(Op, MI.getLoc(), X86II::getSizeOfImm(TSFlags), - MCFixupKind(X86::reloc_branch_4byte_pcrel), CurByte, OS, + MCFixupKind(X86::reloc_branch_4byte_pcrel), StartByte, OS, Fixups); break; } case X86II::RawFrmMemOffs: - emitByte(BaseOpcode, CurByte, OS); + emitByte(BaseOpcode, OS); emitImmediate(MI.getOperand(CurOp++), MI.getLoc(), X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), - CurByte, OS, Fixups); + StartByte, OS, Fixups); ++CurOp; // skip segment operand break; case X86II::RawFrmImm8: - emitByte(BaseOpcode, CurByte, OS); + emitByte(BaseOpcode, OS); emitImmediate(MI.getOperand(CurOp++), MI.getLoc(), X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), - CurByte, OS, Fixups); - emitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 1, FK_Data_1, CurByte, + StartByte, OS, Fixups); + emitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 1, FK_Data_1, StartByte, OS, Fixups); break; case X86II::RawFrmImm16: - emitByte(BaseOpcode, CurByte, OS); + emitByte(BaseOpcode, OS); emitImmediate(MI.getOperand(CurOp++), MI.getLoc(), X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), - CurByte, OS, Fixups); - emitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 2, FK_Data_2, CurByte, + StartByte, OS, Fixups); + emitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 2, FK_Data_2, StartByte, OS, Fixups); break; case X86II::AddRegFrm: - emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++)), CurByte, OS); + emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++)), OS); break; case X86II::MRMDestReg: { - emitByte(BaseOpcode, CurByte, OS); + emitByte(BaseOpcode, OS); unsigned SrcRegNum = CurOp + 1; if (HasEVEX_K) // Skip writemask @@ -1481,12 +1497,13 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, ++SrcRegNum; emitRegModRMByte(MI.getOperand(CurOp), - getX86RegNum(MI.getOperand(SrcRegNum)), CurByte, OS); + getX86RegNum(MI.getOperand(SrcRegNum)), OS); CurOp = SrcRegNum + 1; break; } + case X86II::MRMDestMemFSIB: case X86II::MRMDestMem: { - emitByte(BaseOpcode, CurByte, OS); + emitByte(BaseOpcode, OS); unsigned SrcRegNum = CurOp + X86::AddrNumOperands; if (HasEVEX_K) // Skip writemask @@ -1495,13 +1512,14 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, if (HasVEX_4V) // Skip 1st src (which is encoded in 
VEX_VVVV) ++SrcRegNum; + bool ForceSIB = (Form == X86II::MRMDestMemFSIB); emitMemModRMByte(MI, CurOp, getX86RegNum(MI.getOperand(SrcRegNum)), TSFlags, - Rex, CurByte, OS, Fixups, STI); + HasREX, StartByte, OS, Fixups, STI, ForceSIB); CurOp = SrcRegNum + 1; break; } case X86II::MRMSrcReg: { - emitByte(BaseOpcode, CurByte, OS); + emitByte(BaseOpcode, OS); unsigned SrcRegNum = CurOp + 1; if (HasEVEX_K) // Skip writemask @@ -1511,7 +1529,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, ++SrcRegNum; emitRegModRMByte(MI.getOperand(SrcRegNum), - getX86RegNum(MI.getOperand(CurOp)), CurByte, OS); + getX86RegNum(MI.getOperand(CurOp)), OS); CurOp = SrcRegNum + 1; if (HasVEX_I8Reg) I8RegNum = getX86RegEncoding(MI, CurOp++); @@ -1521,17 +1539,17 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, break; } case X86II::MRMSrcReg4VOp3: { - emitByte(BaseOpcode, CurByte, OS); + emitByte(BaseOpcode, OS); unsigned SrcRegNum = CurOp + 1; emitRegModRMByte(MI.getOperand(SrcRegNum), - getX86RegNum(MI.getOperand(CurOp)), CurByte, OS); + getX86RegNum(MI.getOperand(CurOp)), OS); CurOp = SrcRegNum + 1; ++CurOp; // Encoded in VEX.VVVV break; } case X86II::MRMSrcRegOp4: { - emitByte(BaseOpcode, CurByte, OS); + emitByte(BaseOpcode, OS); unsigned SrcRegNum = CurOp + 1; // Skip 1st src (which is encoded in VEX_VVVV) @@ -1542,7 +1560,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, I8RegNum = getX86RegEncoding(MI, SrcRegNum++); emitRegModRMByte(MI.getOperand(SrcRegNum), - getX86RegNum(MI.getOperand(CurOp)), CurByte, OS); + getX86RegNum(MI.getOperand(CurOp)), OS); CurOp = SrcRegNum + 1; break; } @@ -1551,12 +1569,13 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, unsigned SecondOp = CurOp++; unsigned CC = MI.getOperand(CurOp++).getImm(); - emitByte(BaseOpcode + CC, CurByte, OS); + emitByte(BaseOpcode + CC, OS); emitRegModRMByte(MI.getOperand(SecondOp), - getX86RegNum(MI.getOperand(FirstOp)), CurByte, OS); + getX86RegNum(MI.getOperand(FirstOp)), OS); break; } + case X86II::MRMSrcMemFSIB: case X86II::MRMSrcMem: { unsigned FirstMemOp = CurOp + 1; @@ -1566,10 +1585,11 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, if (HasVEX_4V) ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV). - emitByte(BaseOpcode, CurByte, OS); + emitByte(BaseOpcode, OS); + bool ForceSIB = (Form == X86II::MRMSrcMemFSIB); emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)), - TSFlags, Rex, CurByte, OS, Fixups, STI); + TSFlags, HasREX, StartByte, OS, Fixups, STI, ForceSIB); CurOp = FirstMemOp + X86::AddrNumOperands; if (HasVEX_I8Reg) I8RegNum = getX86RegEncoding(MI, CurOp++); @@ -1578,10 +1598,10 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRMSrcMem4VOp3: { unsigned FirstMemOp = CurOp + 1; - emitByte(BaseOpcode, CurByte, OS); + emitByte(BaseOpcode, OS); emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)), - TSFlags, Rex, CurByte, OS, Fixups, STI); + TSFlags, HasREX, StartByte, OS, Fixups, STI); CurOp = FirstMemOp + X86::AddrNumOperands; ++CurOp; // Encoded in VEX.VVVV. 
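// Condition-carrying forms (AddCCFrm and the MRM*CC cases above) fold the
// 4-bit condition code into the opcode byte itself rather than emitting a
// separate immediate. A sketch (hypothetical name):
static uint8_t sketchCCOpcode(uint8_t BaseOpcode, unsigned CC) {
  assert(CC < 16 && "x86 condition codes are 4 bits");
  return BaseOpcode + CC; // e.g. CMOVcc r32: 0F 40+cc
}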
break; @@ -1595,10 +1615,10 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, assert(HasVEX_I8Reg && "MRMSrcRegOp4 should imply VEX_I8Reg"); I8RegNum = getX86RegEncoding(MI, FirstMemOp++); - emitByte(BaseOpcode, CurByte, OS); + emitByte(BaseOpcode, OS); emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)), - TSFlags, Rex, CurByte, OS, Fixups, STI); + TSFlags, HasREX, StartByte, OS, Fixups, STI); CurOp = FirstMemOp + X86::AddrNumOperands; break; } @@ -1608,10 +1628,10 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, CurOp = FirstMemOp + X86::AddrNumOperands; unsigned CC = MI.getOperand(CurOp++).getImm(); - emitByte(BaseOpcode + CC, CurByte, OS); + emitByte(BaseOpcode + CC, OS); emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(RegOp)), - TSFlags, Rex, CurByte, OS, Fixups, STI); + TSFlags, HasREX, StartByte, OS, Fixups, STI); break; } @@ -1619,8 +1639,8 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, unsigned RegOp = CurOp++; unsigned CC = MI.getOperand(CurOp++).getImm(); - emitByte(BaseOpcode + CC, CurByte, OS); - emitRegModRMByte(MI.getOperand(RegOp), 0, CurByte, OS); + emitByte(BaseOpcode + CC, OS); + emitRegModRMByte(MI.getOperand(RegOp), 0, OS); break; } @@ -1637,10 +1657,13 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, ++CurOp; if (HasEVEX_K) // Skip writemask ++CurOp; - emitByte(BaseOpcode, CurByte, OS); + emitByte(BaseOpcode, OS); emitRegModRMByte(MI.getOperand(CurOp++), - (Form == X86II::MRMXr) ? 0 : Form - X86II::MRM0r, CurByte, - OS); + (Form == X86II::MRMXr) ? 0 : Form - X86II::MRM0r, OS); + break; + case X86II::MRMr0: + emitByte(BaseOpcode, OS); + emitByte(modRMByte(3, getX86RegNum(MI.getOperand(CurOp++)),0), OS); break; case X86II::MRMXmCC: { @@ -1648,9 +1671,10 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, CurOp = FirstMemOp + X86::AddrNumOperands; unsigned CC = MI.getOperand(CurOp++).getImm(); - emitByte(BaseOpcode + CC, CurByte, OS); + emitByte(BaseOpcode + CC, OS); - emitMemModRMByte(MI, FirstMemOp, 0, TSFlags, Rex, CurByte, OS, Fixups, STI); + emitMemModRMByte(MI, FirstMemOp, 0, TSFlags, HasREX, StartByte, OS, Fixups, + STI); break; } @@ -1667,13 +1691,25 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, ++CurOp; if (HasEVEX_K) // Skip writemask ++CurOp; - emitByte(BaseOpcode, CurByte, OS); + emitByte(BaseOpcode, OS); emitMemModRMByte(MI, CurOp, (Form == X86II::MRMXm) ? 
0 : Form - X86II::MRM0m, TSFlags, - Rex, CurByte, OS, Fixups, STI); + HasREX, StartByte, OS, Fixups, STI); CurOp += X86::AddrNumOperands; break; + case X86II::MRM0X: + case X86II::MRM1X: + case X86II::MRM2X: + case X86II::MRM3X: + case X86II::MRM4X: + case X86II::MRM5X: + case X86II::MRM6X: + case X86II::MRM7X: + emitByte(BaseOpcode, OS); + emitByte(0xC0 + ((Form - X86II::MRM0X) << 3), OS); + break; + case X86II::MRM_C0: case X86II::MRM_C1: case X86II::MRM_C2: @@ -1738,8 +1774,8 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRM_FD: case X86II::MRM_FE: case X86II::MRM_FF: - emitByte(BaseOpcode, CurByte, OS); - emitByte(0xC0 + Form - X86II::MRM_C0, CurByte, OS); + emitByte(BaseOpcode, OS); + emitByte(0xC0 + Form - X86II::MRM_C0, OS); break; } @@ -1754,7 +1790,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, I8RegNum |= Val; } emitImmediate(MCOperand::createImm(I8RegNum), MI.getLoc(), 1, FK_Data_1, - CurByte, OS, Fixups); + StartByte, OS, Fixups); } else { // If there is a remaining operand, it must be a trailing immediate. Emit it // according to the right size for the instruction. Some instructions @@ -1762,13 +1798,15 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, while (CurOp != NumOps && NumOps - CurOp <= 2) { emitImmediate(MI.getOperand(CurOp++), MI.getLoc(), X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), - CurByte, OS, Fixups); + StartByte, OS, Fixups); } } if ((TSFlags & X86II::OpMapMask) == X86II::ThreeDNow) - emitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS); + emitByte(X86II::getBaseOpcodeFor(TSFlags), OS); + assert(OS.tell() - StartByte <= 15 && + "The size of instruction must be no longer than 15."); #ifndef NDEBUG // FIXME: Verify. if (/*!Desc.isVariadic() &&*/ CurOp != NumOps) { diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 049a3a8159841..81110ba666e95 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -30,10 +30,6 @@ #include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" -#if _MSC_VER -#include <intrin.h> -#endif - using namespace llvm; #define GET_REGINFO_MC_DESC @@ -294,7 +290,7 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(const Triple &TT, if (!FS.empty()) ArchFS = (Twine(ArchFS) + "," + FS).str(); - std::string CPUName = CPU; + std::string CPUName = std::string(CPU); if (CPUName.empty()) CPUName = "generic"; @@ -335,7 +331,10 @@ static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI, MAI = new X86ELFMCAsmInfo(TheTriple); } else if (TheTriple.isWindowsMSVCEnvironment() || TheTriple.isWindowsCoreCLREnvironment()) { - MAI = new X86MCAsmInfoMicrosoft(TheTriple); + if (Options.getAssemblyLanguage().equals_lower("masm")) + MAI = new X86MCAsmInfoMicrosoftMASM(TheTriple); + else + MAI = new X86MCAsmInfoMicrosoft(TheTriple); } else if (TheTriple.isOSCygMing() || TheTriple.isWindowsItaniumEnvironment()) { MAI = new X86MCAsmInfoGNUCOFF(TheTriple); @@ -350,7 +349,7 @@ static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI, // Initial state of the frame pointer is esp+stackGrowth. unsigned StackPtr = is64Bit ? 
X86::RSP : X86::ESP; - MCCFIInstruction Inst = MCCFIInstruction::createDefCfa( + MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa( nullptr, MRI.getDwarfRegNum(StackPtr, true), -stackGrowth); MAI->addInitialFrameState(Inst); @@ -401,6 +400,9 @@ public: findPltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents, uint64_t GotSectionVA, const Triple &TargetTriple) const override; + + bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target) const override; Optional<uint64_t> evaluateMemoryOperandAddress(const MCInst &Inst, uint64_t Addr, uint64_t Size) const override; @@ -519,6 +521,15 @@ std::vector<std::pair<uint64_t, uint64_t>> X86MCInstrAnalysis::findPltEntries( } } +bool X86MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr, + uint64_t Size, uint64_t &Target) const { + if (Inst.getNumOperands() == 0 || + Info->get(Inst.getOpcode()).OpInfo[0].OperandType != MCOI::OPERAND_PCREL) + return false; + Target = Addr + Size + Inst.getOperand(0).getImm(); + return true; +} + Optional<uint64_t> X86MCInstrAnalysis::evaluateMemoryOperandAddress( const MCInst &Inst, uint64_t Addr, uint64_t Size) const { const MCInstrDesc &MCID = Info->get(Inst.getOpcode()); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index 0c789061f0e13..e8c72be1d9b6f 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -13,27 +13,28 @@ #ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCTARGETDESC_H #define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCTARGETDESC_H -#include "llvm/MC/MCRegister.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Support/DataTypes.h" +#include <memory> #include <string> namespace llvm { +class formatted_raw_ostream; class MCAsmBackend; class MCCodeEmitter; class MCContext; +class MCInst; +class MCInstPrinter; class MCInstrInfo; class MCObjectTargetWriter; class MCObjectWriter; +class MCRegister; class MCRegisterInfo; +class MCStreamer; class MCSubtargetInfo; -class MCRelocationInfo; class MCTargetOptions; +class MCTargetStreamer; class Target; class Triple; class StringRef; -class raw_ostream; -class raw_pwrite_stream; /// Flavour of dwarf regnumbers /// diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp new file mode 100644 index 0000000000000..62c1c399a606e --- /dev/null +++ b/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp @@ -0,0 +1,571 @@ +//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Define several functions to decode x86 specific shuffle semantics into a +// generic vector mask. 
+// +//===----------------------------------------------------------------------===// + +#include "X86ShuffleDecode.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" + +//===----------------------------------------------------------------------===// +// Vector Mask Decoding +//===----------------------------------------------------------------------===// + +namespace llvm { + +void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { + // Default to copying the dest value. + ShuffleMask.push_back(0); + ShuffleMask.push_back(1); + ShuffleMask.push_back(2); + ShuffleMask.push_back(3); + + // Decode the immediate. + unsigned ZMask = Imm & 15; + unsigned CountD = (Imm >> 4) & 3; + unsigned CountS = (Imm >> 6) & 3; + + // CountS selects which input element to use. + unsigned InVal = 4 + CountS; + // CountD specifies which element of destination to update. + ShuffleMask[CountD] = InVal; + // ZMask zaps values, potentially overriding the CountD elt. + if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero; + if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero; + if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero; + if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero; +} + +void DecodeInsertElementMask(unsigned NumElts, unsigned Idx, unsigned Len, + SmallVectorImpl<int> &ShuffleMask) { + assert((Idx + Len) <= NumElts && "Insertion out of range"); + + for (unsigned i = 0; i != NumElts; ++i) + ShuffleMask.push_back(i); + for (unsigned i = 0; i != Len; ++i) + ShuffleMask[Idx + i] = NumElts + i; +} + +// <3,1> or <6,7,2,3> +void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { + for (unsigned i = NElts / 2; i != NElts; ++i) + ShuffleMask.push_back(NElts + i); + + for (unsigned i = NElts / 2; i != NElts; ++i) + ShuffleMask.push_back(i); +} + +// <0,2> or <0,1,4,5> +void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { + for (unsigned i = 0; i != NElts / 2; ++i) + ShuffleMask.push_back(i); + + for (unsigned i = 0; i != NElts / 2; ++i) + ShuffleMask.push_back(NElts + i); +} + +void DecodeMOVSLDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) { + for (int i = 0, e = NumElts / 2; i < e; ++i) { + ShuffleMask.push_back(2 * i); + ShuffleMask.push_back(2 * i); + } +} + +void DecodeMOVSHDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) { + for (int i = 0, e = NumElts / 2; i < e; ++i) { + ShuffleMask.push_back(2 * i + 1); + ShuffleMask.push_back(2 * i + 1); + } +} + +void DecodeMOVDDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) { + const unsigned NumLaneElts = 2; + + for (unsigned l = 0; l < NumElts; l += NumLaneElts) + for (unsigned i = 0; i < NumLaneElts; ++i) + ShuffleMask.push_back(l); +} + +void DecodePSLLDQMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl<int> &ShuffleMask) { + const unsigned NumLaneElts = 16; + + for (unsigned l = 0; l < NumElts; l += NumLaneElts) + for (unsigned i = 0; i < NumLaneElts; ++i) { + int M = SM_SentinelZero; + if (i >= Imm) M = i - Imm + l; + ShuffleMask.push_back(M); + } +} + +void DecodePSRLDQMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl<int> &ShuffleMask) { + const unsigned NumLaneElts = 16; + + for (unsigned l = 0; l < NumElts; l += NumLaneElts) + for (unsigned i = 0; i < NumLaneElts; ++i) { + unsigned Base = i + Imm; + int M = Base + l; + if (Base >= NumLaneElts) M = SM_SentinelZero; + ShuffleMask.push_back(M); + } +} + +void DecodePALIGNRMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl<int> &ShuffleMask) { + const unsigned
NumLaneElts = 16; + + for (unsigned l = 0; l != NumElts; l += NumLaneElts) { + for (unsigned i = 0; i != NumLaneElts; ++i) { + unsigned Base = i + Imm; + // If i+imm is out of this lane then we actually need the other source. + if (Base >= NumLaneElts) Base += NumElts - NumLaneElts; + ShuffleMask.push_back(Base + l); + } + } +} + +void DecodeVALIGNMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl<int> &ShuffleMask) { + // Not all bits of the immediate are used so mask it. + assert(isPowerOf2_32(NumElts) && "NumElts should be a power of 2"); + Imm = Imm & (NumElts - 1); + for (unsigned i = 0; i != NumElts; ++i) + ShuffleMask.push_back(i + Imm); +} + +void DecodePSHUFMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm, + SmallVectorImpl<int> &ShuffleMask) { + unsigned Size = NumElts * ScalarBits; + unsigned NumLanes = Size / 128; + if (NumLanes == 0) NumLanes = 1; // Handle MMX + unsigned NumLaneElts = NumElts / NumLanes; + + uint32_t SplatImm = (Imm & 0xff) * 0x01010101; + for (unsigned l = 0; l != NumElts; l += NumLaneElts) { + for (unsigned i = 0; i != NumLaneElts; ++i) { + ShuffleMask.push_back(SplatImm % NumLaneElts + l); + SplatImm /= NumLaneElts; + } + } +} + +void DecodePSHUFHWMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl<int> &ShuffleMask) { + for (unsigned l = 0; l != NumElts; l += 8) { + unsigned NewImm = Imm; + for (unsigned i = 0, e = 4; i != e; ++i) { + ShuffleMask.push_back(l + i); + } + for (unsigned i = 4, e = 8; i != e; ++i) { + ShuffleMask.push_back(l + 4 + (NewImm & 3)); + NewImm >>= 2; + } + } +} + +void DecodePSHUFLWMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl<int> &ShuffleMask) { + for (unsigned l = 0; l != NumElts; l += 8) { + unsigned NewImm = Imm; + for (unsigned i = 0, e = 4; i != e; ++i) { + ShuffleMask.push_back(l + (NewImm & 3)); + NewImm >>= 2; + } + for (unsigned i = 4, e = 8; i != e; ++i) { + ShuffleMask.push_back(l + i); + } + } +} + +void DecodePSWAPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) { + unsigned NumHalfElts = NumElts / 2; + + for (unsigned l = 0; l != NumHalfElts; ++l) + ShuffleMask.push_back(l + NumHalfElts); + for (unsigned h = 0; h != NumHalfElts; ++h) + ShuffleMask.push_back(h); +} + +void DecodeSHUFPMask(unsigned NumElts, unsigned ScalarBits, + unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { + unsigned NumLaneElts = 128 / ScalarBits; + + unsigned NewImm = Imm; + for (unsigned l = 0; l != NumElts; l += NumLaneElts) { + // Each half of a lane comes from a different source. + for (unsigned s = 0; s != NumElts * 2; s += NumElts) { + for (unsigned i = 0; i != NumLaneElts / 2; ++i) { + ShuffleMask.push_back(NewImm % NumLaneElts + s + l); + NewImm /= NumLaneElts; + } + } + if (NumLaneElts == 4) NewImm = Imm; // reload imm + } +} + +void DecodeUNPCKHMask(unsigned NumElts, unsigned ScalarBits, + SmallVectorImpl<int> &ShuffleMask) { + // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate + // independently on 128-bit lanes. + unsigned NumLanes = (NumElts * ScalarBits) / 128; + if (NumLanes == 0) NumLanes = 1; // Handle MMX + unsigned NumLaneElts = NumElts / NumLanes; + + for (unsigned l = 0; l != NumElts; l += NumLaneElts) { + for (unsigned i = l + NumLaneElts / 2, e = l + NumLaneElts; i != e; ++i) { + ShuffleMask.push_back(i); // Reads from dest/src1 + ShuffleMask.push_back(i + NumElts); // Reads from src/src2 + } + } +} + +void DecodeUNPCKLMask(unsigned NumElts, unsigned ScalarBits, + SmallVectorImpl<int> &ShuffleMask) { + // Handle 128 and 256-bit vector lengths.
AVX defines UNPCK* to operate + // independently on 128-bit lanes. + unsigned NumLanes = (NumElts * ScalarBits) / 128; + if (NumLanes == 0) NumLanes = 1; // Handle MMX + unsigned NumLaneElts = NumElts / NumLanes; + + for (unsigned l = 0; l != NumElts; l += NumLaneElts) { + for (unsigned i = l, e = l + NumLaneElts / 2; i != e; ++i) { + ShuffleMask.push_back(i); // Reads from dest/src1 + ShuffleMask.push_back(i + NumElts); // Reads from src/src2 + } + } +} + +void DecodeVectorBroadcast(unsigned NumElts, + SmallVectorImpl<int> &ShuffleMask) { + ShuffleMask.append(NumElts, 0); +} + +void DecodeSubVectorBroadcast(unsigned DstNumElts, unsigned SrcNumElts, + SmallVectorImpl<int> &ShuffleMask) { + unsigned Scale = DstNumElts / SrcNumElts; + + for (unsigned i = 0; i != Scale; ++i) + for (unsigned j = 0; j != SrcNumElts; ++j) + ShuffleMask.push_back(j); +} + +void decodeVSHUF64x2FamilyMask(unsigned NumElts, unsigned ScalarSize, + unsigned Imm, + SmallVectorImpl<int> &ShuffleMask) { + unsigned NumElementsInLane = 128 / ScalarSize; + unsigned NumLanes = NumElts / NumElementsInLane; + + for (unsigned l = 0; l != NumElts; l += NumElementsInLane) { + unsigned Index = (Imm % NumLanes) * NumElementsInLane; + Imm /= NumLanes; // Discard the bits we just used. + // We actually need the other source. + if (l >= (NumElts / 2)) + Index += NumElts; + for (unsigned i = 0; i != NumElementsInLane; ++i) + ShuffleMask.push_back(Index + i); + } +} + +void DecodeVPERM2X128Mask(unsigned NumElts, unsigned Imm, + SmallVectorImpl<int> &ShuffleMask) { + unsigned HalfSize = NumElts / 2; + + for (unsigned l = 0; l != 2; ++l) { + unsigned HalfMask = Imm >> (l * 4); + unsigned HalfBegin = (HalfMask & 0x3) * HalfSize; + for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i) + ShuffleMask.push_back((HalfMask & 8) ? SM_SentinelZero : (int)i); + } +} + +void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts, + SmallVectorImpl<int> &ShuffleMask) { + for (int i = 0, e = RawMask.size(); i < e; ++i) { + uint64_t M = RawMask[i]; + if (UndefElts[i]) { + ShuffleMask.push_back(SM_SentinelUndef); + continue; + } + // For 256/512-bit vectors the base of the shuffle is the 128-bit + // subvector we're inside. + int Base = (i / 16) * 16; + // If the high bit (7) of the byte is set, the element is zeroed. + if (M & (1 << 7)) + ShuffleMask.push_back(SM_SentinelZero); + else { + // Only the least significant 4 bits of the byte are used. + int Index = Base + (M & 0xf); + ShuffleMask.push_back(Index); + } + } +} + +void DecodeBLENDMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl<int> &ShuffleMask) { + for (unsigned i = 0; i < NumElts; ++i) { + // If there are more than 8 elements in the vector, then any immediate blend + // mask wraps around. + unsigned Bit = i % 8; + ShuffleMask.push_back(((Imm >> Bit) & 1) ? NumElts + i : i); + } +} + +void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts, + SmallVectorImpl<int> &ShuffleMask) { + assert(RawMask.size() == 16 && "Illegal VPPERM shuffle mask size"); + + // VPPERM Operation + // Bits[4:0] - Byte Index (0 - 31) + // Bits[7:5] - Permute Operation + // + // Permute Operation: + // 0 - Source byte (no logical operation). + // 1 - Invert source byte. + // 2 - Bit reverse of source byte. + // 3 - Bit reverse of inverted source byte. + // 4 - 00h (zero - fill). + // 5 - FFh (ones - fill). + // 6 - Most significant bit of source byte replicated in all bit positions. + // 7 - Invert most significant bit of source byte and replicate in all bit positions.
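+ // + // For example, a selector byte of 0x14 (PermuteOp 0, index 20) reads byte 4 + // of the second 16-byte source, while 0x94 (PermuteOp 4) always produces + // zero; the remaining operations cannot be expressed as a shuffle, so the + // loop below clears the mask and gives up.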
+ for (int i = 0, e = RawMask.size(); i < e; ++i) { + if (UndefElts[i]) { + ShuffleMask.push_back(SM_SentinelUndef); + continue; + } + + uint64_t M = RawMask[i]; + uint64_t PermuteOp = (M >> 5) & 0x7; + if (PermuteOp == 4) { + ShuffleMask.push_back(SM_SentinelZero); + continue; + } + if (PermuteOp != 0) { + ShuffleMask.clear(); + return; + } + + uint64_t Index = M & 0x1F; + ShuffleMask.push_back((int)Index); + } +} + +void DecodeVPERMMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl<int> &ShuffleMask) { + for (unsigned l = 0; l != NumElts; l += 4) + for (unsigned i = 0; i != 4; ++i) + ShuffleMask.push_back(l + ((Imm >> (2 * i)) & 3)); +} + +void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits, + unsigned NumDstElts, bool IsAnyExtend, + SmallVectorImpl<int> &ShuffleMask) { + unsigned Scale = DstScalarBits / SrcScalarBits; + assert(SrcScalarBits < DstScalarBits && + "Expected zero extension mask to increase scalar size"); + + int Sentinel = IsAnyExtend ? SM_SentinelUndef : SM_SentinelZero; + for (unsigned i = 0; i != NumDstElts; i++) { + ShuffleMask.push_back(i); + ShuffleMask.append(Scale - 1, Sentinel); + } +} + +void DecodeZeroMoveLowMask(unsigned NumElts, + SmallVectorImpl<int> &ShuffleMask) { + ShuffleMask.push_back(0); + ShuffleMask.append(NumElts - 1, SM_SentinelZero); +} + +void DecodeScalarMoveMask(unsigned NumElts, bool IsLoad, + SmallVectorImpl<int> &ShuffleMask) { + // First element comes from the first element of second source. + // Remaining elements: Load zero extends / Move copies from first source. + ShuffleMask.push_back(NumElts); + for (unsigned i = 1; i < NumElts; i++) + ShuffleMask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i); +} + +void DecodeEXTRQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx, + SmallVectorImpl<int> &ShuffleMask) { + unsigned HalfElts = NumElts / 2; + + // Only the bottom 6 bits are valid for each immediate. + Len &= 0x3F; + Idx &= 0x3F; + + // We can only decode this bit extraction instruction as a shuffle if both the + // length and index work with whole elements. + if (0 != (Len % EltSize) || 0 != (Idx % EltSize)) + return; + + // A length of zero is equivalent to a bit length of 64. + if (Len == 0) + Len = 64; + + // If the length + index exceeds the bottom 64 bits the result is undefined. + if ((Len + Idx) > 64) { + ShuffleMask.append(NumElts, SM_SentinelUndef); + return; + } + + // Convert the length and index to work with elements. + Len /= EltSize; + Idx /= EltSize; + + // EXTRQ: Extract Len elements starting from Idx. Zero pad the remaining + // elements of the lower 64-bits. The upper 64-bits are undefined. + for (int i = 0; i != Len; ++i) + ShuffleMask.push_back(i + Idx); + for (int i = Len; i != (int)HalfElts; ++i) + ShuffleMask.push_back(SM_SentinelZero); + for (int i = HalfElts; i != (int)NumElts; ++i) + ShuffleMask.push_back(SM_SentinelUndef); +} + +void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx, + SmallVectorImpl<int> &ShuffleMask) { + unsigned HalfElts = NumElts / 2; + + // Only the bottom 6 bits are valid for each immediate. + Len &= 0x3F; + Idx &= 0x3F; + + // We can only decode this bit insertion instruction as a shuffle if both the + // length and index work with whole elements. + if (0 != (Len % EltSize) || 0 != (Idx % EltSize)) + return; + + // A length of zero is equivalent to a bit length of 64. + if (Len == 0) + Len = 64; + + // If the length + index exceeds the bottom 64 bits the result is undefined.
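+ // (For example, with EltSize == 8, Len == 32 and Idx == 40 the bit field + // would span bits [40, 72), past bit 64, so the whole mask is undefined.)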
+ if ((Len + Idx) > 64) { + ShuffleMask.append(NumElts, SM_SentinelUndef); + return; + } + + // Convert the length and index to work with elements. + Len /= EltSize; + Idx /= EltSize; + + // INSERTQ: Extract lowest Len elements from lower half of second source and + // insert over first source starting at Idx element. The upper 64-bits are + // undefined. + for (int i = 0; i != Idx; ++i) + ShuffleMask.push_back(i); + for (int i = 0; i != Len; ++i) + ShuffleMask.push_back(i + NumElts); + for (int i = Idx + Len; i != (int)HalfElts; ++i) + ShuffleMask.push_back(i); + for (int i = HalfElts; i != (int)NumElts; ++i) + ShuffleMask.push_back(SM_SentinelUndef); +} + +void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits, + ArrayRef<uint64_t> RawMask, const APInt &UndefElts, + SmallVectorImpl<int> &ShuffleMask) { + unsigned VecSize = NumElts * ScalarBits; + unsigned NumLanes = VecSize / 128; + unsigned NumEltsPerLane = NumElts / NumLanes; + assert((VecSize == 128 || VecSize == 256 || VecSize == 512) && + "Unexpected vector size"); + assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size"); + + for (unsigned i = 0, e = RawMask.size(); i < e; ++i) { + if (UndefElts[i]) { + ShuffleMask.push_back(SM_SentinelUndef); + continue; + } + uint64_t M = RawMask[i]; + M = (ScalarBits == 64 ? ((M >> 1) & 0x1) : (M & 0x3)); + unsigned LaneOffset = i & ~(NumEltsPerLane - 1); + ShuffleMask.push_back((int)(LaneOffset + M)); + } +} + +void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z, + ArrayRef<uint64_t> RawMask, const APInt &UndefElts, + SmallVectorImpl<int> &ShuffleMask) { + unsigned VecSize = NumElts * ScalarBits; + unsigned NumLanes = VecSize / 128; + unsigned NumEltsPerLane = NumElts / NumLanes; + assert((VecSize == 128 || VecSize == 256) && "Unexpected vector size"); + assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size"); + assert((NumElts == RawMask.size()) && "Unexpected mask size"); + + for (unsigned i = 0, e = RawMask.size(); i < e; ++i) { + if (UndefElts[i]) { + ShuffleMask.push_back(SM_SentinelUndef); + continue; + } + + // VPERMIL2 Operation. + // Bits[3] - Match Bit. + // Bits[2:1] - (Per Lane) PD Shuffle Mask. + // Bits[2:0] - (Per Lane) PS Shuffle Mask. + uint64_t Selector = RawMask[i]; + unsigned MatchBit = (Selector >> 3) & 0x1; + + // M2Z[0:1] MatchBit + // 0Xb X Source selected by Selector index. + // 10b 0 Source selected by Selector index. + // 10b 1 Zero. + // 11b 0 Zero. + // 11b 1 Source selected by Selector index.
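+ // + // In other words: when M2Z[1] is set, an element is zeroed whenever its + // MatchBit differs from M2Z[0]; when M2Z[1] is clear, the MatchBit is + // ignored and the Selector index is always used.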
+ if ((M2Z & 0x2) != 0 && MatchBit != (M2Z & 0x1)) { + ShuffleMask.push_back(SM_SentinelZero); + continue; + } + + int Index = i & ~(NumEltsPerLane - 1); + if (ScalarBits == 64) + Index += (Selector >> 1) & 0x1; + else + Index += Selector & 0x3; + + int Src = (Selector >> 2) & 0x1; + Index += Src * NumElts; + ShuffleMask.push_back(Index); + } +} + +void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts, + SmallVectorImpl<int> &ShuffleMask) { + uint64_t EltMaskSize = RawMask.size() - 1; + for (int i = 0, e = RawMask.size(); i != e; ++i) { + if (UndefElts[i]) { + ShuffleMask.push_back(SM_SentinelUndef); + continue; + } + uint64_t M = RawMask[i]; + M &= EltMaskSize; + ShuffleMask.push_back((int)M); + } +} + +void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts, + SmallVectorImpl<int> &ShuffleMask) { + uint64_t EltMaskSize = (RawMask.size() * 2) - 1; + for (int i = 0, e = RawMask.size(); i != e; ++i) { + if (UndefElts[i]) { + ShuffleMask.push_back(SM_SentinelUndef); + continue; + } + uint64_t M = RawMask[i]; + M &= EltMaskSize; + ShuffleMask.push_back((int)M); + } +} + +} // llvm namespace diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.h b/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.h new file mode 100644 index 0000000000000..4ef9959f7a278 --- /dev/null +++ b/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.h @@ -0,0 +1,166 @@ +//===-- X86ShuffleDecode.h - X86 shuffle decode logic -----------*-C++-*---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Define several functions to decode x86 specific shuffle semantics into a +// generic vector mask. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_X86_UTILS_X86SHUFFLEDECODE_H +#define LLVM_LIB_TARGET_X86_UTILS_X86SHUFFLEDECODE_H + +#include <cstdint> + +//===----------------------------------------------------------------------===// +// Vector Mask Decoding +//===----------------------------------------------------------------------===// + +namespace llvm { +class APInt; +template <typename T> class ArrayRef; +template <typename T> class SmallVectorImpl; + +enum { SM_SentinelUndef = -1, SM_SentinelZero = -2 }; + +/// Decode a 128-bit INSERTPS instruction as a v4f32 shuffle mask. +void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask); + +// Insert the bottom Len elements from a second source into a vector starting at +// element Idx. +void DecodeInsertElementMask(unsigned NumElts, unsigned Idx, unsigned Len, + SmallVectorImpl<int> &ShuffleMask); + +/// Decode a MOVHLPS instruction as a v2f64/v4f32 shuffle mask. +/// i.e. <3,1> or <6,7,2,3> +void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask); + +/// Decode a MOVLHPS instruction as a v2f64/v4f32 shuffle mask. +/// i.e. 
<0,2> or <0,1,4,5> +void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask); + +void DecodeMOVSLDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask); + +void DecodeMOVSHDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask); + +void DecodeMOVDDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask); + +void DecodePSLLDQMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl<int> &ShuffleMask); + +void DecodePSRLDQMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl<int> &ShuffleMask); + +void DecodePALIGNRMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl<int> &ShuffleMask); + +void DecodeVALIGNMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl<int> &ShuffleMask); + +/// Decodes the shuffle masks for pshufd/pshufw/vpermilpd/vpermilps. +void DecodePSHUFMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm, + SmallVectorImpl<int> &ShuffleMask); + +/// Decodes the shuffle masks for pshufhw. +void DecodePSHUFHWMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl<int> &ShuffleMask); + +/// Decodes the shuffle masks for pshuflw. +void DecodePSHUFLWMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl<int> &ShuffleMask); + +/// Decodes a PSWAPD 3DNow! instruction. +void DecodePSWAPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask); + +/// Decodes the shuffle masks for shufp*. +void DecodeSHUFPMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm, + SmallVectorImpl<int> &ShuffleMask); + +/// Decodes the shuffle masks for unpckhps/unpckhpd and punpckh*. +void DecodeUNPCKHMask(unsigned NumElts, unsigned ScalarBits, + SmallVectorImpl<int> &ShuffleMask); + +/// Decodes the shuffle masks for unpcklps/unpcklpd and punpckl*. +void DecodeUNPCKLMask(unsigned NumElts, unsigned ScalarBits, + SmallVectorImpl<int> &ShuffleMask); + +/// Decodes a broadcast of the first element of a vector. +void DecodeVectorBroadcast(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask); + +/// Decodes a broadcast of a subvector to a larger vector type. +void DecodeSubVectorBroadcast(unsigned DstNumElts, unsigned SrcNumElts, + SmallVectorImpl<int> &ShuffleMask); + +/// Decode a PSHUFB mask from a raw array of constants such as from +/// BUILD_VECTOR. +void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts, + SmallVectorImpl<int> &ShuffleMask); + +/// Decode a BLEND immediate mask into a shuffle mask. +void DecodeBLENDMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl<int> &ShuffleMask); + +void DecodeVPERM2X128Mask(unsigned NumElts, unsigned Imm, + SmallVectorImpl<int> &ShuffleMask); + +/// Decode a shuffle of packed values at 128-bit granularity +/// (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2) +/// immediate mask into a shuffle mask. +void decodeVSHUF64x2FamilyMask(unsigned NumElts, unsigned ScalarSize, + unsigned Imm, SmallVectorImpl<int> &ShuffleMask); + +/// Decodes the shuffle masks for VPERMQ/VPERMPD. +void DecodeVPERMMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl<int> &ShuffleMask); + +/// Decode a VPPERM mask from a raw array of constants such as from +/// BUILD_VECTOR. +/// This can only decode basic masks (permutes + zeros), not any of the other +/// operations that VPPERM can perform. +void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts, + SmallVectorImpl<int> &ShuffleMask); + +/// Decode a zero extension instruction as a shuffle mask.
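+/// e.g. a PMOVZXWD-style v8i16 -> v4i32 zero extension yields the mask +/// <0, Z, 1, Z, 2, Z, 3, Z>, where Z is SM_SentinelZero (or SM_SentinelUndef +/// for an any-extend).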
+void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits, + unsigned NumDstElts, bool IsAnyExtend, + SmallVectorImpl<int> &ShuffleMask); + +/// Decode a move lower and zero upper instruction as a shuffle mask. +void DecodeZeroMoveLowMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask); + +/// Decode a scalar float move instruction as a shuffle mask. +void DecodeScalarMoveMask(unsigned NumElts, bool IsLoad, + SmallVectorImpl<int> &ShuffleMask); + +/// Decode a SSE4A EXTRQ instruction as a shuffle mask. +void DecodeEXTRQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx, + SmallVectorImpl<int> &ShuffleMask); + +/// Decode a SSE4A INSERTQ instruction as a shuffle mask. +void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx, + SmallVectorImpl<int> &ShuffleMask); + +/// Decode a VPERMILPD/VPERMILPS variable mask from a raw array of constants. +void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits, + ArrayRef<uint64_t> RawMask, const APInt &UndefElts, + SmallVectorImpl<int> &ShuffleMask); + +/// Decode a VPERMIL2PD/VPERMIL2PS variable mask from a raw array of constants. +void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z, + ArrayRef<uint64_t> RawMask, const APInt &UndefElts, + SmallVectorImpl<int> &ShuffleMask); + +/// Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants. +void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts, + SmallVectorImpl<int> &ShuffleMask); + +/// Decode a VPERMT2 W/D/Q/PS/PD mask from a raw array of constants. +void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts, + SmallVectorImpl<int> &ShuffleMask); +} // llvm namespace + +#endif diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp index db624378d517b..3bebcc24fd3a4 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp @@ -28,7 +28,7 @@ public: void EmitWinEHHandlerData(SMLoc Loc) override; void EmitWindowsUnwindTables() override; void EmitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) override; - void FinishImpl() override; + void finishImpl() override; }; void X86WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) { @@ -52,11 +52,11 @@ void X86WinCOFFStreamer::EmitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) { XTS->emitFPOData(ProcSym, Loc); } -void X86WinCOFFStreamer::FinishImpl() { - EmitFrames(nullptr); +void X86WinCOFFStreamer::finishImpl() { + emitFrames(nullptr); EmitWindowsUnwindTables(); - MCWinCOFFStreamer::FinishImpl(); + MCWinCOFFStreamer::finishImpl(); } } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp index d5494ef12370f..11251fb2b2ba7 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp @@ -159,7 +159,7 @@ bool X86WinCOFFTargetStreamer::checkInFPOPrologue(SMLoc L) { MCSymbol *X86WinCOFFTargetStreamer::emitFPOLabel() { MCSymbol *Label = getContext().createTempSymbol("cfi", true); - getStreamer().EmitLabel(Label); + getStreamer().emitLabel(Label); return Label; } @@ -372,13 +372,13 @@ void FPOStateMachine::emitFrameDataRecord(MCStreamer &OS, MCSymbol *Label) { OS.emitAbsoluteSymbolDiff(Label, FPO->Begin, 4); // RvaStart OS.emitAbsoluteSymbolDiff(FPO->End, Label, 4); // CodeSize - OS.EmitIntValue(LocalSize, 4); - OS.EmitIntValue(FPO->ParamsSize, 4); 
- OS.EmitIntValue(MaxStackSize, 4); - OS.EmitIntValue(FrameFuncStrTabOff, 4); // FrameFunc + OS.emitInt32(LocalSize); + OS.emitInt32(FPO->ParamsSize); + OS.emitInt32(MaxStackSize); + OS.emitInt32(FrameFuncStrTabOff); // FrameFunc OS.emitAbsoluteSymbolDiff(FPO->PrologueEnd, Label, 2); - OS.EmitIntValue(SavedRegSize, 2); - OS.EmitIntValue(CurFlags, 4); + OS.emitInt16(SavedRegSize); + OS.emitInt32(CurFlags); } /// Compute and emit the real CodeView FrameData subsection. @@ -398,12 +398,12 @@ bool X86WinCOFFTargetStreamer::emitFPOData(const MCSymbol *ProcSym, SMLoc L) { MCSymbol *FrameBegin = Ctx.createTempSymbol(), *FrameEnd = Ctx.createTempSymbol(); - OS.EmitIntValue(unsigned(DebugSubsectionKind::FrameData), 4); + OS.emitInt32(unsigned(DebugSubsectionKind::FrameData)); OS.emitAbsoluteSymbolDiff(FrameEnd, FrameBegin, 4); - OS.EmitLabel(FrameBegin); + OS.emitLabel(FrameBegin); // Start with the RVA of the function in question. - OS.EmitValue(MCSymbolRefExpr::create(FPO->Function, + OS.emitValue(MCSymbolRefExpr::create(FPO->Function, MCSymbolRefExpr::VK_COFF_IMGREL32, Ctx), 4); @@ -437,8 +437,8 @@ bool X86WinCOFFTargetStreamer::emitFPOData(const MCSymbol *ProcSym, SMLoc L) { FSM.emitFrameDataRecord(OS, Inst.Label); } - OS.EmitValueToAlignment(4, 0); - OS.EmitLabel(FrameEnd); + OS.emitValueToAlignment(4, 0); + OS.emitLabel(FrameEnd); return false; }