Diffstat (limited to 'llvm/lib/Target/X86/MCTargetDesc')
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp        |   5
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h          |  14
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp            | 971
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h                | 107
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp       |   4
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp          | 195
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp     |  26
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h       |   4
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp      |   3
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h        |  15
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp             |  18
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h               |   8
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp         | 696
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp          |  25
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h            |  13
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp         | 571
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.h           | 166
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp       |   8
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp |  24
19 files changed, 2135 insertions(+), 738 deletions(-)
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
index 675a9c377b12d..0134b4efce727 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
@@ -56,7 +56,7 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, uint64_t Address,
if (MI->getOpcode() == X86::CALLpcrel32 &&
(STI.getFeatureBits()[X86::Mode64Bit])) {
OS << "\tcallq\t";
- printPCRelImm(MI, 0, OS);
+ printPCRelImm(MI, Address, 0, OS);
}
// data16 and data32 both have the same encoding of 0x66. While data32 is
// valid only in 16 bit systems, data16 is valid in the rest.
@@ -68,8 +68,7 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, uint64_t Address,
OS << "\tdata32";
}
// Try to print any aliases first.
- else if (!printAliasInstr(MI, OS) &&
- !printVecCompareInstr(MI, OS))
+ else if (!printAliasInstr(MI, Address, OS) && !printVecCompareInstr(MI, OS))
printInstruction(MI, Address, OS);
// Next always print the annotation.
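
For reference, the Address parameter threaded through these printers lets branch targets be shown as absolute addresses. A minimal standalone sketch of the resolution rule (cf. X86MCInstrAnalysis::evaluateBranch, which computes Addr + Size + Imm; the helper name here is illustrative, not part of this patch):

#include <cstdint>
#include <cstdio>

// An x86 PC-relative displacement is measured from the end of the
// instruction, so the target is Address + instruction size + displacement.
static uint64_t resolvePCRelTarget(uint64_t Address, unsigned InstSize,
                                   int64_t Disp) {
  return Address + InstSize + Disp;
}

int main() {
  // e.g. a 5-byte callq at 0x401000 with rel32 = 0x20 targets 0x401025
  std::printf("%#llx\n",
              (unsigned long long)resolvePCRelTarget(0x401000, 5, 0x20));
}
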
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h
index 3d5d384dc4a01..51ddae61d2510 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h
@@ -30,9 +30,10 @@ public:
// Autogenerated by tblgen, returns true if we successfully printed an
// alias.
- bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
- void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx, raw_ostream &O);
+ bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &OS);
+ void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
+ unsigned OpIdx, unsigned PrintMethodIdx,
+ raw_ostream &O);
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &OS);
@@ -46,13 +47,6 @@ public:
void printU8Imm(const MCInst *MI, unsigned Op, raw_ostream &OS);
void printSTiRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
- void printanymem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- printMemReference(MI, OpNo, O);
- }
- void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- printMemReference(MI, OpNo, O);
- }
-
void printbytemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index dffda5217675b..bf3b6bcb5463f 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -12,7 +12,9 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFObjectWriter.h"
@@ -60,10 +62,9 @@ public:
else if (BranchType == "indirect")
addKind(X86::AlignBranchIndirect);
else {
- report_fatal_error(
- "'-x86-align-branch 'The branches's type is combination of jcc, "
- "fused, jmp, call, ret, indirect.(plus separated)",
- false);
+ errs() << "invalid argument " << BranchType.str()
+ << " to -x86-align-branch=; each element must be one of: fused, "
+ "jcc, jmp, call, ret, indirect.(plus separated)\n";
}
}
}
@@ -85,13 +86,14 @@ cl::opt<unsigned> X86AlignBranchBoundary(
cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
"x86-align-branch",
- cl::desc("Specify types of branches to align (plus separated list of "
- "types). The branches's types are combination of jcc, fused, "
- "jmp, call, ret, indirect."),
- cl::value_desc("jcc indicates conditional jumps, fused indicates fused "
- "conditional jumps, jmp indicates unconditional jumps, call "
- "indicates direct and indirect calls, ret indicates rets, "
- "indirect indicates indirect jumps."),
+ cl::desc(
+ "Specify types of branches to align (plus separated list of types):"
+ "\njcc indicates conditional jumps"
+ "\nfused indicates fused conditional jumps"
+ "\njmp indicates direct unconditional jumps"
+ "\ncall indicates direct and indirect calls"
+ "\nret indicates rets"
+ "\nindirect indicates indirect unconditional jumps"),
cl::location(X86AlignBranchKindLoc));
cl::opt<bool> X86AlignBranchWithin32BBoundaries(
@@ -102,6 +104,18 @@ cl::opt<bool> X86AlignBranchWithin32BBoundaries(
"assumptions about labels corresponding to particular instructions, "
"and should be used with caution."));
+cl::opt<unsigned> X86PadMaxPrefixSize(
+ "x86-pad-max-prefix-size", cl::init(0),
+ cl::desc("Maximum number of prefixes to use for padding"));
+
+cl::opt<bool> X86PadForAlign(
+ "x86-pad-for-align", cl::init(true), cl::Hidden,
+ cl::desc("Pad previous instructions to implement align directives"));
+
+cl::opt<bool> X86PadForBranchAlign(
+ "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
+ cl::desc("Pad previous instructions to implement branch alignment"));
+
class X86ELFObjectWriter : public MCELFObjectTargetWriter {
public:
X86ELFObjectWriter(bool is64Bit, uint8_t OSABI, uint16_t EMachine,
@@ -114,14 +128,18 @@ class X86AsmBackend : public MCAsmBackend {
std::unique_ptr<const MCInstrInfo> MCII;
X86AlignBranchKind AlignBranchType;
Align AlignBoundary;
+ unsigned TargetPrefixMax = 0;
- bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
-
- bool needAlign(MCObjectStreamer &OS) const;
- bool needAlignInst(const MCInst &Inst) const;
- MCBoundaryAlignFragment *
- getOrCreateBoundaryAlignFragment(MCObjectStreamer &OS) const;
MCInst PrevInst;
+ MCBoundaryAlignFragment *PendingBA = nullptr;
+ std::pair<MCFragment *, size_t> PrevInstPosition;
+ bool CanPadInst;
+
+ uint8_t determinePaddingPrefix(const MCInst &Inst) const;
+ bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
+ bool needAlign(const MCInst &Inst) const;
+ bool canPadBranches(MCObjectStreamer &OS) const;
+ bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;
public:
X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
@@ -142,11 +160,14 @@ public:
AlignBoundary = assumeAligned(X86AlignBranchBoundary);
if (X86AlignBranch.getNumOccurrences())
AlignBranchType = X86AlignBranchKindLoc;
+ if (X86PadMaxPrefixSize.getNumOccurrences())
+ TargetPrefixMax = X86PadMaxPrefixSize;
}
bool allowAutoPadding() const override;
- void alignBranchesBegin(MCObjectStreamer &OS, const MCInst &Inst) override;
- void alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
+ bool allowEnhancedRelaxation() const override;
+ void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst) override;
+ void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
unsigned getNumFixupKinds() const override {
return X86::NumTargetFixupKinds;
@@ -155,7 +176,7 @@ public:
Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
-
+
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target) override;
@@ -171,22 +192,34 @@ public:
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const override;
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override;
+ void relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const override;
+
+ bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
+ MCCodeEmitter &Emitter,
+ unsigned &RemainingSize) const;
+
+ bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
+ unsigned &RemainingSize) const;
+
+ bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
+ unsigned &RemainingSize) const;
+
+ void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;
bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
};
} // end anonymous namespace
-static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool is16BitMode) {
+static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) {
unsigned Op = Inst.getOpcode();
switch (Op) {
default:
return Op;
case X86::JCC_1:
- return (is16BitMode) ? X86::JCC_2 : X86::JCC_4;
+ return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
case X86::JMP_1:
- return (is16BitMode) ? X86::JMP_2 : X86::JMP_4;
+ return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
}
}
@@ -275,11 +308,11 @@ static unsigned getRelaxedOpcodeArith(const MCInst &Inst) {
}
}
-static unsigned getRelaxedOpcode(const MCInst &Inst, bool is16BitMode) {
+static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) {
unsigned R = getRelaxedOpcodeArith(Inst);
if (R != Inst.getOpcode())
return R;
- return getRelaxedOpcodeBranch(Inst, is16BitMode);
+ return getRelaxedOpcodeBranch(Inst, Is16BitMode);
}
static X86::CondCode getCondFromBranch(const MCInst &MI,
@@ -316,6 +349,11 @@ static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
return (BaseReg == X86::RIP);
}
+/// Check if the instruction is a prefix.
+static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
+ return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
+}
+
/// Check if the instruction is valid as the first instruction in macro fusion.
static bool isFirstMacroFusibleInst(const MCInst &Inst,
const MCInstrInfo &MCII) {
@@ -327,6 +365,69 @@ static bool isFirstMacroFusibleInst(const MCInst &Inst,
return FIK != X86::FirstMacroFusionInstKind::Invalid;
}
+/// X86 can reduce the number of NOP bytes needed by padding instructions with
+/// prefixes instead, which gives better performance in some cases. Here, we
+/// determine which prefix is the most suitable.
+///
+/// If the instruction has a segment override prefix, use the existing one.
+/// If the target is 64-bit, use the CS.
+/// If the target is 32-bit,
+/// - If the instruction has an ESP/EBP base register, use SS.
+/// - Otherwise use DS.
+uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
+ assert((STI.hasFeature(X86::Mode32Bit) || STI.hasFeature(X86::Mode64Bit)) &&
+ "Prefixes can be added only in 32-bit or 64-bit mode.");
+ const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+
+ // Determine where the memory operand starts, if present.
+ int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
+ if (MemoryOperand != -1)
+ MemoryOperand += X86II::getOperandBias(Desc);
+
+ unsigned SegmentReg = 0;
+ if (MemoryOperand >= 0) {
+ // Check for explicit segment override on memory operand.
+ SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
+ }
+
+ switch (TSFlags & X86II::FormMask) {
+ default:
+ break;
+ case X86II::RawFrmDstSrc: {
+ // Check segment override opcode prefix as needed (not for %ds).
+ if (Inst.getOperand(2).getReg() != X86::DS)
+ SegmentReg = Inst.getOperand(2).getReg();
+ break;
+ }
+ case X86II::RawFrmSrc: {
+ // Check segment override opcode prefix as needed (not for %ds).
+ if (Inst.getOperand(1).getReg() != X86::DS)
+ SegmentReg = Inst.getOperand(1).getReg();
+ break;
+ }
+ case X86II::RawFrmMemOffs: {
+ // Check segment override opcode prefix as needed.
+ SegmentReg = Inst.getOperand(1).getReg();
+ break;
+ }
+ }
+
+ if (SegmentReg != 0)
+ return X86::getSegmentOverridePrefixForReg(SegmentReg);
+
+ if (STI.hasFeature(X86::Mode64Bit))
+ return X86::CS_Encoding;
+
+ if (MemoryOperand >= 0) {
+ unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
+ unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
+ if (BaseReg == X86::ESP || BaseReg == X86::EBP)
+ return X86::SS_Encoding;
+ }
+ return X86::DS_Encoding;
+}
+
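
A standalone sketch of the selection policy described above, using the raw prefix byte values defined in X86BaseInfo.h (illustrative only, not the backend code):

#include <cstdint>

// Same values as X86::EncodingOfSegmentOverridePrefix.
enum : uint8_t { CSPrefix = 0x2E, SSPrefix = 0x36, DSPrefix = 0x3E };

// An existing segment override wins; 64-bit code pads with CS (that override
// is ignored there); 32-bit code uses SS for ESP/EBP-based memory operands
// and DS otherwise.
static uint8_t pickPaddingPrefix(bool Is64Bit, uint8_t ExistingSegOverride,
                                 bool BaseIsEspOrEbp) {
  if (ExistingSegOverride)
    return ExistingSegOverride;
  if (Is64Bit)
    return CSPrefix;
  return BaseIsEspOrEbp ? SSPrefix : DSPrefix;
}
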
/// Check if the two instructions will be macro-fused on the target cpu.
bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
@@ -355,19 +456,122 @@ static bool hasVariantSymbol(const MCInst &MI) {
}
bool X86AsmBackend::allowAutoPadding() const {
- return (AlignBoundary != Align::None() &&
- AlignBranchType != X86::AlignBranchNone);
+ return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
+}
+
+bool X86AsmBackend::allowEnhancedRelaxation() const {
+ return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
+}
+
+/// X86 has certain instructions which enable interrupts exactly one
+/// instruction *after* the instruction which stores to SS. Return true if the
+/// given instruction has such an interrupt delay slot.
+static bool hasInterruptDelaySlot(const MCInst &Inst) {
+ switch (Inst.getOpcode()) {
+ case X86::POPSS16:
+ case X86::POPSS32:
+ case X86::STI:
+ return true;
+
+ case X86::MOV16sr:
+ case X86::MOV32sr:
+ case X86::MOV64sr:
+ case X86::MOV16sm:
+ if (Inst.getOperand(0).getReg() == X86::SS)
+ return true;
+ break;
+ }
+ return false;
+}
+
+/// Check if the instruction to be emitted is right after any data.
+static bool
+isRightAfterData(MCFragment *CurrentFragment,
+ const std::pair<MCFragment *, size_t> &PrevInstPosition) {
+ MCFragment *F = CurrentFragment;
+ // Empty data fragments may be created to prevent further data being
+ // added into the previous fragment, we need to skip them since they
+ // have no contents.
+ for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
+ if (cast<MCDataFragment>(F)->getContents().size() != 0)
+ break;
+
+ // Since data is always emitted into a DataFragment, our check strategy is
+ // simple here.
+ // - If the fragment is a DataFragment
+ // - If it's not the fragment where the previous instruction is,
+ // returns true.
+ // - If it's the fragment holding the previous instruction but its
+ // size changed since the previous instruction was emitted into
+ // it, returns true.
+ // - Otherwise returns false.
+ // - If the fragment is not a DataFragment, returns false.
+ if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
+ return DF != PrevInstPosition.first ||
+ DF->getContents().size() != PrevInstPosition.second;
+
+ return false;
+}
+
+/// \returns the fragment size if it has instructions, otherwise returns 0.
+static size_t getSizeForInstFragment(const MCFragment *F) {
+ if (!F || !F->hasInstructions())
+ return 0;
+ // MCEncodedFragmentWithContents being templated makes this tricky.
+ switch (F->getKind()) {
+ default:
+ llvm_unreachable("Unknown fragment with instructions!");
+ case MCFragment::FT_Data:
+ return cast<MCDataFragment>(*F).getContents().size();
+ case MCFragment::FT_Relaxable:
+ return cast<MCRelaxableFragment>(*F).getContents().size();
+ case MCFragment::FT_CompactEncodedInst:
+ return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
+ }
+}
+
+/// Return true if we can insert a NOP or prefixes automatically before the
+/// instruction to be emitted.
+bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
+ if (hasVariantSymbol(Inst))
+ // The linker may rewrite the instruction with a variant symbol operand (e.g.
+ // TLSCALL).
+ return false;
+
+ if (hasInterruptDelaySlot(PrevInst))
+ // If this instruction follows an interrupt enabling instruction with a one
+ // instruction delay, inserting a nop would change behavior.
+ return false;
+
+ if (isPrefix(PrevInst, *MCII))
+ // If this instruction follows a prefix, inserting a nop/prefix would change
+ // semantics.
+ return false;
+
+ if (isPrefix(Inst, *MCII))
+ // If this instruction is a prefix, inserting a prefix would change
+ // semantics.
+ return false;
+
+ if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
+ // If this instruction follows any data, there is no clear
+ // instruction boundary, inserting a nop/prefix would change semantics.
+ return false;
+
+ return true;
}
-bool X86AsmBackend::needAlign(MCObjectStreamer &OS) const {
+bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
if (!OS.getAllowAutoPadding())
return false;
assert(allowAutoPadding() && "incorrect initialization!");
- MCAssembler &Assembler = OS.getAssembler();
- MCSection *Sec = OS.getCurrentSectionOnly();
+ // We only pad in the text section.
+ if (!OS.getCurrentSectionOnly()->getKind().isText())
+ return false;
+
// TODO: Currently we don't deal with bundle cases.
- if (Assembler.isBundlingEnabled() && Sec->isBundleLocked())
+ if (OS.getAssembler().isBundlingEnabled())
return false;
// Branches only need to be aligned in 32-bit or 64-bit mode.
@@ -377,59 +581,42 @@ bool X86AsmBackend::needAlign(MCObjectStreamer &OS) const {
return true;
}
-/// Check if the instruction operand needs to be aligned. Padding is disabled
-/// before intruction which may be rewritten by linker(e.g. TLSCALL).
-bool X86AsmBackend::needAlignInst(const MCInst &Inst) const {
- // Linker may rewrite the instruction with variant symbol operand.
- if (hasVariantSymbol(Inst))
- return false;
-
- const MCInstrDesc &InstDesc = MCII->get(Inst.getOpcode());
- return (InstDesc.isConditionalBranch() &&
+/// Check if the instruction operand needs to be aligned.
+bool X86AsmBackend::needAlign(const MCInst &Inst) const {
+ const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
+ return (Desc.isConditionalBranch() &&
(AlignBranchType & X86::AlignBranchJcc)) ||
- (InstDesc.isUnconditionalBranch() &&
+ (Desc.isUnconditionalBranch() &&
(AlignBranchType & X86::AlignBranchJmp)) ||
- (InstDesc.isCall() &&
- (AlignBranchType & X86::AlignBranchCall)) ||
- (InstDesc.isReturn() &&
- (AlignBranchType & X86::AlignBranchRet)) ||
- (InstDesc.isIndirectBranch() &&
+ (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
+ (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
+ (Desc.isIndirectBranch() &&
(AlignBranchType & X86::AlignBranchIndirect));
}
-static bool canReuseBoundaryAlignFragment(const MCBoundaryAlignFragment &F) {
- // If a MCBoundaryAlignFragment has not been used to emit NOP,we can reuse it.
- return !F.canEmitNops();
-}
+/// Insert BoundaryAlignFragment before instructions to align branches.
+void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
+ const MCInst &Inst) {
+ CanPadInst = canPadInst(Inst, OS);
-MCBoundaryAlignFragment *
-X86AsmBackend::getOrCreateBoundaryAlignFragment(MCObjectStreamer &OS) const {
- auto *F = dyn_cast_or_null<MCBoundaryAlignFragment>(OS.getCurrentFragment());
- if (!F || !canReuseBoundaryAlignFragment(*F)) {
- F = new MCBoundaryAlignFragment(AlignBoundary);
- OS.insert(F);
- }
- return F;
-}
+ if (!canPadBranches(OS))
+ return;
+
+ if (!isMacroFused(PrevInst, Inst))
+ // Macro fusion did not actually happen; clear the pending fragment.
+ PendingBA = nullptr;
-/// Insert MCBoundaryAlignFragment before instructions to align branches.
-void X86AsmBackend::alignBranchesBegin(MCObjectStreamer &OS,
- const MCInst &Inst) {
- if (!needAlign(OS))
+ if (!CanPadInst)
return;
- MCFragment *CF = OS.getCurrentFragment();
- bool NeedAlignFused = AlignBranchType & X86::AlignBranchFused;
- if (NeedAlignFused && isMacroFused(PrevInst, Inst) && CF) {
+ if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
// Macro fusion actually happens and there is no other fragment inserted
- // after the previous instruction. NOP can be emitted in PF to align fused
- // jcc.
- if (auto *PF =
- dyn_cast_or_null<MCBoundaryAlignFragment>(CF->getPrevNode())) {
- const_cast<MCBoundaryAlignFragment *>(PF)->setEmitNops(true);
- const_cast<MCBoundaryAlignFragment *>(PF)->setFused(true);
- }
- } else if (needAlignInst(Inst)) {
+ // after the previous instruction.
+ //
+ // Do nothing here since we already inserted a BoundaryAlign fragment when
+ // we met the first instruction in the fused pair and we'll tie them
+ // together in emitInstructionEnd.
+ //
// Note: When there is at least one fragment, such as MCAlignFragment,
// inserted after the previous instruction, e.g.
//
@@ -441,34 +628,41 @@ void X86AsmBackend::alignBranchesBegin(MCObjectStreamer &OS,
//
// We will treat the JCC as an unfused branch although it may be fused
// with the CMP.
- auto *F = getOrCreateBoundaryAlignFragment(OS);
- F->setEmitNops(true);
- F->setFused(false);
- } else if (NeedAlignFused && isFirstMacroFusibleInst(Inst, *MCII)) {
- // We don't know if macro fusion happens until the reaching the next
- // instruction, so a place holder is put here if necessary.
- getOrCreateBoundaryAlignFragment(OS);
+ return;
}
- PrevInst = Inst;
+ if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
+ isFirstMacroFusibleInst(Inst, *MCII))) {
+ // If we meet an unfused branch or the first instruction in a fusible pair,
+ // insert a BoundaryAlign fragment.
+ OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary));
+ }
}
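
The BoundaryAlign fragment inserted above is sized later, in MCAssembler::relaxBoundaryAlign. A minimal sketch of the underlying test (a branch must not cross, or end exactly on, the boundary; 32 bytes is assumed here as the usual granularity for the JCC-erratum mitigation, while the real value comes from AlignBoundary / -x86-branches-within-32B-boundaries):

#include <cstdint>

// True if an instruction of Size bytes at Offset crosses a Boundary or ends
// exactly on one; either case calls for NOP/prefix padding in front of it.
static bool needsBoundaryPadding(uint64_t Offset, uint64_t Size,
                                 uint64_t Boundary = 32) {
  uint64_t End = Offset + Size;
  bool Crosses = (Offset / Boundary) != ((End - 1) / Boundary);
  bool EndsOnBoundary = (End % Boundary) == 0;
  return Crosses || EndsOnBoundary;
}
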
-/// Insert a MCBoundaryAlignFragment to mark the end of the branch to be aligned
-/// if necessary.
-void X86AsmBackend::alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) {
- if (!needAlign(OS))
+/// Set the last fragment to be aligned for the BoundaryAlignFragment.
+void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) {
+ PrevInst = Inst;
+ MCFragment *CF = OS.getCurrentFragment();
+ PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
+ if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
+ F->setAllowAutoPadding(CanPadInst);
+
+ if (!canPadBranches(OS))
return;
- // If the branch is emitted into a MCRelaxableFragment, we can determine the
- // size of the branch easily in MCAssembler::relaxBoundaryAlign. When the
- // branch is fused, the fused branch(macro fusion pair) must be emitted into
- // two fragments. Or when the branch is unfused, the branch must be emitted
- // into one fragment. The MCRelaxableFragment naturally marks the end of the
- // fused or unfused branch.
- // Otherwise, we need to insert a MCBoundaryAlignFragment to mark the end of
- // the branch. This MCBoundaryAlignFragment may be reused to emit NOP to align
- // other branch.
- if (needAlignInst(Inst) && !isa<MCRelaxableFragment>(OS.getCurrentFragment()))
- OS.insert(new MCBoundaryAlignFragment(AlignBoundary));
+
+ if (!needAlign(Inst) || !PendingBA)
+ return;
+
+ // Tie the aligned instructions into the pending BoundaryAlign.
+ PendingBA->setLastFragment(CF);
+ PendingBA = nullptr;
+
+ // We need to ensure that further data isn't added to the current
+ // DataFragment, so that we can get the size of instructions later in
+ // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
+ // DataFragment.
+ if (isa_and_nonnull<MCDataFragment>(CF))
+ OS.insert(new MCDataFragment());
// Update the maximum alignment on the current section if necessary.
MCSection *Sec = OS.getCurrentSectionOnly();
@@ -478,13 +672,23 @@ void X86AsmBackend::alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) {
Optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
if (STI.getTargetTriple().isOSBinFormatELF()) {
+ unsigned Type;
if (STI.getTargetTriple().getArch() == Triple::x86_64) {
- if (Name == "R_X86_64_NONE")
- return FK_NONE;
+ Type = llvm::StringSwitch<unsigned>(Name)
+#define ELF_RELOC(X, Y) .Case(#X, Y)
+#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
+#undef ELF_RELOC
+ .Default(-1u);
} else {
- if (Name == "R_386_NONE")
- return FK_NONE;
+ Type = llvm::StringSwitch<unsigned>(Name)
+#define ELF_RELOC(X, Y) .Case(#X, Y)
+#include "llvm/BinaryFormat/ELFRelocs/i386.def"
+#undef ELF_RELOC
+ .Default(-1u);
}
+ if (Type == -1u)
+ return None;
+ return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
}
return MCAsmBackend::getFixupKind(Name);
}
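
The ELF_RELOC stamping above expands into one StringSwitch case per relocation in the .def file; an abbreviated illustration (the three relocation numbers shown are the real leading entries of x86_64.def):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

// Effective expansion of the #define ELF_RELOC / #include pair (elided).
static unsigned lookupX86_64RelocType(llvm::StringRef Name) {
  return llvm::StringSwitch<unsigned>(Name)
      .Case("R_X86_64_NONE", 0)
      .Case("R_X86_64_64", 1)
      .Case("R_X86_64_PC32", 2)
      // ... one .Case per ELF_RELOC entry ...
      .Default(-1u);
}
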
@@ -502,6 +706,11 @@ const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
};
+ // Fixup kinds from the .reloc directive are like R_386_NONE/R_X86_64_NONE.
+ // They do not require any extra processing.
+ // do not require any extra processing.
+ if (Kind >= FirstLiteralRelocationKind)
+ return MCAsmBackend::getFixupKindInfo(FK_NONE);
+
if (Kind < FirstTargetFixupKind)
return MCAsmBackend::getFixupKindInfo(Kind);
@@ -514,7 +723,7 @@ const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
const MCFixup &Fixup,
const MCValue &) {
- return Fixup.getKind() == FK_NONE;
+ return Fixup.getKind() >= FirstLiteralRelocationKind;
}
static unsigned getFixupKindSize(unsigned Kind) {
@@ -556,7 +765,10 @@ void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
MutableArrayRef<char> Data,
uint64_t Value, bool IsResolved,
const MCSubtargetInfo *STI) const {
- unsigned Size = getFixupKindSize(Fixup.getKind());
+ unsigned Kind = Fixup.getKind();
+ if (Kind >= FirstLiteralRelocationKind)
+ return;
+ unsigned Size = getFixupKindSize(Kind);
assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
@@ -613,12 +825,11 @@ bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
-void X86AsmBackend::relaxInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI,
- MCInst &Res) const {
+void X86AsmBackend::relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const {
// The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
- bool is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
- unsigned RelaxedOp = getRelaxedOpcode(Inst, is16BitMode);
+ bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
+ unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);
if (RelaxedOp == Inst.getOpcode()) {
SmallString<256> Tmp;
@@ -628,8 +839,232 @@ void X86AsmBackend::relaxInstruction(const MCInst &Inst,
report_fatal_error("unexpected instruction to relax: " + OS.str());
}
- Res = Inst;
- Res.setOpcode(RelaxedOp);
+ Inst.setOpcode(RelaxedOp);
+}
+
+/// Return true if this instruction has been fully relaxed into its most
+/// general available form.
+static bool isFullyRelaxed(const MCRelaxableFragment &RF) {
+ auto &Inst = RF.getInst();
+ auto &STI = *RF.getSubtargetInfo();
+ bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
+ return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode();
+}
+
+bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
+ MCCodeEmitter &Emitter,
+ unsigned &RemainingSize) const {
+ if (!RF.getAllowAutoPadding())
+ return false;
+ // If the instruction isn't fully relaxed, shifting it around might require a
+ // larger value for one of the fixups then can be encoded. The outer loop
+ // will also catch this before moving to the next instruction, but we need to
+ // prevent padding this single instruction as well.
+ if (!isFullyRelaxed(RF))
+ return false;
+
+ const unsigned OldSize = RF.getContents().size();
+ if (OldSize == 15)
+ return false;
+
+ const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
+ const unsigned RemainingPrefixSize = [&]() -> unsigned {
+ SmallString<15> Code;
+ raw_svector_ostream VecOS(Code);
+ Emitter.emitPrefix(RF.getInst(), VecOS, STI);
+ assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
+
+ // TODO: It turns out we need a decent amount of plumbing for the target
+ // specific bits to determine number of prefixes its safe to add. Various
+ // targets (older chips mostly, but also Atom family) encounter decoder
+ // stalls with too many prefixes. For testing purposes, we set the value
+ // externally for the moment.
+ unsigned ExistingPrefixSize = Code.size();
+ if (TargetPrefixMax <= ExistingPrefixSize)
+ return 0;
+ return TargetPrefixMax - ExistingPrefixSize;
+ }();
+ const unsigned PrefixBytesToAdd =
+ std::min(MaxPossiblePad, RemainingPrefixSize);
+ if (PrefixBytesToAdd == 0)
+ return false;
+
+ const uint8_t Prefix = determinePaddingPrefix(RF.getInst());
+
+ SmallString<256> Code;
+ Code.append(PrefixBytesToAdd, Prefix);
+ Code.append(RF.getContents().begin(), RF.getContents().end());
+ RF.getContents() = Code;
+
+ // Adjust the fixups for the change in offsets
+ for (auto &F : RF.getFixups()) {
+ F.setOffset(F.getOffset() + PrefixBytesToAdd);
+ }
+
+ RemainingSize -= PrefixBytesToAdd;
+ return true;
+}
+
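
Concretely, prefix padding prepends copies of the chosen prefix byte and shifts every fixup offset by the same amount. A toy model of that transformation (plain vectors instead of the MC fragment/fixup types):

#include <cstdint>
#include <vector>

// E.g. with two CS prefixes in 64-bit mode, "mov %eax, %ecx" (89 C1)
// becomes 2E 2E 89 C1: identical semantics, two bytes longer.
static void padWithPrefix(std::vector<uint8_t> &Encoding,
                          std::vector<unsigned> &FixupOffsets,
                          uint8_t Prefix, unsigned Count) {
  Encoding.insert(Encoding.begin(), Count, Prefix);
  for (unsigned &Offset : FixupOffsets)
    Offset += Count; // fixups now sit Count bytes further into the fragment
}
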
+bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
+ MCCodeEmitter &Emitter,
+ unsigned &RemainingSize) const {
+ if (isFullyRelaxed(RF))
+ // TODO: There are lots of other tricks we could apply for increasing
+ // encoding size without impacting performance.
+ return false;
+
+ MCInst Relaxed = RF.getInst();
+ relaxInstruction(Relaxed, *RF.getSubtargetInfo());
+
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<15> Code;
+ raw_svector_ostream VecOS(Code);
+ Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo());
+ const unsigned OldSize = RF.getContents().size();
+ const unsigned NewSize = Code.size();
+ assert(NewSize >= OldSize && "size decrease during relaxation?");
+ unsigned Delta = NewSize - OldSize;
+ if (Delta > RemainingSize)
+ return false;
+ RF.setInst(Relaxed);
+ RF.getContents() = Code;
+ RF.getFixups() = Fixups;
+ RemainingSize -= Delta;
+ return true;
+}
+
+bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
+ MCCodeEmitter &Emitter,
+ unsigned &RemainingSize) const {
+ bool Changed = false;
+ if (RemainingSize != 0)
+ Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
+ if (RemainingSize != 0)
+ Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
+ return Changed;
+}
+
+void X86AsmBackend::finishLayout(MCAssembler const &Asm,
+ MCAsmLayout &Layout) const {
+ // See if we can further relax some instructions to cut down on the number of
+ // nop bytes required for code alignment. The actual win is in reducing
+ // instruction count, not number of bytes. Modern X86-64 can easily end up
+ // decode limited. It is often better to reduce the number of instructions
+ // (i.e. eliminate nops) even at the cost of increasing the size and
+ // complexity of others.
+ if (!X86PadForAlign && !X86PadForBranchAlign)
+ return;
+
+ DenseSet<MCFragment *> LabeledFragments;
+ for (const MCSymbol &S : Asm.symbols())
+ LabeledFragments.insert(S.getFragment(false));
+
+ for (MCSection &Sec : Asm) {
+ if (!Sec.getKind().isText())
+ continue;
+
+ SmallVector<MCRelaxableFragment *, 4> Relaxable;
+ for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
+ MCFragment &F = *I;
+
+ if (LabeledFragments.count(&F))
+ Relaxable.clear();
+
+ if (F.getKind() == MCFragment::FT_Data ||
+ F.getKind() == MCFragment::FT_CompactEncodedInst)
+ // Skip and ignore
+ continue;
+
+ if (F.getKind() == MCFragment::FT_Relaxable) {
+ auto &RF = cast<MCRelaxableFragment>(*I);
+ Relaxable.push_back(&RF);
+ continue;
+ }
+
+ auto canHandle = [](MCFragment &F) -> bool {
+ switch (F.getKind()) {
+ default:
+ return false;
+ case MCFragment::FT_Align:
+ return X86PadForAlign;
+ case MCFragment::FT_BoundaryAlign:
+ return X86PadForBranchAlign;
+ }
+ };
+ // For any unhandled kind, assume we can't change layout.
+ if (!canHandle(F)) {
+ Relaxable.clear();
+ continue;
+ }
+
+#ifndef NDEBUG
+ const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
+#endif
+ const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);
+
+ // To keep the effects local, prefer to relax instructions closest to
+ // the align directive. This is purely about human understandability
+ // of the resulting code. If we later find a reason to expand
+ // particular instructions over others, we can adjust.
+ MCFragment *FirstChangedFragment = nullptr;
+ unsigned RemainingSize = OrigSize;
+ while (!Relaxable.empty() && RemainingSize != 0) {
+ auto &RF = *Relaxable.pop_back_val();
+ // Give the backend a chance to play any tricks it wishes to increase
+ // the encoding size of the given instruction. Target independent code
+ // will try further relaxation, but targets may play further tricks.
+ if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
+ FirstChangedFragment = &RF;
+
+ // If we have an instruction which hasn't been fully relaxed, we can't
+ // skip past it and insert bytes before it. Changing its starting
+ // offset might require a larger negative offset than it can encode.
+ // We don't need to worry about larger positive offsets as none of the
+ // possible offsets between this and our align are visible, and the
+ // ones afterwards aren't changing.
+ if (!isFullyRelaxed(RF))
+ break;
+ }
+ Relaxable.clear();
+
+ if (FirstChangedFragment) {
+ // Make sure the offsets for any fragments in the affected range get
+ // updated. Note that this (conservatively) invalidates the offsets of
+ // those following, but this is not required.
+ Layout.invalidateFragmentsFrom(FirstChangedFragment);
+ }
+
+ // BoundaryAlign explicitly tracks its size (unlike align).
+ if (F.getKind() == MCFragment::FT_BoundaryAlign)
+ cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);
+
+#ifndef NDEBUG
+ const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
+ const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
+ assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
+ "can't move start of next fragment!");
+ assert(FinalSize == RemainingSize && "inconsistent size computation?");
+#endif
+
+ // If we're looking at a boundary align, make sure we don't try to pad
+ // its target instructions for some following directive. Doing so would
+ // break the alignment of the current boundary align.
+ if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
+ const MCFragment *LastFragment = BF->getLastFragment();
+ if (!LastFragment)
+ continue;
+ while (&*I != LastFragment)
+ ++I;
+ }
+ }
+ }
+
+ // The layout is done. Mark every fragment as valid.
+ for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
+ MCSection &Section = *Layout.getSectionOrder()[i];
+ Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
+ Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
+ }
}
/// Write a sequence of optimal nops to the output, covering \p Count
@@ -661,7 +1096,7 @@ bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
// This CPU doesn't support long nops. If needed add more.
// FIXME: We could generate something better than plain 0x90.
- if (!STI.getFeatureBits()[X86::FeatureNOPL]) {
+ if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit)) {
for (uint64_t i = 0; i < Count; ++i)
OS << '\x90';
return true;
@@ -670,7 +1105,7 @@ bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
// 15-bytes is the longest single NOP instruction, but 10-bytes is
// commonly the longest that can be efficiently decoded.
uint64_t MaxNopLength = 10;
- if (STI.getFeatureBits()[X86::ProcIntelSLM])
+ if (STI.getFeatureBits()[X86::FeatureFast7ByteNOP])
MaxNopLength = 7;
else if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP])
MaxNopLength = 15;
@@ -811,6 +1246,7 @@ class DarwinX86AsmBackend : public X86AsmBackend {
enum { CU_NUM_SAVED_REGS = 6 };
mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
+ Triple TT;
bool Is64Bit;
unsigned OffsetSize; ///< Offset of a "push" instruction.
@@ -838,10 +1274,140 @@ protected:
return 1;
}
+private:
+ /// Get the compact unwind number for a given register. The number
+ /// corresponds to the enum lists in compact_unwind_encoding.h.
+ int getCompactUnwindRegNum(unsigned Reg) const {
+ static const MCPhysReg CU32BitRegs[7] = {
+ X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
+ };
+ static const MCPhysReg CU64BitRegs[] = {
+ X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+ const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
+ for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
+ if (*CURegs == Reg)
+ return Idx;
+
+ return -1;
+ }
+
+ /// Return the registers encoded for a compact encoding with a frame
+ /// pointer.
+ uint32_t encodeCompactUnwindRegistersWithFrame() const {
+ // Encode the registers in the order they were saved --- 3-bits per
+ // register. The list of saved registers is assumed to be in reverse
+ // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
+ uint32_t RegEnc = 0;
+ for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
+ unsigned Reg = SavedRegs[i];
+ if (Reg == 0) break;
+
+ int CURegNum = getCompactUnwindRegNum(Reg);
+ if (CURegNum == -1) return ~0U;
+
+ // Encode the 3-bit register number in order, skipping over 3-bits for
+ // each register.
+ RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
+ }
+
+ assert((RegEnc & 0x3FFFF) == RegEnc &&
+ "Invalid compact register encoding!");
+ return RegEnc;
+ }
+
+ /// Create the permutation encoding used with frameless stacks. It is
+ /// passed the number of registers to be saved and an array of the registers
+ /// saved.
+ uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
+ // The saved registers are numbered from 1 to 6. In order to encode the
+ // order in which they were saved, we re-number them according to their
+ // place in the register order. The re-numbering is relative to the last
+ // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
+ // that order:
+ //
+ // Orig Re-Num
+ // ---- ------
+ // 6 6
+ // 2 2
+ // 4 3
+ // 5 3
+ //
+ for (unsigned i = 0; i < RegCount; ++i) {
+ int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
+ if (CUReg == -1) return ~0U;
+ SavedRegs[i] = CUReg;
+ }
+
+ // Reverse the list.
+ std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
+
+ uint32_t RenumRegs[CU_NUM_SAVED_REGS];
+ for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
+ unsigned Countless = 0;
+ for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
+ if (SavedRegs[j] < SavedRegs[i])
+ ++Countless;
+
+ RenumRegs[i] = SavedRegs[i] - Countless - 1;
+ }
+
+ // Take the renumbered values and encode them into a 10-bit number.
+ uint32_t permutationEncoding = 0;
+ switch (RegCount) {
+ case 6:
+ permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
+ + 6 * RenumRegs[2] + 2 * RenumRegs[3]
+ + RenumRegs[4];
+ break;
+ case 5:
+ permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
+ + 6 * RenumRegs[3] + 2 * RenumRegs[4]
+ + RenumRegs[5];
+ break;
+ case 4:
+ permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
+ + 3 * RenumRegs[4] + RenumRegs[5];
+ break;
+ case 3:
+ permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
+ + RenumRegs[5];
+ break;
+ case 2:
+ permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
+ break;
+ case 1:
+ permutationEncoding |= RenumRegs[5];
+ break;
+ }
+
+ assert((permutationEncoding & 0x3FF) == permutationEncoding &&
+ "Invalid compact register encoding!");
+ return permutationEncoding;
+ }
+
+public:
+ DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI)
+ : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
+ Is64Bit(TT.isArch64Bit()) {
+ memset(SavedRegs, 0, sizeof(SavedRegs));
+ OffsetSize = Is64Bit ? 8 : 4;
+ MoveInstrSize = Is64Bit ? 3 : 2;
+ StackDivide = Is64Bit ? 8 : 4;
+ }
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override {
+ uint32_t CPUType = cantFail(MachO::getCPUType(TT));
+ uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
+ return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
+ }
+
/// Implementation of algorithm to generate the compact unwind encoding
/// for the CFI instructions.
uint32_t
- generateCompactUnwindEncodingImpl(ArrayRef<MCCFIInstruction> Instrs) const {
+ generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const override {
if (Instrs.empty()) return 0;
// Reset the saved registers.
@@ -904,7 +1470,7 @@ protected:
// L0:
// .cfi_def_cfa_offset 80
//
- StackSize = std::abs(Inst.getOffset()) / StackDivide;
+ StackSize = Inst.getOffset() / StackDivide;
++NumDefCFAOffsets;
break;
}
@@ -991,168 +1557,6 @@ protected:
return CompactUnwindEncoding;
}
-
-private:
- /// Get the compact unwind number for a given register. The number
- /// corresponds to the enum lists in compact_unwind_encoding.h.
- int getCompactUnwindRegNum(unsigned Reg) const {
- static const MCPhysReg CU32BitRegs[7] = {
- X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
- };
- static const MCPhysReg CU64BitRegs[] = {
- X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
- };
- const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
- for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
- if (*CURegs == Reg)
- return Idx;
-
- return -1;
- }
-
- /// Return the registers encoded for a compact encoding with a frame
- /// pointer.
- uint32_t encodeCompactUnwindRegistersWithFrame() const {
- // Encode the registers in the order they were saved --- 3-bits per
- // register. The list of saved registers is assumed to be in reverse
- // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
- uint32_t RegEnc = 0;
- for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
- unsigned Reg = SavedRegs[i];
- if (Reg == 0) break;
-
- int CURegNum = getCompactUnwindRegNum(Reg);
- if (CURegNum == -1) return ~0U;
-
- // Encode the 3-bit register number in order, skipping over 3-bits for
- // each register.
- RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
- }
-
- assert((RegEnc & 0x3FFFF) == RegEnc &&
- "Invalid compact register encoding!");
- return RegEnc;
- }
-
- /// Create the permutation encoding used with frameless stacks. It is
- /// passed the number of registers to be saved and an array of the registers
- /// saved.
- uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
- // The saved registers are numbered from 1 to 6. In order to encode the
- // order in which they were saved, we re-number them according to their
- // place in the register order. The re-numbering is relative to the last
- // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
- // that order:
- //
- // Orig Re-Num
- // ---- ------
- // 6 6
- // 2 2
- // 4 3
- // 5 3
- //
- for (unsigned i = 0; i < RegCount; ++i) {
- int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
- if (CUReg == -1) return ~0U;
- SavedRegs[i] = CUReg;
- }
-
- // Reverse the list.
- std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
-
- uint32_t RenumRegs[CU_NUM_SAVED_REGS];
- for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
- unsigned Countless = 0;
- for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
- if (SavedRegs[j] < SavedRegs[i])
- ++Countless;
-
- RenumRegs[i] = SavedRegs[i] - Countless - 1;
- }
-
- // Take the renumbered values and encode them into a 10-bit number.
- uint32_t permutationEncoding = 0;
- switch (RegCount) {
- case 6:
- permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
- + 6 * RenumRegs[2] + 2 * RenumRegs[3]
- + RenumRegs[4];
- break;
- case 5:
- permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
- + 6 * RenumRegs[3] + 2 * RenumRegs[4]
- + RenumRegs[5];
- break;
- case 4:
- permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
- + 3 * RenumRegs[4] + RenumRegs[5];
- break;
- case 3:
- permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
- + RenumRegs[5];
- break;
- case 2:
- permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
- break;
- case 1:
- permutationEncoding |= RenumRegs[5];
- break;
- }
-
- assert((permutationEncoding & 0x3FF) == permutationEncoding &&
- "Invalid compact register encoding!");
- return permutationEncoding;
- }
-
-public:
- DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI, bool Is64Bit)
- : X86AsmBackend(T, STI), MRI(MRI), Is64Bit(Is64Bit) {
- memset(SavedRegs, 0, sizeof(SavedRegs));
- OffsetSize = Is64Bit ? 8 : 4;
- MoveInstrSize = Is64Bit ? 3 : 2;
- StackDivide = Is64Bit ? 8 : 4;
- }
-};
-
-class DarwinX86_32AsmBackend : public DarwinX86AsmBackend {
-public:
- DarwinX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI)
- : DarwinX86AsmBackend(T, MRI, STI, false) {}
-
- std::unique_ptr<MCObjectTargetWriter>
- createObjectTargetWriter() const override {
- return createX86MachObjectWriter(/*Is64Bit=*/false,
- MachO::CPU_TYPE_I386,
- MachO::CPU_SUBTYPE_I386_ALL);
- }
-
- /// Generate the compact unwind encoding for the CFI instructions.
- uint32_t generateCompactUnwindEncoding(
- ArrayRef<MCCFIInstruction> Instrs) const override {
- return generateCompactUnwindEncodingImpl(Instrs);
- }
-};
-
-class DarwinX86_64AsmBackend : public DarwinX86AsmBackend {
- const MachO::CPUSubTypeX86 Subtype;
-public:
- DarwinX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI, MachO::CPUSubTypeX86 st)
- : DarwinX86AsmBackend(T, MRI, STI, true), Subtype(st) {}
-
- std::unique_ptr<MCObjectTargetWriter>
- createObjectTargetWriter() const override {
- return createX86MachObjectWriter(/*Is64Bit=*/true, MachO::CPU_TYPE_X86_64,
- Subtype);
- }
-
- /// Generate the compact unwind encoding for the CFI instructions.
- uint32_t generateCompactUnwindEncoding(
- ArrayRef<MCCFIInstruction> Instrs) const override {
- return generateCompactUnwindEncodingImpl(Instrs);
- }
};
} // end anonymous namespace
@@ -1163,7 +1567,7 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
const MCTargetOptions &Options) {
const Triple &TheTriple = STI.getTargetTriple();
if (TheTriple.isOSBinFormatMachO())
- return new DarwinX86_32AsmBackend(T, MRI, STI);
+ return new DarwinX86AsmBackend(T, MRI, STI);
if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
return new WindowsX86AsmBackend(T, false, STI);
@@ -1181,13 +1585,8 @@ MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options) {
const Triple &TheTriple = STI.getTargetTriple();
- if (TheTriple.isOSBinFormatMachO()) {
- MachO::CPUSubTypeX86 CS =
- StringSwitch<MachO::CPUSubTypeX86>(TheTriple.getArchName())
- .Case("x86_64h", MachO::CPU_SUBTYPE_X86_64_H)
- .Default(MachO::CPU_SUBTYPE_X86_64_ALL);
- return new DarwinX86_64AsmBackend(T, MRI, STI, CS);
- }
+ if (TheTriple.isOSBinFormatMachO())
+ return new DarwinX86AsmBackend(T, MRI, STI);
if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
return new WindowsX86AsmBackend(T, true, STI);
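
To make the frameless compact-unwind arithmetic above concrete, a standalone re-derivation with a worked example (mirrors encodeCompactUnwindRegistersWithoutFrame for RegCount == 3; other counts use the other coefficient rows from the switch):

#include <algorithm>
#include <cassert>

// Regs holds compact-unwind register numbers (1..6) in save order,
// zero-padded to six entries.
static unsigned permutationEncode3(unsigned Regs[6]) {
  std::reverse(Regs, Regs + 6);
  unsigned Renum[6] = {};
  for (unsigned i = 3; i < 6; ++i) {
    unsigned Countless = 0;
    for (unsigned j = 3; j < i; ++j)
      if (Regs[j] < Regs[i])
        ++Countless;
    Renum[i] = Regs[i] - Countless - 1;
  }
  return 20 * Renum[3] + 4 * Renum[4] + Renum[5];
}

int main() {
  unsigned Regs[6] = {1, 3, 5, 0, 0, 0}; // EBX, EDX, ESI saved in that order
  assert(permutationEncode3(Regs) == 88); // Renum {4, 2, 0}: 20*4 + 4*2 + 0
}
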
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index a4f8dd669e1e5..79f07d3c7792a 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -91,7 +91,7 @@ namespace X86 {
COND_G = 15,
LAST_VALID_COND = COND_G,
- // Artificial condition codes. These are used by AnalyzeBranch
+ // Artificial condition codes. These are used by analyzeBranch
// to indicate a block terminated with two conditional branches that together
// form a compound condition. They occur in code using FCMP_OEQ or FCMP_UNE,
// which can't be represented on x86 with a single condition. These
@@ -356,6 +356,39 @@ namespace X86 {
AlignBranchRet = 1U << 4,
AlignBranchIndirect = 1U << 5
};
+
+ /// Defines the encoding values for the segment override prefixes.
+ enum EncodingOfSegmentOverridePrefix : uint8_t {
+ CS_Encoding = 0x2E,
+ DS_Encoding = 0x3E,
+ ES_Encoding = 0x26,
+ FS_Encoding = 0x64,
+ GS_Encoding = 0x65,
+ SS_Encoding = 0x36
+ };
+
+ /// Given a segment register, return the encoding of the segment override
+ /// prefix for it.
+ inline EncodingOfSegmentOverridePrefix
+ getSegmentOverridePrefixForReg(unsigned Reg) {
+ switch (Reg) {
+ default:
+ llvm_unreachable("Unknown segment register!");
+ case X86::CS:
+ return CS_Encoding;
+ case X86::DS:
+ return DS_Encoding;
+ case X86::ES:
+ return ES_Encoding;
+ case X86::FS:
+ return FS_Encoding;
+ case X86::GS:
+ return GS_Encoding;
+ case X86::SS:
+ return SS_Encoding;
+ }
+ }
+
} // end namespace X86;
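
A quick illustration of the mapping (the segment register enums come from the generated X86 register definitions; the values asserted match the enum above):

#include <cassert>

// With the definitions above in scope:
//   getSegmentOverridePrefixForReg(X86::FS) yields FS_Encoding (0x64)
//   getSegmentOverridePrefixForReg(X86::SS) yields SS_Encoding (0x36)
static void checkSegmentPrefixMapping() {
  assert(X86::getSegmentOverridePrefixForReg(X86::FS) == X86::FS_Encoding);
  assert(X86::getSegmentOverridePrefixForReg(X86::SS) == X86::SS_Encoding);
}
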
/// X86II - This namespace holds all of the target specific flags that
@@ -581,90 +614,107 @@ namespace X86II {
/// in the lower 4 bits of the opcode.
AddCCFrm = 9,
+ /// PrefixByte - This form is used for instructions that represent a prefix
+ /// byte like data16 or rep.
+ PrefixByte = 10,
+
/// MRM[0-7][rm] - These forms are used to represent instructions that use
/// a Mod/RM byte, and use the middle field to hold extended opcode
/// information. In the intel manual these are represented as /0, /1, ...
///
+ /// MRMr0 - Instructions operate on a register Reg/Opcode operand, not the
+ /// r/m field.
+ MRMr0 = 21,
+
+ /// MRMSrcMemFSIB - Like MRMSrcMem, but forces use of the SIB field.
+ MRMSrcMemFSIB = 22,
+
+ /// MRMDestMemFSIB - Like MRMDestMem, but forces use of the SIB field.
+ MRMDestMemFSIB = 23,
+
/// MRMDestMem - This form is used for instructions that use the Mod/RM byte
/// to specify a destination, which in this case is memory.
///
- MRMDestMem = 32,
+ MRMDestMem = 24,
/// MRMSrcMem - This form is used for instructions that use the Mod/RM byte
/// to specify a source, which in this case is memory.
///
- MRMSrcMem = 33,
+ MRMSrcMem = 25,
/// MRMSrcMem4VOp3 - This form is used for instructions that encode
/// operand 3 with VEX.VVVV and load from memory.
///
- MRMSrcMem4VOp3 = 34,
+ MRMSrcMem4VOp3 = 26,
/// MRMSrcMemOp4 - This form is used for instructions that use the Mod/RM
/// byte to specify the fourth source, which in this case is memory.
///
- MRMSrcMemOp4 = 35,
+ MRMSrcMemOp4 = 27,
/// MRMSrcMemCC - This form is used for instructions that use the Mod/RM
/// byte to specify the operands and also encodes a condition code.
///
- MRMSrcMemCC = 36,
+ MRMSrcMemCC = 28,
/// MRMXmCC - This form is used for instructions that use the Mod/RM byte
/// to specify a memory source, but doesn't use the middle field, and has
/// a condition code.
///
- MRMXmCC = 38,
+ MRMXmCC = 30,
/// MRMXm - This form is used for instructions that use the Mod/RM byte
/// to specify a memory source, but doesn't use the middle field.
///
- MRMXm = 39,
+ MRMXm = 31,
// Next, instructions that operate on a memory r/m operand...
- MRM0m = 40, MRM1m = 41, MRM2m = 42, MRM3m = 43, // Format /0 /1 /2 /3
- MRM4m = 44, MRM5m = 45, MRM6m = 46, MRM7m = 47, // Format /4 /5 /6 /7
+ MRM0m = 32, MRM1m = 33, MRM2m = 34, MRM3m = 35, // Format /0 /1 /2 /3
+ MRM4m = 36, MRM5m = 37, MRM6m = 38, MRM7m = 39, // Format /4 /5 /6 /7
/// MRMDestReg - This form is used for instructions that use the Mod/RM byte
/// to specify a destination, which in this case is a register.
///
- MRMDestReg = 48,
+ MRMDestReg = 40,
/// MRMSrcReg - This form is used for instructions that use the Mod/RM byte
/// to specify a source, which in this case is a register.
///
- MRMSrcReg = 49,
+ MRMSrcReg = 41,
/// MRMSrcReg4VOp3 - This form is used for instructions that encode
/// operand 3 with VEX.VVVV and do not load from memory.
///
- MRMSrcReg4VOp3 = 50,
+ MRMSrcReg4VOp3 = 42,
/// MRMSrcRegOp4 - This form is used for instructions that use the Mod/RM
/// byte to specify the fourth source, which in this case is a register.
///
- MRMSrcRegOp4 = 51,
+ MRMSrcRegOp4 = 43,
/// MRMSrcRegCC - This form is used for instructions that use the Mod/RM
/// byte to specify the operands and also encodes a condition code
///
- MRMSrcRegCC = 52,
+ MRMSrcRegCC = 44,
/// MRMXrCC - This form is used for instructions that use the Mod/RM byte
/// to specify a register source, but doesn't use the middle field, and has
/// a condition code.
///
- MRMXrCC = 54,
+ MRMXrCC = 46,
/// MRMXr - This form is used for instructions that use the Mod/RM byte
/// to specify a register source, but doesn't use the middle field.
///
- MRMXr = 55,
+ MRMXr = 47,
// Instructions that operate on a register r/m operand...
- MRM0r = 56, MRM1r = 57, MRM2r = 58, MRM3r = 59, // Format /0 /1 /2 /3
- MRM4r = 60, MRM5r = 61, MRM6r = 62, MRM7r = 63, // Format /4 /5 /6 /7
+ MRM0r = 48, MRM1r = 49, MRM2r = 50, MRM3r = 51, // Format /0 /1 /2 /3
+ MRM4r = 52, MRM5r = 53, MRM6r = 54, MRM7r = 55, // Format /4 /5 /6 /7
+
+ // Instructions that have mod=11 and an opcode, but ignore the r/m field.
+ MRM0X = 56, MRM1X = 57, MRM2X = 58, MRM3X = 59, // Format /0 /1 /2 /3
+ MRM4X = 60, MRM5X = 61, MRM6X = 62, MRM7X = 63, // Format /4 /5 /6 /7
/// MRM_XX - A mod/rm byte of exactly 0xXX.
MRM_C0 = 64, MRM_C1 = 65, MRM_C2 = 66, MRM_C3 = 67,
@@ -900,6 +950,16 @@ namespace X86II {
NOTRACK = 1ULL << NoTrackShift
};
+ /// \returns true if the instruction with given opcode is a prefix.
+ inline bool isPrefix(uint64_t TSFlags) {
+ return (TSFlags & X86II::FormMask) == PrefixByte;
+ }
+
+ /// \returns true if the instruction with given opcode is a pseudo.
+ inline bool isPseudo(uint64_t TSFlags) {
+ return (TSFlags & X86II::FormMask) == Pseudo;
+ }
+
/// \returns the "base" X86 opcode for the specified machine
/// instruction.
inline uint8_t getBaseOpcodeFor(uint64_t TSFlags) {
@@ -1028,10 +1088,13 @@ namespace X86II {
case X86II::RawFrmDst:
case X86II::RawFrmDstSrc:
case X86II::AddCCFrm:
+ case X86II::PrefixByte:
return -1;
case X86II::MRMDestMem:
+ case X86II::MRMDestMemFSIB:
return 0;
case X86II::MRMSrcMem:
+ case X86II::MRMSrcMemFSIB:
// Start from 1, skip any registers encoded in VEX_VVVV or I8IMM, or a
// mask register.
return 1 + HasVEX_4V + HasEVEX_K;
@@ -1051,12 +1114,18 @@ namespace X86II {
case X86II::MRMSrcRegOp4:
case X86II::MRMSrcRegCC:
case X86II::MRMXrCC:
+ case X86II::MRMr0:
case X86II::MRMXr:
case X86II::MRM0r: case X86II::MRM1r:
case X86II::MRM2r: case X86II::MRM3r:
case X86II::MRM4r: case X86II::MRM5r:
case X86II::MRM6r: case X86II::MRM7r:
return -1;
+ case X86II::MRM0X: case X86II::MRM1X:
+ case X86II::MRM2X: case X86II::MRM3X:
+ case X86II::MRM4X: case X86II::MRM5X:
+ case X86II::MRM6X: case X86II::MRM7X:
+ return -1;
case X86II::MRMXmCC:
case X86II::MRMXm:
case X86II::MRM0m: case X86II::MRM1m:
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index bd009da60851e..292dd17e2f51c 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -317,8 +317,10 @@ static unsigned getRelocType32(MCContext &Ctx,
unsigned X86ELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
- MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
MCFixupKind Kind = Fixup.getKind();
+ if (Kind >= FirstLiteralRelocationKind)
+ return Kind - FirstLiteralRelocationKind;
+ MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
X86_64RelType Type = getType64(Kind, Modifier, IsPCRel);
if (getEMachine() == ELF::EM_X86_64)
return getRelocType64(Ctx, Fixup.getLoc(), Modifier, Type, IsPCRel, Kind);
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
index 73b1969b4e822..b51011e2c52fc 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
@@ -15,7 +15,7 @@
#include "X86ATTInstPrinter.h"
#include "X86BaseInfo.h"
#include "X86MCTargetDesc.h"
-#include "Utils/X86ShuffleDecode.h"
+#include "X86ShuffleDecode.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/raw_ostream.h"
@@ -199,6 +199,40 @@ using namespace llvm;
CASE_AVX512_INS_COMMON(Inst##SD, Z, m_Int) \
CASE_AVX512_INS_COMMON(Inst##SS, Z, m_Int)
+#define CASE_FMA4(Inst, suf) \
+ CASE_AVX_INS_COMMON(Inst, 4, suf) \
+ CASE_AVX_INS_COMMON(Inst, 4Y, suf)
+
+#define CASE_FMA4_PACKED_RR(Inst) \
+ CASE_FMA4(Inst##PD, rr) \
+ CASE_FMA4(Inst##PS, rr)
+
+#define CASE_FMA4_PACKED_RM(Inst) \
+ CASE_FMA4(Inst##PD, rm) \
+ CASE_FMA4(Inst##PS, rm)
+
+#define CASE_FMA4_PACKED_MR(Inst) \
+ CASE_FMA4(Inst##PD, mr) \
+ CASE_FMA4(Inst##PS, mr)
+
+#define CASE_FMA4_SCALAR_RR(Inst) \
+ CASE_AVX_INS_COMMON(Inst##SD4, , rr) \
+ CASE_AVX_INS_COMMON(Inst##SS4, , rr) \
+ CASE_AVX_INS_COMMON(Inst##SD4, , rr_Int) \
+ CASE_AVX_INS_COMMON(Inst##SS4, , rr_Int)
+
+#define CASE_FMA4_SCALAR_RM(Inst) \
+ CASE_AVX_INS_COMMON(Inst##SD4, , rm) \
+ CASE_AVX_INS_COMMON(Inst##SS4, , rm) \
+ CASE_AVX_INS_COMMON(Inst##SD4, , rm_Int) \
+ CASE_AVX_INS_COMMON(Inst##SS4, , rm_Int)
+
+#define CASE_FMA4_SCALAR_MR(Inst) \
+ CASE_AVX_INS_COMMON(Inst##SD4, , mr) \
+ CASE_AVX_INS_COMMON(Inst##SS4, , mr) \
+ CASE_AVX_INS_COMMON(Inst##SD4, , mr_Int) \
+ CASE_AVX_INS_COMMON(Inst##SS4, , mr_Int)
+
static unsigned getVectorRegSize(unsigned RegNo) {
if (X86::ZMM0 <= RegNo && RegNo <= X86::ZMM31)
return 512;
@@ -247,14 +281,15 @@ static void printMasking(raw_ostream &OS, const MCInst *MI,
OS << " {z}";
}
-static bool printFMA3Comments(const MCInst *MI, raw_ostream &OS) {
+static bool printFMAComments(const MCInst *MI, raw_ostream &OS,
+ const MCInstrInfo &MCII) {
const char *Mul1Name = nullptr, *Mul2Name = nullptr, *AccName = nullptr;
unsigned NumOperands = MI->getNumOperands();
bool RegForm = false;
bool Negate = false;
StringRef AccStr = "+";
- // The operands for FMA instructions without rounding fall into two forms.
+ // The operands for FMA3 instructions without rounding fall into two forms:
// dest, src1, src2, src3
// dest, src1, mask, src2, src3
// Where src3 is either a register or 5 memory address operands. So to find
@@ -262,9 +297,112 @@ static bool printFMA3Comments(const MCInst *MI, raw_ostream &OS) {
// index from the end by taking into account memory vs register form when
// finding src2.
+ // The operands for FMA4 instructions:
+ // dest, src1, src2, src3
+  // Where src2 OR src3 is either a register or 5 memory address operands. So
+ // to find dest and src1 we can index from the front, src2 (reg/mem) follows
+ // and then src3 (reg) will be at the end.
+
switch (MI->getOpcode()) {
default:
return false;
+
+ CASE_FMA4_PACKED_RR(FMADD)
+ CASE_FMA4_SCALAR_RR(FMADD)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ LLVM_FALLTHROUGH;
+ CASE_FMA4_PACKED_RM(FMADD)
+ CASE_FMA4_SCALAR_RM(FMADD)
+ Mul2Name = getRegName(MI->getOperand(2).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+ CASE_FMA4_PACKED_MR(FMADD)
+ CASE_FMA4_SCALAR_MR(FMADD)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+
+ CASE_FMA4_PACKED_RR(FMSUB)
+ CASE_FMA4_SCALAR_RR(FMSUB)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ LLVM_FALLTHROUGH;
+ CASE_FMA4_PACKED_RM(FMSUB)
+ CASE_FMA4_SCALAR_RM(FMSUB)
+ Mul2Name = getRegName(MI->getOperand(2).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "-";
+ break;
+ CASE_FMA4_PACKED_MR(FMSUB)
+ CASE_FMA4_SCALAR_MR(FMSUB)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "-";
+ break;
+
+ CASE_FMA4_PACKED_RR(FNMADD)
+ CASE_FMA4_SCALAR_RR(FNMADD)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ LLVM_FALLTHROUGH;
+ CASE_FMA4_PACKED_RM(FNMADD)
+ CASE_FMA4_SCALAR_RM(FNMADD)
+ Mul2Name = getRegName(MI->getOperand(2).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ Negate = true;
+ break;
+ CASE_FMA4_PACKED_MR(FNMADD)
+ CASE_FMA4_SCALAR_MR(FNMADD)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ Negate = true;
+ break;
+
+ CASE_FMA4_PACKED_RR(FNMSUB)
+ CASE_FMA4_SCALAR_RR(FNMSUB)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ LLVM_FALLTHROUGH;
+ CASE_FMA4_PACKED_RM(FNMSUB)
+ CASE_FMA4_SCALAR_RM(FNMSUB)
+ Mul2Name = getRegName(MI->getOperand(2).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "-";
+ Negate = true;
+ break;
+ CASE_FMA4_PACKED_MR(FNMSUB)
+ CASE_FMA4_SCALAR_MR(FNMSUB)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "-";
+ Negate = true;
+ break;
+
+ CASE_FMA4_PACKED_RR(FMADDSUB)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ LLVM_FALLTHROUGH;
+ CASE_FMA4_PACKED_RM(FMADDSUB)
+ Mul2Name = getRegName(MI->getOperand(2).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "+/-";
+ break;
+ CASE_FMA4_PACKED_MR(FMADDSUB)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "+/-";
+ break;
+
+ CASE_FMA4_PACKED_RR(FMSUBADD)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ LLVM_FALLTHROUGH;
+ CASE_FMA4_PACKED_RM(FMSUBADD)
+ Mul2Name = getRegName(MI->getOperand(2).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "-/+";
+ break;
+ CASE_FMA4_PACKED_MR(FMSUBADD)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "-/+";
+ break;
+
CASE_FMA_PACKED_REG(FMADD132)
CASE_FMA_SCALAR_REG(FMADD132)
Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
@@ -476,8 +614,9 @@ static bool printFMA3Comments(const MCInst *MI, raw_ostream &OS) {
if (!Mul2Name) Mul2Name = "mem";
if (!AccName) AccName = "mem";
- OS << DestName << " = ";
- // TODO: Print masking information?
+ OS << DestName;
+ printMasking(OS, MI, MCII);
+ OS << " = ";
if (Negate)
OS << '-';
@@ -504,7 +643,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
unsigned NumOperands = MI->getNumOperands();
bool RegForm = false;
- if (printFMA3Comments(MI, OS))
+ if (printFMAComments(MI, OS, MCII))
return true;
switch (MI->getOpcode()) {
@@ -669,14 +808,14 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::PSLLDQri:
case X86::VPSLLDQri:
case X86::VPSLLDQYri:
- case X86::VPSLLDQZ128rr:
- case X86::VPSLLDQZ256rr:
- case X86::VPSLLDQZrr:
+ case X86::VPSLLDQZ128ri:
+ case X86::VPSLLDQZ256ri:
+ case X86::VPSLLDQZri:
Src1Name = getRegName(MI->getOperand(1).getReg());
LLVM_FALLTHROUGH;
- case X86::VPSLLDQZ128rm:
- case X86::VPSLLDQZ256rm:
- case X86::VPSLLDQZrm:
+ case X86::VPSLLDQZ128mi:
+ case X86::VPSLLDQZ256mi:
+ case X86::VPSLLDQZmi:
DestName = getRegName(MI->getOperand(0).getReg());
if (MI->getOperand(NumOperands - 1).isImm())
DecodePSLLDQMask(getRegOperandNumElts(MI, 8, 0),
@@ -687,14 +826,14 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::PSRLDQri:
case X86::VPSRLDQri:
case X86::VPSRLDQYri:
- case X86::VPSRLDQZ128rr:
- case X86::VPSRLDQZ256rr:
- case X86::VPSRLDQZrr:
+ case X86::VPSRLDQZ128ri:
+ case X86::VPSRLDQZ256ri:
+ case X86::VPSRLDQZri:
Src1Name = getRegName(MI->getOperand(1).getReg());
LLVM_FALLTHROUGH;
- case X86::VPSRLDQZ128rm:
- case X86::VPSRLDQZ256rm:
- case X86::VPSRLDQZrm:
+ case X86::VPSRLDQZ128mi:
+ case X86::VPSRLDQZ256mi:
+ case X86::VPSRLDQZmi:
DestName = getRegName(MI->getOperand(0).getReg());
if (MI->getOperand(NumOperands - 1).isImm())
DecodePSRLDQMask(getRegOperandNumElts(MI, 8, 0),
@@ -1178,28 +1317,28 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeSubVectorBroadcast(16, 8, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
- CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z128, r)
+ CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z128, rr)
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
LLVM_FALLTHROUGH;
- CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z128, m)
+ CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z128, rm)
DecodeSubVectorBroadcast(4, 2, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
- CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, r)
- CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, r)
+ CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, rr)
+ CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, rr)
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
LLVM_FALLTHROUGH;
- CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, m)
- CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, m)
+ CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, rm)
+ CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, rm)
DecodeSubVectorBroadcast(8, 2, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
- CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, r)
- CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, r)
+ CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, rr)
+ CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, rr)
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
LLVM_FALLTHROUGH;
- CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, m)
- CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, m)
+ CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, rm)
+ CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, rm)
DecodeSubVectorBroadcast(16, 2, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
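As the comments above describe, FMA3 forms hide src3 at the end (a register or five memory operands) while FMA4 fixes dest and src1 at the front, with src2 or src3 taking the memory slot. For a register-form FMA4 multiply-add the printed comment comes out as dest = (src1 * src2) + src3. A tiny illustration of that indexing (operand names are invented):

#include <cstdio>

int main() {
  // Hypothetical FMA4 register form: dest, src1, src2, src3.
  const char *Ops[] = {"xmm0", "xmm1", "xmm2", "xmm3"};
  int NumOperands = 4;
  const char *Mul1 = Ops[1];              // indexed from the front
  const char *Mul2 = Ops[2];
  const char *Acc = Ops[NumOperands - 1]; // register src3 sits at the end
  std::printf("%s = (%s * %s) + %s\n", Ops[0], Mul1, Mul2, Acc);
}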
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
index a215550769765..33d70fdb12142 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -13,6 +13,7 @@
#include "X86InstPrinterCommon.h"
#include "X86BaseInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -287,16 +288,23 @@ void X86InstPrinterCommon::printRoundingControl(const MCInst *MI, unsigned Op,
}
}
-/// printPCRelImm - This is used to print an immediate value that ends up
-/// being encoded as a pc-relative value (e.g. for jumps and calls). In
-/// Intel-style these print slightly differently than normal immediates.
-/// for example, a $ is not emitted.
-void X86InstPrinterCommon::printPCRelImm(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+/// This is used to print an immediate value that ends up being encoded as a
+/// pc-relative value (e.g. for jumps and calls). In Intel-style these print
+/// slightly differently than normal immediates. For example, a $ is not
+/// emitted.
+///
+/// \p Address The address of the next instruction.
+/// \see MCInstPrinter::printInst
+void X86InstPrinterCommon::printPCRelImm(const MCInst *MI, uint64_t Address,
+ unsigned OpNo, raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.isImm())
- O << formatImm(Op.getImm());
- else {
+ if (Op.isImm()) {
+ if (PrintBranchImmAsAddress) {
+ uint64_t Target = Address + Op.getImm();
+ if (MAI.getCodePointerSize() == 4)
+ Target &= 0xffffffff;
+ O << formatHex(Target);
+ } else
+ O << formatImm(Op.getImm());
+ } else {
assert(Op.isExpr() && "unknown pcrel immediate operand");
// If a symbolic branch target was added as a constant expression then print
// that address in hex.
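When PrintBranchImmAsAddress is set, the immediate is resolved against the address of the next instruction and truncated to 32 bits when code pointers are four bytes wide. A minimal sketch of just that arithmetic:

#include <cstdint>
#include <cstdio>

uint64_t branchTarget(uint64_t NextInstAddr, int64_t Imm, unsigned PtrSize) {
  uint64_t Target = NextInstAddr + Imm;
  if (PtrSize == 4)
    Target &= 0xffffffff; // 32-bit targets wrap around
  return Target;
}

int main() {
  // A branch near the top of a 32-bit address space wraps to 0x15.
  std::printf("%#llx\n",
              (unsigned long long)branchTarget(0xfffffff5, 0x20, 4));
}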
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
index 8e28f24b619a9..bb12ede3b7292 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
@@ -29,7 +29,9 @@ public:
void printVPCMPMnemonic(const MCInst *MI, raw_ostream &OS);
void printCMPMnemonic(const MCInst *MI, bool IsVCmp, raw_ostream &OS);
void printRoundingControl(const MCInst *MI, unsigned Op, raw_ostream &O);
- void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printPCRelImm(const MCInst *MI, uint64_t Address, unsigned OpNo,
+ raw_ostream &O);
+
protected:
void printInstFlags(const MCInst *MI, raw_ostream &O);
void printOptionalSegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
index f4bb0fbf62cd7..d1eb4d09851dd 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
@@ -45,8 +45,7 @@ void X86IntelInstPrinter::printInst(const MCInst *MI, uint64_t Address,
if (MI->getOpcode() == X86::DATA16_PREFIX &&
STI.getFeatureBits()[X86::Mode16Bit]) {
OS << "\tdata32";
- } else if (!printAliasInstr(MI, OS) &&
- !printVecCompareInstr(MI, OS))
+ } else if (!printAliasInstr(MI, Address, OS) && !printVecCompareInstr(MI, OS))
printInstruction(MI, Address, OS);
// Next always print the annotation.
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h
index b409b20cbea88..82baf611df038 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h
@@ -31,9 +31,10 @@ public:
// Autogenerated by tblgen, returns true if we successfully printed an
// alias.
- bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
- void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx, raw_ostream &O);
+ bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &OS);
+ void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
+ unsigned OpIdx, unsigned PrintMethodIdx,
+ raw_ostream &O);
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
@@ -47,14 +48,6 @@ public:
void printU8Imm(const MCInst *MI, unsigned Op, raw_ostream &O);
void printSTiRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
- void printanymem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- printMemReference(MI, OpNo, O);
- }
-
- void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- printMemReference(MI, OpNo, O);
- }
-
void printbytemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
O << "byte ptr ";
printMemReference(MI, OpNo, O);
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index d986c829d98eb..c294da6baffa7 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -71,8 +71,6 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
// (actually, must, since otherwise the non-extern relocations we produce
// overwhelm ld64's tiny little mind and it fails).
DwarfFDESymbolsUseAbsDiff = true;
-
- UseIntegratedAssembler = true;
}
X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
@@ -102,10 +100,6 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
// Exceptions handling
ExceptionsType = ExceptionHandling::DwarfCFI;
-
- // Always enable the integrated assembler by default.
- // Clang also enabled it when the OS is Solaris but that is redundant here.
- UseIntegratedAssembler = true;
}
const MCExpr *
@@ -141,8 +135,16 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
TextAlignFillValue = 0x90;
AllowAtInName = true;
+}
- UseIntegratedAssembler = true;
+void X86MCAsmInfoMicrosoftMASM::anchor() { }
+
+X86MCAsmInfoMicrosoftMASM::X86MCAsmInfoMicrosoftMASM(const Triple &Triple)
+ : X86MCAsmInfoMicrosoft(Triple) {
+ DollarIsPC = true;
+ SeparatorString = "\n";
+ CommentString = ";";
+ AllowSymbolAtNameStart = true;
}
void X86MCAsmInfoGNUCOFF::anchor() { }
@@ -164,6 +166,4 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
TextAlignFillValue = 0x90;
AllowAtInName = true;
-
- UseIntegratedAssembler = true;
}
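The MASM variant mostly flips dialect switches: '$' evaluates to the current location counter, ';' introduces comments, statements are separated by newlines rather than ';', and identifiers may begin with '@'. A stand-in sketch of the same knobs outside MCAsmInfo (field names mirror the real ones; the defaults shown are the MCAsmInfo defaults):

#include <string>

struct DialectSketch {
  bool DollarIsPC = false;             // '$' as the location counter
  std::string SeparatorString = ";";   // statement separator
  std::string CommentString = "#";     // comment introducer
  bool AllowSymbolAtNameStart = false; // identifiers such as @@label
};

DialectSketch makeMASMDialect() {
  DialectSketch D;
  D.DollarIsPC = true;      // "jmp $" branches to the current address
  D.SeparatorString = "\n"; // ';' is a comment in MASM, not a separator
  D.CommentString = ";";
  D.AllowSymbolAtNameStart = true;
  return D;
}

int main() { return makeMASMDialect().DollarIsPC ? 0 : 1; }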
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
index b2369647a40f2..ce8e84fb96b9b 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
@@ -13,7 +13,6 @@
#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCASMINFO_H
#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCASMINFO_H
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmInfoCOFF.h"
#include "llvm/MC/MCAsmInfoDarwin.h"
#include "llvm/MC/MCAsmInfoELF.h"
@@ -49,6 +48,13 @@ public:
explicit X86MCAsmInfoMicrosoft(const Triple &Triple);
};
+class X86MCAsmInfoMicrosoftMASM : public X86MCAsmInfoMicrosoft {
+ void anchor() override;
+
+public:
+ explicit X86MCAsmInfoMicrosoftMASM(const Triple &Triple);
+};
+
class X86MCAsmInfoGNUCOFF : public MCAsmInfoGNUCOFF {
void anchor() override;
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 54a293702bd0f..7dea0760a8310 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -55,83 +55,64 @@ public:
const MCSubtargetInfo &STI) const override;
private:
- unsigned getX86RegNum(const MCOperand &MO) const {
- return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()) & 0x7;
- }
+ unsigned getX86RegNum(const MCOperand &MO) const;
- unsigned getX86RegEncoding(const MCInst &MI, unsigned OpNum) const {
- return Ctx.getRegisterInfo()->getEncodingValue(
- MI.getOperand(OpNum).getReg());
- }
+ unsigned getX86RegEncoding(const MCInst &MI, unsigned OpNum) const;
/// \param MI a single low-level machine instruction.
/// \param OpNum the operand #.
/// \returns true if the OpNumth operand of MI requires a bit to be set in
/// REX prefix.
- bool isREXExtendedReg(const MCInst &MI, unsigned OpNum) const {
- return (getX86RegEncoding(MI, OpNum) >> 3) & 1;
- }
-
- void emitByte(uint8_t C, unsigned &CurByte, raw_ostream &OS) const {
- OS << (char)C;
- ++CurByte;
- }
-
- void emitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
- raw_ostream &OS) const {
- // Output the constant in little endian byte order.
- for (unsigned i = 0; i != Size; ++i) {
- emitByte(Val & 255, CurByte, OS);
- Val >>= 8;
- }
- }
+ bool isREXExtendedReg(const MCInst &MI, unsigned OpNum) const;
void emitImmediate(const MCOperand &Disp, SMLoc Loc, unsigned ImmSize,
- MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS,
+ MCFixupKind FixupKind, uint64_t StartByte, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups, int ImmOffset = 0) const;
- static uint8_t modRMByte(unsigned Mod, unsigned RegOpcode, unsigned RM) {
- assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
- return RM | (RegOpcode << 3) | (Mod << 6);
- }
-
void emitRegModRMByte(const MCOperand &ModRMReg, unsigned RegOpcodeFld,
- unsigned &CurByte, raw_ostream &OS) const {
- emitByte(modRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg)), CurByte, OS);
- }
+ raw_ostream &OS) const;
void emitSIBByte(unsigned SS, unsigned Index, unsigned Base,
- unsigned &CurByte, raw_ostream &OS) const {
- // SIB byte is in the same format as the modRMByte.
- emitByte(modRMByte(SS, Index, Base), CurByte, OS);
- }
+ raw_ostream &OS) const;
void emitMemModRMByte(const MCInst &MI, unsigned Op, unsigned RegOpcodeField,
- uint64_t TSFlags, bool Rex, unsigned &CurByte,
+ uint64_t TSFlags, bool HasREX, uint64_t StartByte,
raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ const MCSubtargetInfo &STI,
+ bool ForceSIB = false) const;
- void emitPrefixImpl(uint64_t TSFlags, unsigned &CurOp, unsigned &CurByte,
- bool &Rex, const MCInst &MI, const MCInstrDesc &Desc,
- const MCSubtargetInfo &STI, raw_ostream &OS) const;
+ bool emitPrefixImpl(unsigned &CurOp, const MCInst &MI,
+ const MCSubtargetInfo &STI, raw_ostream &OS) const;
- void emitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
- const MCInst &MI, const MCInstrDesc &Desc,
+ void emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
raw_ostream &OS) const;
- void emitSegmentOverridePrefix(unsigned &CurByte, unsigned SegOperand,
- const MCInst &MI, raw_ostream &OS) const;
+ void emitSegmentOverridePrefix(unsigned SegOperand, const MCInst &MI,
+ raw_ostream &OS) const;
- bool emitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
- const MCInst &MI, const MCInstrDesc &Desc,
+ bool emitOpcodePrefix(int MemOperand, const MCInst &MI,
const MCSubtargetInfo &STI, raw_ostream &OS) const;
- uint8_t determineREXPrefix(const MCInst &MI, uint64_t TSFlags, int MemOperand,
- const MCInstrDesc &Desc) const;
+ bool emitREXPrefix(int MemOperand, const MCInst &MI, raw_ostream &OS) const;
};
} // end anonymous namespace
+static uint8_t modRMByte(unsigned Mod, unsigned RegOpcode, unsigned RM) {
+ assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
+ return RM | (RegOpcode << 3) | (Mod << 6);
+}
+
+static void emitByte(uint8_t C, raw_ostream &OS) { OS << static_cast<char>(C); }
+
+static void emitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) {
+ // Output the constant in little endian byte order.
+ for (unsigned i = 0; i != Size; ++i) {
+ emitByte(Val & 255, OS);
+ Val >>= 8;
+ }
+}
+
+/// \returns true if this signed displacement fits in an 8-bit sign-extended
/// field.
static bool isDisp8(int Value) { return Value == (int8_t)Value; }
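With the helpers hoisted to free functions, byte positions come from OS.tell() relative to StartByte instead of a threaded CurByte counter; the packing and byte order themselves are unchanged. A standalone check of the three helpers above:

#include <cassert>
#include <cstdint>
#include <cstdio>

static uint8_t modRMByte(unsigned Mod, unsigned RegOpcode, unsigned RM) {
  assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM fields out of range!");
  return RM | (RegOpcode << 3) | (Mod << 6);
}

static bool isDisp8(int Value) { return Value == (int8_t)Value; }

int main() {
  std::printf("%#x\n", modRMByte(3, 2, 1)); // mod=11 reg=010 rm=001 -> 0xd1
  assert(isDisp8(127) && isDisp8(-128) && !isDisp8(128));
  // emitConstant(0x12345678, 4, OS) emits 78 56 34 12 (little endian):
  uint64_t Val = 0x12345678;
  for (unsigned i = 0; i != 4; ++i, Val >>= 8)
    std::printf("%02x ", unsigned(Val & 255));
  std::printf("\n");
}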
@@ -275,7 +256,8 @@ static bool hasSecRelSymbolRef(const MCExpr *Expr) {
static bool isPCRel32Branch(const MCInst &MI, const MCInstrInfo &MCII) {
unsigned Opcode = MI.getOpcode();
const MCInstrDesc &Desc = MCII.get(Opcode);
- if ((Opcode != X86::CALL64pcrel32 && Opcode != X86::JMP_4) ||
+ if ((Opcode != X86::CALL64pcrel32 && Opcode != X86::JMP_4 &&
+ Opcode != X86::JCC_4) ||
getImmFixupKind(Desc.TSFlags) != FK_PCRel_4)
return false;
@@ -288,9 +270,27 @@ static bool isPCRel32Branch(const MCInst &MI, const MCInstrInfo &MCII) {
return Ref && Ref->getKind() == MCSymbolRefExpr::VK_None;
}
+unsigned X86MCCodeEmitter::getX86RegNum(const MCOperand &MO) const {
+ return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()) & 0x7;
+}
+
+unsigned X86MCCodeEmitter::getX86RegEncoding(const MCInst &MI,
+ unsigned OpNum) const {
+ return Ctx.getRegisterInfo()->getEncodingValue(MI.getOperand(OpNum).getReg());
+}
+
+/// \param MI a single low-level machine instruction.
+/// \param OpNum the operand #.
+/// \returns true if the OpNumth operand of MI requires a bit to be set in
+/// REX prefix.
+bool X86MCCodeEmitter::isREXExtendedReg(const MCInst &MI,
+ unsigned OpNum) const {
+ return (getX86RegEncoding(MI, OpNum) >> 3) & 1;
+}
+
void X86MCCodeEmitter::emitImmediate(const MCOperand &DispOp, SMLoc Loc,
unsigned Size, MCFixupKind FixupKind,
- unsigned &CurByte, raw_ostream &OS,
+ uint64_t StartByte, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
int ImmOffset) const {
const MCExpr *Expr = nullptr;
@@ -299,7 +299,7 @@ void X86MCCodeEmitter::emitImmediate(const MCOperand &DispOp, SMLoc Loc,
// relocation, emit it now.
if (FixupKind != FK_PCRel_1 && FixupKind != FK_PCRel_2 &&
FixupKind != FK_PCRel_4) {
- emitConstant(DispOp.getImm() + ImmOffset, Size, CurByte, OS);
+ emitConstant(DispOp.getImm() + ImmOffset, Size, OS);
return;
}
Expr = MCConstantExpr::create(DispOp.getImm(), Ctx);
@@ -322,7 +322,7 @@ void X86MCCodeEmitter::emitImmediate(const MCOperand &DispOp, SMLoc Loc,
}
if (Kind == GOT_Normal)
- ImmOffset = CurByte;
+ ImmOffset = static_cast<int>(OS.tell() - StartByte);
} else if (Expr->getKind() == MCExpr::SymbolRef) {
if (hasSecRelSymbolRef(Expr)) {
FixupKind = MCFixupKind(FK_SecRel_4);
@@ -361,16 +361,30 @@ void X86MCCodeEmitter::emitImmediate(const MCOperand &DispOp, SMLoc Loc,
Ctx);
// Emit a symbolic constant as a fixup and 4 zeros.
- Fixups.push_back(MCFixup::create(CurByte, Expr, FixupKind, Loc));
- emitConstant(0, Size, CurByte, OS);
+ Fixups.push_back(MCFixup::create(static_cast<uint32_t>(OS.tell() - StartByte),
+ Expr, FixupKind, Loc));
+ emitConstant(0, Size, OS);
+}
+
+void X86MCCodeEmitter::emitRegModRMByte(const MCOperand &ModRMReg,
+ unsigned RegOpcodeFld,
+ raw_ostream &OS) const {
+ emitByte(modRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg)), OS);
+}
+
+void X86MCCodeEmitter::emitSIBByte(unsigned SS, unsigned Index, unsigned Base,
+ raw_ostream &OS) const {
+ // SIB byte is in the same format as the modRMByte.
+ emitByte(modRMByte(SS, Index, Base), OS);
}
void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
unsigned RegOpcodeField,
- uint64_t TSFlags, bool Rex,
- unsigned &CurByte, raw_ostream &OS,
+ uint64_t TSFlags, bool HasREX,
+ uint64_t StartByte, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+ const MCSubtargetInfo &STI,
+ bool ForceSIB) const {
const MCOperand &Disp = MI.getOperand(Op + X86::AddrDisp);
const MCOperand &Base = MI.getOperand(Op + X86::AddrBaseReg);
const MCOperand &Scale = MI.getOperand(Op + X86::AddrScaleAmt);
@@ -383,8 +397,9 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
BaseReg == X86::EIP) { // [disp32+rIP] in X86-64 mode
assert(STI.hasFeature(X86::Mode64Bit) &&
"Rip-relative addressing requires 64-bit mode");
- assert(IndexReg.getReg() == 0 && "Invalid rip-relative address");
- emitByte(modRMByte(0, RegOpcodeField, 5), CurByte, OS);
+ assert(IndexReg.getReg() == 0 && !ForceSIB &&
+ "Invalid rip-relative address");
+ emitByte(modRMByte(0, RegOpcodeField, 5), OS);
unsigned Opcode = MI.getOpcode();
// movq loads are handled with a special relocation form which allows the
@@ -395,7 +410,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
default:
return X86::reloc_riprel_4byte;
case X86::MOV64rm:
- assert(Rex);
+ assert(HasREX);
return X86::reloc_riprel_4byte_movq_load;
case X86::CALL64m:
case X86::JMP64m:
@@ -409,8 +424,8 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
case X86::SBB64rm:
case X86::SUB64rm:
case X86::XOR64rm:
- return Rex ? X86::reloc_riprel_4byte_relax_rex
- : X86::reloc_riprel_4byte_relax;
+ return HasREX ? X86::reloc_riprel_4byte_relax_rex
+ : X86::reloc_riprel_4byte_relax;
}
}();
@@ -425,7 +440,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
? X86II::getSizeOfImm(TSFlags)
: 0;
- emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind), CurByte, OS,
+ emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind), StartByte, OS,
Fixups, -ImmSize);
return;
}
@@ -472,23 +487,23 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
if (Disp.isImm() && isDisp8(Disp.getImm())) {
if (Disp.getImm() == 0 && RMfield != 6) {
// There is no displacement; just the register.
- emitByte(modRMByte(0, RegOpcodeField, RMfield), CurByte, OS);
+ emitByte(modRMByte(0, RegOpcodeField, RMfield), OS);
return;
}
// Use the [REG]+disp8 form, including for [BP] which cannot be encoded.
- emitByte(modRMByte(1, RegOpcodeField, RMfield), CurByte, OS);
- emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups);
+ emitByte(modRMByte(1, RegOpcodeField, RMfield), OS);
+ emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, StartByte, OS, Fixups);
return;
}
// This is the [REG]+disp16 case.
- emitByte(modRMByte(2, RegOpcodeField, RMfield), CurByte, OS);
+ emitByte(modRMByte(2, RegOpcodeField, RMfield), OS);
} else {
// There is no BaseReg; this is the plain [disp16] case.
- emitByte(modRMByte(0, RegOpcodeField, 6), CurByte, OS);
+ emitByte(modRMByte(0, RegOpcodeField, 6), OS);
}
// Emit 16-bit displacement for plain disp16 or [REG]+disp16 cases.
- emitImmediate(Disp, MI.getLoc(), 2, FK_Data_2, CurByte, OS, Fixups);
+ emitImmediate(Disp, MI.getLoc(), 2, FK_Data_2, StartByte, OS, Fixups);
return;
}
@@ -498,7 +513,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
// 2-7) and absolute references.
if ( // The SIB byte must be used if there is an index register.
- IndexReg.getReg() == 0 &&
+ !ForceSIB && IndexReg.getReg() == 0 &&
// The SIB byte must be used if the base is ESP/RSP/R12, all of which
// encode to an R/M value of 4, which indicates that a SIB byte is
// present.
@@ -508,8 +523,8 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
(!STI.hasFeature(X86::Mode64Bit) || BaseReg != 0)) {
if (BaseReg == 0) { // [disp32] in X86-32 mode
- emitByte(modRMByte(0, RegOpcodeField, 5), CurByte, OS);
- emitImmediate(Disp, MI.getLoc(), 4, FK_Data_4, CurByte, OS, Fixups);
+ emitByte(modRMByte(0, RegOpcodeField, 5), OS);
+ emitImmediate(Disp, MI.getLoc(), 4, FK_Data_4, StartByte, OS, Fixups);
return;
}
@@ -519,7 +534,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
// by emitting a displacement of 0 below.
if (BaseRegNo != N86::EBP) {
if (Disp.isImm() && Disp.getImm() == 0) {
- emitByte(modRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS);
+ emitByte(modRMByte(0, RegOpcodeField, BaseRegNo), OS);
return;
}
@@ -530,7 +545,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
// This is exclusively used by call *a@tlscall(base). The relocation
// (R_386_TLSCALL or R_X86_64_TLSCALL) applies to the beginning.
Fixups.push_back(MCFixup::create(0, Sym, FK_NONE, MI.getLoc()));
- emitByte(modRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS);
+ emitByte(modRMByte(0, RegOpcodeField, BaseRegNo), OS);
return;
}
}
@@ -539,27 +554,27 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
// Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
if (Disp.isImm()) {
if (!HasEVEX && isDisp8(Disp.getImm())) {
- emitByte(modRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS);
- emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups);
+ emitByte(modRMByte(1, RegOpcodeField, BaseRegNo), OS);
+ emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, StartByte, OS, Fixups);
return;
}
// Try EVEX compressed 8-bit displacement first; if failed, fall back to
// 32-bit displacement.
int CDisp8 = 0;
if (HasEVEX && isCDisp8(TSFlags, Disp.getImm(), CDisp8)) {
- emitByte(modRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS);
- emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups,
+ emitByte(modRMByte(1, RegOpcodeField, BaseRegNo), OS);
+ emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, StartByte, OS, Fixups,
CDisp8 - Disp.getImm());
return;
}
}
// Otherwise, emit the most general non-SIB encoding: [REG+disp32]
- emitByte(modRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS);
+ emitByte(modRMByte(2, RegOpcodeField, BaseRegNo), OS);
unsigned Opcode = MI.getOpcode();
unsigned FixupKind = Opcode == X86::MOV32rm ? X86::reloc_signed_4byte_relax
: X86::reloc_signed_4byte;
- emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind), CurByte, OS,
+ emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind), StartByte, OS,
Fixups);
return;
}
@@ -575,30 +590,30 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
if (BaseReg == 0) {
// If there is no base register, we emit the special case SIB byte with
// MOD=0, BASE=5, to JUST get the index, scale, and displacement.
- emitByte(modRMByte(0, RegOpcodeField, 4), CurByte, OS);
+ emitByte(modRMByte(0, RegOpcodeField, 4), OS);
ForceDisp32 = true;
} else if (!Disp.isImm()) {
// Emit the normal disp32 encoding.
- emitByte(modRMByte(2, RegOpcodeField, 4), CurByte, OS);
+ emitByte(modRMByte(2, RegOpcodeField, 4), OS);
ForceDisp32 = true;
} else if (Disp.getImm() == 0 &&
// Base reg can't be anything that ends up with '5' as the base
// reg, it is the magic [*] nomenclature that indicates no base.
BaseRegNo != N86::EBP) {
// Emit no displacement ModR/M byte
- emitByte(modRMByte(0, RegOpcodeField, 4), CurByte, OS);
+ emitByte(modRMByte(0, RegOpcodeField, 4), OS);
} else if (!HasEVEX && isDisp8(Disp.getImm())) {
// Emit the disp8 encoding.
- emitByte(modRMByte(1, RegOpcodeField, 4), CurByte, OS);
+ emitByte(modRMByte(1, RegOpcodeField, 4), OS);
ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP
} else if (HasEVEX && isCDisp8(TSFlags, Disp.getImm(), CDisp8)) {
// Emit the disp8 encoding.
- emitByte(modRMByte(1, RegOpcodeField, 4), CurByte, OS);
+ emitByte(modRMByte(1, RegOpcodeField, 4), OS);
ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP
ImmOffset = CDisp8 - Disp.getImm();
} else {
// Emit the normal disp32 encoding.
- emitByte(modRMByte(2, RegOpcodeField, 4), CurByte, OS);
+ emitByte(modRMByte(2, RegOpcodeField, 4), OS);
}
// Calculate what the SS field value should be...
@@ -613,77 +628,78 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
IndexRegNo = getX86RegNum(IndexReg);
else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5)
IndexRegNo = 4;
- emitSIBByte(SS, IndexRegNo, 5, CurByte, OS);
+ emitSIBByte(SS, IndexRegNo, 5, OS);
} else {
unsigned IndexRegNo;
if (IndexReg.getReg())
IndexRegNo = getX86RegNum(IndexReg);
else
IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
- emitSIBByte(SS, IndexRegNo, getX86RegNum(Base), CurByte, OS);
+ emitSIBByte(SS, IndexRegNo, getX86RegNum(Base), OS);
}
// Do we need to output a displacement?
if (ForceDisp8)
- emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups,
+ emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, StartByte, OS, Fixups,
ImmOffset);
else if (ForceDisp32 || Disp.getImm() != 0)
emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte),
- CurByte, OS, Fixups);
+ StartByte, OS, Fixups);
}
-void X86MCCodeEmitter::emitPrefixImpl(uint64_t TSFlags, unsigned &CurOp,
- unsigned &CurByte, bool &Rex,
- const MCInst &MI, const MCInstrDesc &Desc,
- const MCSubtargetInfo &STI,
- raw_ostream &OS) const {
+/// Emit all instruction prefixes.
+///
+/// \returns true if REX prefix is used, otherwise returns false.
+bool X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, const MCInst &MI,
+ const MCSubtargetInfo &STI,
+ raw_ostream &OS) const {
+ uint64_t TSFlags = MCII.get(MI.getOpcode()).TSFlags;
// Determine where the memory operand starts, if present.
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
- if (MemoryOperand != -1)
- MemoryOperand += CurOp;
-
// Emit segment override opcode prefix as needed.
- if (MemoryOperand >= 0)
- emitSegmentOverridePrefix(CurByte, MemoryOperand + X86::AddrSegmentReg, MI,
- OS);
+ if (MemoryOperand != -1) {
+ MemoryOperand += CurOp;
+ emitSegmentOverridePrefix(MemoryOperand + X86::AddrSegmentReg, MI, OS);
+ }
// Emit the repeat opcode prefix as needed.
unsigned Flags = MI.getFlags();
if (TSFlags & X86II::REP || Flags & X86::IP_HAS_REPEAT)
- emitByte(0xF3, CurByte, OS);
+ emitByte(0xF3, OS);
if (Flags & X86::IP_HAS_REPEAT_NE)
- emitByte(0xF2, CurByte, OS);
+ emitByte(0xF2, OS);
// Emit the address size opcode prefix as needed.
- bool need_address_override;
+ bool NeedAddressOverride;
uint64_t AdSize = TSFlags & X86II::AdSizeMask;
if ((STI.hasFeature(X86::Mode16Bit) && AdSize == X86II::AdSize32) ||
(STI.hasFeature(X86::Mode32Bit) && AdSize == X86II::AdSize16) ||
(STI.hasFeature(X86::Mode64Bit) && AdSize == X86II::AdSize32)) {
- need_address_override = true;
+ NeedAddressOverride = true;
} else if (MemoryOperand < 0) {
- need_address_override = false;
+ NeedAddressOverride = false;
} else if (STI.hasFeature(X86::Mode64Bit)) {
assert(!is16BitMemOperand(MI, MemoryOperand, STI));
- need_address_override = is32BitMemOperand(MI, MemoryOperand);
+ NeedAddressOverride = is32BitMemOperand(MI, MemoryOperand);
} else if (STI.hasFeature(X86::Mode32Bit)) {
assert(!is64BitMemOperand(MI, MemoryOperand));
- need_address_override = is16BitMemOperand(MI, MemoryOperand, STI);
+ NeedAddressOverride = is16BitMemOperand(MI, MemoryOperand, STI);
} else {
assert(STI.hasFeature(X86::Mode16Bit));
assert(!is64BitMemOperand(MI, MemoryOperand));
- need_address_override = !is16BitMemOperand(MI, MemoryOperand, STI);
+ NeedAddressOverride = !is16BitMemOperand(MI, MemoryOperand, STI);
}
- if (need_address_override)
- emitByte(0x67, CurByte, OS);
+ if (NeedAddressOverride)
+ emitByte(0x67, OS);
// Encoding type for this instruction.
uint64_t Encoding = TSFlags & X86II::EncodingMask;
- if (Encoding == 0)
- Rex = emitOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, STI, OS);
+ bool HasREX = false;
+ if (Encoding)
+ emitVEXOpcodePrefix(MemoryOperand, MI, OS);
else
- emitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
+ HasREX = emitOpcodePrefix(MemoryOperand, MI, STI, OS);
uint64_t Form = TSFlags & X86II::FormMask;
switch (Form) {
@@ -697,11 +713,11 @@ void X86MCCodeEmitter::emitPrefixImpl(uint64_t TSFlags, unsigned &CurOp,
"SI and DI register sizes do not match");
// Emit segment override opcode prefix as needed (not for %ds).
if (MI.getOperand(2).getReg() != X86::DS)
- emitSegmentOverridePrefix(CurByte, 2, MI, OS);
+ emitSegmentOverridePrefix(2, MI, OS);
// Emit AdSize prefix as needed.
if ((!STI.hasFeature(X86::Mode32Bit) && siReg == X86::ESI) ||
(STI.hasFeature(X86::Mode32Bit) && siReg == X86::SI))
- emitByte(0x67, CurByte, OS);
+ emitByte(0x67, OS);
CurOp += 3; // Consume operands.
break;
}
@@ -709,11 +725,11 @@ void X86MCCodeEmitter::emitPrefixImpl(uint64_t TSFlags, unsigned &CurOp,
unsigned siReg = MI.getOperand(0).getReg();
// Emit segment override opcode prefix as needed (not for %ds).
if (MI.getOperand(1).getReg() != X86::DS)
- emitSegmentOverridePrefix(CurByte, 1, MI, OS);
+ emitSegmentOverridePrefix(1, MI, OS);
// Emit AdSize prefix as needed.
if ((!STI.hasFeature(X86::Mode32Bit) && siReg == X86::ESI) ||
(STI.hasFeature(X86::Mode32Bit) && siReg == X86::SI))
- emitByte(0x67, CurByte, OS);
+ emitByte(0x67, OS);
CurOp += 2; // Consume operands.
break;
}
@@ -722,24 +738,26 @@ void X86MCCodeEmitter::emitPrefixImpl(uint64_t TSFlags, unsigned &CurOp,
// Emit AdSize prefix as needed.
if ((!STI.hasFeature(X86::Mode32Bit) && siReg == X86::EDI) ||
(STI.hasFeature(X86::Mode32Bit) && siReg == X86::DI))
- emitByte(0x67, CurByte, OS);
+ emitByte(0x67, OS);
++CurOp; // Consume operand.
break;
}
case X86II::RawFrmMemOffs: {
// Emit segment override opcode prefix as needed.
- emitSegmentOverridePrefix(CurByte, 1, MI, OS);
+ emitSegmentOverridePrefix(1, MI, OS);
break;
}
}
+
+ return HasREX;
}
-/// emitVEXOpcodePrefix - AVX instructions are encoded using a opcode prefix
-/// called VEX.
-void X86MCCodeEmitter::emitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
- int MemOperand, const MCInst &MI,
- const MCInstrDesc &Desc,
+/// AVX instructions are encoded using an opcode prefix called VEX.
+void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
raw_ostream &OS) const {
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+
assert(!(TSFlags & X86II::LOCK) && "Can't have LOCK VEX.");
uint64_t Encoding = TSFlags & X86II::EncodingMask;
@@ -868,8 +886,11 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
switch (TSFlags & X86II::FormMask) {
default:
llvm_unreachable("Unexpected form in emitVEXOpcodePrefix!");
+ case X86II::MRM_C0:
case X86II::RawFrm:
+ case X86II::PrefixByte:
break;
+ case X86II::MRMDestMemFSIB:
case X86II::MRMDestMem: {
// MRMDestMem instructions forms:
// MemAddr, src1(ModR/M)
@@ -900,6 +921,7 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
EVEX_R2 = ~(RegEnc >> 4) & 1;
break;
}
+ case X86II::MRMSrcMemFSIB:
case X86II::MRMSrcMem: {
// MRMSrcMem instructions forms:
// src1(ModR/M), MemAddr
@@ -1081,6 +1103,15 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
EncodeRC = true;
break;
}
+ case X86II::MRMr0: {
+ // MRMr0 instructions forms:
+ // 11:rrr:000
+ // dst(ModR/M)
+ unsigned RegEnc = getX86RegEncoding(MI, CurOp++);
+ VEX_R = ~(RegEnc >> 3) & 1;
+ EVEX_R2 = ~(RegEnc >> 4) & 1;
+ break;
+ }
case X86II::MRM0r:
case X86II::MRM1r:
case X86II::MRM2r:
@@ -1127,15 +1158,15 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// Can we use the 2 byte VEX prefix?
if (!(MI.getFlags() & X86::IP_USE_VEX3) && Encoding == X86II::VEX &&
VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) {
- emitByte(0xC5, CurByte, OS);
- emitByte(LastByte | (VEX_R << 7), CurByte, OS);
+ emitByte(0xC5, OS);
+ emitByte(LastByte | (VEX_R << 7), OS);
return;
}
// 3 byte VEX prefix
- emitByte(Encoding == X86II::XOP ? 0x8F : 0xC4, CurByte, OS);
- emitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
- emitByte(LastByte | (VEX_W << 7), CurByte, OS);
+ emitByte(Encoding == X86II::XOP ? 0x8F : 0xC4, OS);
+ emitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, OS);
+ emitByte(LastByte | (VEX_W << 7), OS);
} else {
assert(Encoding == X86II::EVEX && "unknown encoding!");
// EVEX opcode prefix can have 4 bytes
@@ -1146,144 +1177,137 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
assert((VEX_5M & 0x3) == VEX_5M &&
"More than 2 significant bits in VEX.m-mmmm fields for EVEX!");
- emitByte(0x62, CurByte, OS);
+ emitByte(0x62, OS);
emitByte((VEX_R << 7) | (VEX_X << 6) | (VEX_B << 5) | (EVEX_R2 << 4) |
VEX_5M,
- CurByte, OS);
- emitByte((VEX_W << 7) | (VEX_4V << 3) | (EVEX_U << 2) | VEX_PP, CurByte,
OS);
+ emitByte((VEX_W << 7) | (VEX_4V << 3) | (EVEX_U << 2) | VEX_PP, OS);
if (EncodeRC)
emitByte((EVEX_z << 7) | (EVEX_rc << 5) | (EVEX_b << 4) | (EVEX_V2 << 3) |
EVEX_aaa,
- CurByte, OS);
+ OS);
else
emitByte((EVEX_z << 7) | (EVEX_L2 << 6) | (VEX_L << 5) | (EVEX_b << 4) |
(EVEX_V2 << 3) | EVEX_aaa,
- CurByte, OS);
+ OS);
}
}
-/// Determine if the MCInst has to be encoded with a X86-64 REX prefix which
-/// specifies 1) 64-bit instructions, 2) non-default operand size, and 3) use
-/// of X86-64 extended registers.
-uint8_t X86MCCodeEmitter::determineREXPrefix(const MCInst &MI, uint64_t TSFlags,
- int MemOperand,
- const MCInstrDesc &Desc) const {
- uint8_t REX = 0;
- bool UsesHighByteReg = false;
-
- if (TSFlags & X86II::REX_W)
- REX |= 1 << 3; // set REX.W
+/// Emit REX prefix which specifies
+/// 1) 64-bit instructions,
+/// 2) non-default operand size, and
+/// 3) use of X86-64 extended registers.
+///
+/// \returns true if REX prefix is used, otherwise returns false.
+bool X86MCCodeEmitter::emitREXPrefix(int MemOperand, const MCInst &MI,
+ raw_ostream &OS) const {
+ uint8_t REX = [&, MemOperand]() {
+ uint8_t REX = 0;
+ bool UsesHighByteReg = false;
+
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+
+ if (TSFlags & X86II::REX_W)
+ REX |= 1 << 3; // set REX.W
+
+ if (MI.getNumOperands() == 0)
+ return REX;
+
+ unsigned NumOps = MI.getNumOperands();
+ unsigned CurOp = X86II::getOperandBias(Desc);
+
+ // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
+ for (unsigned i = CurOp; i != NumOps; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
+ UsesHighByteReg = true;
+ if (X86II::isX86_64NonExtLowByteReg(Reg))
+        // FIXME: emitREXPrefix slaps this prefix onto anything that
+        // returns non-zero.
+ REX |= 0x40; // REX fixed encoding prefix
+ }
- if (MI.getNumOperands() == 0)
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::AddRegFrm:
+ REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
+ break;
+ case X86II::MRMSrcReg:
+ case X86II::MRMSrcRegCC:
+ REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
+ REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
+ break;
+ case X86II::MRMSrcMem:
+ case X86II::MRMSrcMemCC:
+ REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
+ REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B
+ REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X
+ CurOp += X86::AddrNumOperands;
+ break;
+ case X86II::MRMDestReg:
+ REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
+ REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
+ break;
+ case X86II::MRMDestMem:
+ REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B
+ REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X
+ CurOp += X86::AddrNumOperands;
+ REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
+ break;
+ case X86II::MRMXmCC:
+ case X86II::MRMXm:
+ case X86II::MRM0m:
+ case X86II::MRM1m:
+ case X86II::MRM2m:
+ case X86II::MRM3m:
+ case X86II::MRM4m:
+ case X86II::MRM5m:
+ case X86II::MRM6m:
+ case X86II::MRM7m:
+ REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B
+ REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X
+ break;
+ case X86II::MRMXrCC:
+ case X86II::MRMXr:
+ case X86II::MRM0r:
+ case X86II::MRM1r:
+ case X86II::MRM2r:
+ case X86II::MRM3r:
+ case X86II::MRM4r:
+ case X86II::MRM5r:
+ case X86II::MRM6r:
+ case X86II::MRM7r:
+ REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
+ break;
+ case X86II::MRMr0:
+ REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
+ break;
+ case X86II::MRMDestMemFSIB:
+ llvm_unreachable("FSIB format never need REX prefix!");
+ }
+ if (REX && UsesHighByteReg)
+ report_fatal_error(
+ "Cannot encode high byte register in REX-prefixed instruction");
return REX;
+ }();
- unsigned NumOps = MI.getNumOperands();
- unsigned CurOp = X86II::getOperandBias(Desc);
-
- // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
- for (unsigned i = CurOp; i != NumOps; ++i) {
- const MCOperand &MO = MI.getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
- UsesHighByteReg = true;
- if (X86II::isX86_64NonExtLowByteReg(Reg))
- // FIXME: The caller of determineREXPrefix slaps this prefix onto anything
- // that returns non-zero.
- REX |= 0x40; // REX fixed encoding prefix
- }
-
- switch (TSFlags & X86II::FormMask) {
- case X86II::AddRegFrm:
- REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
- break;
- case X86II::MRMSrcReg:
- case X86II::MRMSrcRegCC:
- REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
- REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
- break;
- case X86II::MRMSrcMem:
- case X86II::MRMSrcMemCC:
- REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
- REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B
- REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X
- CurOp += X86::AddrNumOperands;
- break;
- case X86II::MRMDestReg:
- REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
- REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
- break;
- case X86II::MRMDestMem:
- REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B
- REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X
- CurOp += X86::AddrNumOperands;
- REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
- break;
- case X86II::MRMXmCC:
- case X86II::MRMXm:
- case X86II::MRM0m:
- case X86II::MRM1m:
- case X86II::MRM2m:
- case X86II::MRM3m:
- case X86II::MRM4m:
- case X86II::MRM5m:
- case X86II::MRM6m:
- case X86II::MRM7m:
- REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B
- REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X
- break;
- case X86II::MRMXrCC:
- case X86II::MRMXr:
- case X86II::MRM0r:
- case X86II::MRM1r:
- case X86II::MRM2r:
- case X86II::MRM3r:
- case X86II::MRM4r:
- case X86II::MRM5r:
- case X86II::MRM6r:
- case X86II::MRM7r:
- REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
- break;
- }
- if (REX && UsesHighByteReg)
- report_fatal_error(
- "Cannot encode high byte register in REX-prefixed instruction");
+ if (!REX)
+ return false;
- return REX;
+ emitByte(0x40 | REX, OS);
+ return true;
}
/// Emit segment override opcode prefix as needed.
-void X86MCCodeEmitter::emitSegmentOverridePrefix(unsigned &CurByte,
- unsigned SegOperand,
+void X86MCCodeEmitter::emitSegmentOverridePrefix(unsigned SegOperand,
const MCInst &MI,
raw_ostream &OS) const {
// Check for explicit segment override on memory operand.
- switch (MI.getOperand(SegOperand).getReg()) {
- default:
- llvm_unreachable("Unknown segment register!");
- case 0:
- break;
- case X86::CS:
- emitByte(0x2E, CurByte, OS);
- break;
- case X86::SS:
- emitByte(0x36, CurByte, OS);
- break;
- case X86::DS:
- emitByte(0x3E, CurByte, OS);
- break;
- case X86::ES:
- emitByte(0x26, CurByte, OS);
- break;
- case X86::FS:
- emitByte(0x64, CurByte, OS);
- break;
- case X86::GS:
- emitByte(0x65, CurByte, OS);
- break;
- }
+ if (unsigned Reg = MI.getOperand(SegOperand).getReg())
+ emitByte(X86::getSegmentOverridePrefixForReg(Reg), OS);
}
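The table the old switch spelled out survives unchanged behind the new helper: each segment register maps to its one-byte override prefix. A stand-in with plain enumerators in place of the X86:: register numbers:

#include <cstdint>
#include <cstdio>

enum SegReg { CS, SS, DS, ES, FS, GS }; // stand-ins for X86::CS etc.

uint8_t segmentOverridePrefix(SegReg Reg) {
  switch (Reg) {
  case CS: return 0x2E;
  case SS: return 0x36;
  case DS: return 0x3E;
  case ES: return 0x26;
  case FS: return 0x64;
  case GS: return 0x65;
  }
  return 0;
}

int main() { std::printf("%#x\n", segmentOverridePrefix(FS)); } // 0x64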
/// Emit all instruction prefixes prior to the opcode.
@@ -1291,48 +1315,44 @@ void X86MCCodeEmitter::emitSegmentOverridePrefix(unsigned &CurByte,
/// \param MemOperand the operand # of the start of a memory operand if present.
/// If not present, it is -1.
///
-/// \returns true if a REX prefix was used.
-bool X86MCCodeEmitter::emitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
- int MemOperand, const MCInst &MI,
- const MCInstrDesc &Desc,
+/// \returns true if REX prefix is used, otherwise returns false.
+bool X86MCCodeEmitter::emitOpcodePrefix(int MemOperand, const MCInst &MI,
const MCSubtargetInfo &STI,
raw_ostream &OS) const {
- bool Ret = false;
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+
// Emit the operand size opcode prefix as needed.
if ((TSFlags & X86II::OpSizeMask) ==
(STI.hasFeature(X86::Mode16Bit) ? X86II::OpSize32 : X86II::OpSize16))
- emitByte(0x66, CurByte, OS);
+ emitByte(0x66, OS);
// Emit the LOCK opcode prefix.
if (TSFlags & X86II::LOCK || MI.getFlags() & X86::IP_HAS_LOCK)
- emitByte(0xF0, CurByte, OS);
+ emitByte(0xF0, OS);
// Emit the NOTRACK opcode prefix.
if (TSFlags & X86II::NOTRACK || MI.getFlags() & X86::IP_HAS_NOTRACK)
- emitByte(0x3E, CurByte, OS);
+ emitByte(0x3E, OS);
switch (TSFlags & X86II::OpPrefixMask) {
case X86II::PD: // 66
- emitByte(0x66, CurByte, OS);
+ emitByte(0x66, OS);
break;
case X86II::XS: // F3
- emitByte(0xF3, CurByte, OS);
+ emitByte(0xF3, OS);
break;
case X86II::XD: // F2
- emitByte(0xF2, CurByte, OS);
+ emitByte(0xF2, OS);
break;
}
// Handle REX prefix.
- // FIXME: Can this come before F2 etc to simplify emission?
- if (STI.hasFeature(X86::Mode64Bit)) {
- if (uint8_t REX = determineREXPrefix(MI, TSFlags, MemOperand, Desc)) {
- emitByte(0x40 | REX, CurByte, OS);
- Ret = true;
- }
- } else {
- assert(!(TSFlags & X86II::REX_W) && "REX.W requires 64bit mode.");
- }
+ assert((STI.hasFeature(X86::Mode64Bit) || !(TSFlags & X86II::REX_W)) &&
+ "REX.W requires 64bit mode.");
+ bool HasREX = STI.hasFeature(X86::Mode64Bit)
+ ? emitREXPrefix(MemOperand, MI, OS)
+ : false;
// 0x0F escape code must be emitted just before the opcode.
switch (TSFlags & X86II::OpMapMask) {
@@ -1340,19 +1360,20 @@ bool X86MCCodeEmitter::emitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
case X86II::T8: // 0F 38
case X86II::TA: // 0F 3A
case X86II::ThreeDNow: // 0F 0F, second 0F emitted by caller.
- emitByte(0x0F, CurByte, OS);
+ emitByte(0x0F, OS);
break;
}
switch (TSFlags & X86II::OpMapMask) {
case X86II::T8: // 0F 38
- emitByte(0x38, CurByte, OS);
+ emitByte(0x38, OS);
break;
case X86II::TA: // 0F 3A
- emitByte(0x3A, CurByte, OS);
+ emitByte(0x3A, OS);
break;
}
- return Ret;
+
+ return HasREX;
}
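emitOpcodePrefix keeps the legacy ordering: operand-size (66), LOCK (F0), NOTRACK (3E), the mandatory 66/F3/F2 prefix, then REX, then the 0F escape map bytes. For instance, lock add qword ptr [rax], 1 encodes as F0 48 83 00 01, with F0 ahead of the REX.W byte 48. A simplified sketch of that ordering, collecting bytes into a buffer (the flag names are invented):

#include <cstdint>
#include <cstdio>
#include <vector>

struct PrefixFlags {
  bool OpSize66, Lock, NoTrack;
  uint8_t Mandatory; // 0, or one of 0x66, 0xF3, 0xF2
  uint8_t REX;       // 0 if absent, else 0x40 | WRXB
  bool Escape0F;
};

std::vector<uint8_t> emitPrefixes(const PrefixFlags &F) {
  std::vector<uint8_t> Out;
  if (F.OpSize66)  Out.push_back(0x66);
  if (F.Lock)      Out.push_back(0xF0);
  if (F.NoTrack)   Out.push_back(0x3E);
  if (F.Mandatory) Out.push_back(F.Mandatory);
  if (F.REX)       Out.push_back(F.REX); // must immediately precede
  if (F.Escape0F)  Out.push_back(0x0F);  // the escape/opcode bytes
  return Out;
}

int main() {
  PrefixFlags F{false, true, false, 0, 0x48, false}; // lock + REX.W
  for (uint8_t B : emitPrefixes(F))
    std::printf("%02x ", B); // f0 48
}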
void X86MCCodeEmitter::emitPrefix(const MCInst &MI, raw_ostream &OS,
@@ -1362,16 +1383,12 @@ void X86MCCodeEmitter::emitPrefix(const MCInst &MI, raw_ostream &OS,
uint64_t TSFlags = Desc.TSFlags;
// Pseudo instructions don't get encoded.
- if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
+ if (X86II::isPseudo(TSFlags))
return;
unsigned CurOp = X86II::getOperandBias(Desc);
- // Keep track of the current byte being emitted.
- unsigned CurByte = 0;
-
- bool Rex = false;
- emitPrefixImpl(TSFlags, CurOp, CurByte, Rex, MI, Desc, STI, OS);
+ emitPrefixImpl(CurOp, MI, STI, OS);
}
void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
@@ -1382,17 +1399,15 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
uint64_t TSFlags = Desc.TSFlags;
// Pseudo instructions don't get encoded.
- if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
+ if (X86II::isPseudo(TSFlags))
return;
unsigned NumOps = Desc.getNumOperands();
unsigned CurOp = X86II::getOperandBias(Desc);
- // Keep track of the current byte being emitted.
- unsigned CurByte = 0;
+ uint64_t StartByte = OS.tell();
- bool Rex = false;
- emitPrefixImpl(TSFlags, CurOp, CurByte, Rex, MI, Desc, STI, OS);
+ bool HasREX = emitPrefixImpl(CurOp, MI, STI, OS);
// It uses the VEX.VVVV field?
bool HasVEX_4V = TSFlags & X86II::VEX_4V;
@@ -1422,7 +1437,8 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::RawFrmDstSrc:
case X86II::RawFrmSrc:
case X86II::RawFrmDst:
- emitByte(BaseOpcode, CurByte, OS);
+ case X86II::PrefixByte:
+ emitByte(BaseOpcode, OS);
break;
case X86II::AddCCFrm: {
// This will be added to the opcode in the fallthrough.
@@ -1431,47 +1447,47 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
--NumOps; // Drop the operand from the end.
LLVM_FALLTHROUGH;
case X86II::RawFrm:
- emitByte(BaseOpcode + OpcodeOffset, CurByte, OS);
+ emitByte(BaseOpcode + OpcodeOffset, OS);
if (!STI.hasFeature(X86::Mode64Bit) || !isPCRel32Branch(MI, MCII))
break;
const MCOperand &Op = MI.getOperand(CurOp++);
emitImmediate(Op, MI.getLoc(), X86II::getSizeOfImm(TSFlags),
- MCFixupKind(X86::reloc_branch_4byte_pcrel), CurByte, OS,
+ MCFixupKind(X86::reloc_branch_4byte_pcrel), StartByte, OS,
Fixups);
break;
}
case X86II::RawFrmMemOffs:
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
emitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
- CurByte, OS, Fixups);
+ StartByte, OS, Fixups);
++CurOp; // skip segment operand
break;
case X86II::RawFrmImm8:
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
emitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
- CurByte, OS, Fixups);
- emitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 1, FK_Data_1, CurByte,
+ StartByte, OS, Fixups);
+ emitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 1, FK_Data_1, StartByte,
OS, Fixups);
break;
case X86II::RawFrmImm16:
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
emitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
- CurByte, OS, Fixups);
- emitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 2, FK_Data_2, CurByte,
+ StartByte, OS, Fixups);
+ emitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 2, FK_Data_2, StartByte,
OS, Fixups);
break;
case X86II::AddRegFrm:
- emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++)), CurByte, OS);
+ emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++)), OS);
break;
case X86II::MRMDestReg: {
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
unsigned SrcRegNum = CurOp + 1;
if (HasEVEX_K) // Skip writemask
@@ -1481,12 +1497,13 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
++SrcRegNum;
emitRegModRMByte(MI.getOperand(CurOp),
- getX86RegNum(MI.getOperand(SrcRegNum)), CurByte, OS);
+ getX86RegNum(MI.getOperand(SrcRegNum)), OS);
CurOp = SrcRegNum + 1;
break;
}
+ case X86II::MRMDestMemFSIB:
case X86II::MRMDestMem: {
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
unsigned SrcRegNum = CurOp + X86::AddrNumOperands;
if (HasEVEX_K) // Skip writemask
@@ -1495,13 +1512,14 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
++SrcRegNum;
+ bool ForceSIB = (Form == X86II::MRMDestMemFSIB);
emitMemModRMByte(MI, CurOp, getX86RegNum(MI.getOperand(SrcRegNum)), TSFlags,
- Rex, CurByte, OS, Fixups, STI);
+ HasREX, StartByte, OS, Fixups, STI, ForceSIB);
CurOp = SrcRegNum + 1;
break;
}
case X86II::MRMSrcReg: {
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
unsigned SrcRegNum = CurOp + 1;
if (HasEVEX_K) // Skip writemask
@@ -1511,7 +1529,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
++SrcRegNum;
emitRegModRMByte(MI.getOperand(SrcRegNum),
- getX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
+ getX86RegNum(MI.getOperand(CurOp)), OS);
CurOp = SrcRegNum + 1;
if (HasVEX_I8Reg)
I8RegNum = getX86RegEncoding(MI, CurOp++);
@@ -1521,17 +1539,17 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
break;
}
case X86II::MRMSrcReg4VOp3: {
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
unsigned SrcRegNum = CurOp + 1;
emitRegModRMByte(MI.getOperand(SrcRegNum),
- getX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
+ getX86RegNum(MI.getOperand(CurOp)), OS);
CurOp = SrcRegNum + 1;
++CurOp; // Encoded in VEX.VVVV
break;
}
case X86II::MRMSrcRegOp4: {
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
unsigned SrcRegNum = CurOp + 1;
// Skip 1st src (which is encoded in VEX_VVVV)
@@ -1542,7 +1560,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
I8RegNum = getX86RegEncoding(MI, SrcRegNum++);
emitRegModRMByte(MI.getOperand(SrcRegNum),
- getX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
+ getX86RegNum(MI.getOperand(CurOp)), OS);
CurOp = SrcRegNum + 1;
break;
}
@@ -1551,12 +1569,13 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
unsigned SecondOp = CurOp++;
unsigned CC = MI.getOperand(CurOp++).getImm();
- emitByte(BaseOpcode + CC, CurByte, OS);
+ emitByte(BaseOpcode + CC, OS);
emitRegModRMByte(MI.getOperand(SecondOp),
- getX86RegNum(MI.getOperand(FirstOp)), CurByte, OS);
+ getX86RegNum(MI.getOperand(FirstOp)), OS);
break;
}
+ case X86II::MRMSrcMemFSIB:
case X86II::MRMSrcMem: {
unsigned FirstMemOp = CurOp + 1;
@@ -1566,10 +1585,11 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
if (HasVEX_4V)
++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
+ bool ForceSIB = (Form == X86II::MRMSrcMemFSIB);
emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)),
- TSFlags, Rex, CurByte, OS, Fixups, STI);
+ TSFlags, HasREX, StartByte, OS, Fixups, STI, ForceSIB);
CurOp = FirstMemOp + X86::AddrNumOperands;
if (HasVEX_I8Reg)
I8RegNum = getX86RegEncoding(MI, CurOp++);
@@ -1578,10 +1598,10 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRMSrcMem4VOp3: {
unsigned FirstMemOp = CurOp + 1;
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)),
- TSFlags, Rex, CurByte, OS, Fixups, STI);
+ TSFlags, HasREX, StartByte, OS, Fixups, STI);
CurOp = FirstMemOp + X86::AddrNumOperands;
++CurOp; // Encoded in VEX.VVVV.
break;
@@ -1595,10 +1615,10 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
assert(HasVEX_I8Reg && "MRMSrcRegOp4 should imply VEX_I8Reg");
I8RegNum = getX86RegEncoding(MI, FirstMemOp++);
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)),
- TSFlags, Rex, CurByte, OS, Fixups, STI);
+ TSFlags, HasREX, StartByte, OS, Fixups, STI);
CurOp = FirstMemOp + X86::AddrNumOperands;
break;
}
@@ -1608,10 +1628,10 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
CurOp = FirstMemOp + X86::AddrNumOperands;
unsigned CC = MI.getOperand(CurOp++).getImm();
- emitByte(BaseOpcode + CC, CurByte, OS);
+ emitByte(BaseOpcode + CC, OS);
emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(RegOp)),
- TSFlags, Rex, CurByte, OS, Fixups, STI);
+ TSFlags, HasREX, StartByte, OS, Fixups, STI);
break;
}
@@ -1619,8 +1639,8 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
unsigned RegOp = CurOp++;
unsigned CC = MI.getOperand(CurOp++).getImm();
- emitByte(BaseOpcode + CC, CurByte, OS);
- emitRegModRMByte(MI.getOperand(RegOp), 0, CurByte, OS);
+ emitByte(BaseOpcode + CC, OS);
+ emitRegModRMByte(MI.getOperand(RegOp), 0, OS);
break;
}
@@ -1637,10 +1657,13 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
++CurOp;
if (HasEVEX_K) // Skip writemask
++CurOp;
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
emitRegModRMByte(MI.getOperand(CurOp++),
- (Form == X86II::MRMXr) ? 0 : Form - X86II::MRM0r, CurByte,
- OS);
+ (Form == X86II::MRMXr) ? 0 : Form - X86II::MRM0r, OS);
+ break;
+ case X86II::MRMr0:
+ emitByte(BaseOpcode, OS);
+ emitByte(modRMByte(3, getX86RegNum(MI.getOperand(CurOp++)), 0), OS);
break;
case X86II::MRMXmCC: {
@@ -1648,9 +1671,10 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
CurOp = FirstMemOp + X86::AddrNumOperands;
unsigned CC = MI.getOperand(CurOp++).getImm();
- emitByte(BaseOpcode + CC, CurByte, OS);
+ emitByte(BaseOpcode + CC, OS);
- emitMemModRMByte(MI, FirstMemOp, 0, TSFlags, Rex, CurByte, OS, Fixups, STI);
+ emitMemModRMByte(MI, FirstMemOp, 0, TSFlags, HasREX, StartByte, OS, Fixups,
+ STI);
break;
}
@@ -1667,13 +1691,25 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
++CurOp;
if (HasEVEX_K) // Skip writemask
++CurOp;
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
emitMemModRMByte(MI, CurOp,
(Form == X86II::MRMXm) ? 0 : Form - X86II::MRM0m, TSFlags,
- Rex, CurByte, OS, Fixups, STI);
+ HasREX, StartByte, OS, Fixups, STI);
CurOp += X86::AddrNumOperands;
break;
+ case X86II::MRM0X:
+ case X86II::MRM1X:
+ case X86II::MRM2X:
+ case X86II::MRM3X:
+ case X86II::MRM4X:
+ case X86II::MRM5X:
+ case X86II::MRM6X:
+ case X86II::MRM7X:
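+ // These forms encode a fixed ModRM byte: mod=0b11, reg = Form - MRM0X,
+ // rm=0b000, i.e. 0xC0 | (reg << 3).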
+ emitByte(BaseOpcode, OS);
+ emitByte(0xC0 + ((Form - X86II::MRM0X) << 3), OS);
+ break;
+
case X86II::MRM_C0:
case X86II::MRM_C1:
case X86II::MRM_C2:
@@ -1738,8 +1774,8 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM_FD:
case X86II::MRM_FE:
case X86II::MRM_FF:
- emitByte(BaseOpcode, CurByte, OS);
- emitByte(0xC0 + Form - X86II::MRM_C0, CurByte, OS);
+ emitByte(BaseOpcode, OS);
+ emitByte(0xC0 + Form - X86II::MRM_C0, OS);
break;
}
@@ -1754,7 +1790,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
I8RegNum |= Val;
}
emitImmediate(MCOperand::createImm(I8RegNum), MI.getLoc(), 1, FK_Data_1,
- CurByte, OS, Fixups);
+ StartByte, OS, Fixups);
} else {
// If there is a remaining operand, it must be a trailing immediate. Emit it
// according to the right size for the instruction. Some instructions
@@ -1762,13 +1798,15 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
while (CurOp != NumOps && NumOps - CurOp <= 2) {
emitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
- CurByte, OS, Fixups);
+ StartByte, OS, Fixups);
}
}
if ((TSFlags & X86II::OpMapMask) == X86II::ThreeDNow)
- emitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS);
+ emitByte(X86II::getBaseOpcodeFor(TSFlags), OS);
+ assert(OS.tell() - StartByte <= 15 &&
+ "The size of instruction must be no longer than 15.");
#ifndef NDEBUG
// FIXME: Verify.
if (/*!Desc.isVariadic() &&*/ CurOp != NumOps) {
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 049a3a8159841..81110ba666e95 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -30,10 +30,6 @@
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
-#if _MSC_VER
-#include <intrin.h>
-#endif
-
using namespace llvm;
#define GET_REGINFO_MC_DESC
@@ -294,7 +290,7 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(const Triple &TT,
if (!FS.empty())
ArchFS = (Twine(ArchFS) + "," + FS).str();
- std::string CPUName = CPU;
+ std::string CPUName = std::string(CPU);
if (CPUName.empty())
CPUName = "generic";
@@ -335,7 +331,10 @@ static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI,
MAI = new X86ELFMCAsmInfo(TheTriple);
} else if (TheTriple.isWindowsMSVCEnvironment() ||
TheTriple.isWindowsCoreCLREnvironment()) {
- MAI = new X86MCAsmInfoMicrosoft(TheTriple);
+ if (Options.getAssemblyLanguage().equals_lower("masm"))
+ MAI = new X86MCAsmInfoMicrosoftMASM(TheTriple);
+ else
+ MAI = new X86MCAsmInfoMicrosoft(TheTriple);
} else if (TheTriple.isOSCygMing() ||
TheTriple.isWindowsItaniumEnvironment()) {
MAI = new X86MCAsmInfoGNUCOFF(TheTriple);
@@ -350,7 +349,7 @@ static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI,
// Initial state of the frame pointer is esp+stackGrowth.
unsigned StackPtr = is64Bit ? X86::RSP : X86::ESP;
- MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(
+ MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(
nullptr, MRI.getDwarfRegNum(StackPtr, true), -stackGrowth);
MAI->addInitialFrameState(Inst);
@@ -401,6 +400,9 @@ public:
findPltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
uint64_t GotSectionVA,
const Triple &TargetTriple) const override;
+
+ bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+ uint64_t &Target) const override;
Optional<uint64_t> evaluateMemoryOperandAddress(const MCInst &Inst,
uint64_t Addr,
uint64_t Size) const override;
@@ -519,6 +521,15 @@ std::vector<std::pair<uint64_t, uint64_t>> X86MCInstrAnalysis::findPltEntries(
}
}
+bool X86MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size, uint64_t &Target) const {
+ if (Inst.getNumOperands() == 0 ||
+ Info->get(Inst.getOpcode()).OpInfo[0].OperandType != MCOI::OPERAND_PCREL)
+ return false;
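+ // On x86 the PC-relative immediate is relative to the end of the
+ // instruction, so the target is Addr + Size + Imm.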
+ Target = Addr + Size + Inst.getOperand(0).getImm();
+ return true;
+}
+
Optional<uint64_t> X86MCInstrAnalysis::evaluateMemoryOperandAddress(
const MCInst &Inst, uint64_t Addr, uint64_t Size) const {
const MCInstrDesc &MCID = Info->get(Inst.getOpcode());
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 0c789061f0e13..e8c72be1d9b6f 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -13,27 +13,28 @@
#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCTARGETDESC_H
#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCTARGETDESC_H
-#include "llvm/MC/MCRegister.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/DataTypes.h"
+#include <memory>
#include <string>
namespace llvm {
+class formatted_raw_ostream;
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
+class MCInst;
+class MCInstPrinter;
class MCInstrInfo;
class MCObjectTargetWriter;
class MCObjectWriter;
+class MCRegister;
class MCRegisterInfo;
+class MCStreamer;
class MCSubtargetInfo;
-class MCRelocationInfo;
class MCTargetOptions;
+class MCTargetStreamer;
class Target;
class Triple;
class StringRef;
-class raw_ostream;
-class raw_pwrite_stream;
/// Flavour of dwarf regnumbers
///
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp
new file mode 100644
index 0000000000000..62c1c399a606e
--- /dev/null
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp
@@ -0,0 +1,571 @@
+//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Define several functions to decode x86 specific shuffle semantics into a
+// generic vector mask.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86ShuffleDecode.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+
+//===----------------------------------------------------------------------===//
+// Vector Mask Decoding
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ // Default to copying the dest value.
+ ShuffleMask.push_back(0);
+ ShuffleMask.push_back(1);
+ ShuffleMask.push_back(2);
+ ShuffleMask.push_back(3);
+
+ // Decode the immediate.
+ unsigned ZMask = Imm & 15;
+ unsigned CountD = (Imm >> 4) & 3;
+ unsigned CountS = (Imm >> 6) & 3;
+
+ // CountS selects which input element to use.
+ unsigned InVal = 4 + CountS;
+ // CountD specifies which element of destination to update.
+ ShuffleMask[CountD] = InVal;
+ // ZMask zaps values, potentially overriding the CountD elt.
+ if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
+ if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
+ if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
+ if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
+}
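+
+// Example: DecodeINSERTPSMask(0x55, Mask) yields <Zero, 5, Zero, 3>:
+// CountS=1 selects source element 1 (mask index 4+1=5), CountD=1 writes it
+// into destination element 1, and ZMask=0b0101 zeroes elements 0 and 2.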
+
+void DecodeInsertElementMask(unsigned NumElts, unsigned Idx, unsigned Len,
+ SmallVectorImpl<int> &ShuffleMask) {
+ assert((Idx + Len) <= NumElts && "Insertion out of range");
+
+ for (unsigned i = 0; i != NumElts; ++i)
+ ShuffleMask.push_back(i);
+ for (unsigned i = 0; i != Len; ++i)
+ ShuffleMask[Idx + i] = NumElts + i;
+}
+
+// <3,1> or <6,7,2,3>
+void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
+ for (unsigned i = NElts / 2; i != NElts; ++i)
+ ShuffleMask.push_back(NElts + i);
+
+ for (unsigned i = NElts / 2; i != NElts; ++i)
+ ShuffleMask.push_back(i);
+}
+
+// <0,2> or <0,1,4,5>
+void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts / 2; ++i)
+ ShuffleMask.push_back(i);
+
+ for (unsigned i = 0; i != NElts / 2; ++i)
+ ShuffleMask.push_back(NElts + i);
+}
+
+void DecodeMOVSLDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
+ for (int i = 0, e = NumElts / 2; i < e; ++i) {
+ ShuffleMask.push_back(2 * i);
+ ShuffleMask.push_back(2 * i);
+ }
+}
+
+void DecodeMOVSHDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
+ for (int i = 0, e = NumElts / 2; i < e; ++i) {
+ ShuffleMask.push_back(2 * i + 1);
+ ShuffleMask.push_back(2 * i + 1);
+ }
+}
+
+void DecodeMOVDDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
+ const unsigned NumLaneElts = 2;
+
+ for (unsigned l = 0; l < NumElts; l += NumLaneElts)
+ for (unsigned i = 0; i < NumLaneElts; ++i)
+ ShuffleMask.push_back(l);
+}
+
+void DecodePSLLDQMask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ const unsigned NumLaneElts = 16;
+
+ for (unsigned l = 0; l < NumElts; l += NumLaneElts)
+ for (unsigned i = 0; i < NumLaneElts; ++i) {
+ int M = SM_SentinelZero;
+ if (i >= Imm) M = i - Imm + l;
+ ShuffleMask.push_back(M);
+ }
+}
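+
+// Example: for a 128-bit PSLLDQ (NumElts=16) with Imm=4, the low 4 bytes
+// become zero and the rest shift up: <Z,Z,Z,Z,0,1,2,...,11>.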
+
+void DecodePSRLDQMask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ const unsigned NumLaneElts = 16;
+
+ for (unsigned l = 0; l < NumElts; l += NumLaneElts)
+ for (unsigned i = 0; i < NumLaneElts; ++i) {
+ unsigned Base = i + Imm;
+ int M = Base + l;
+ if (Base >= NumLaneElts) M = SM_SentinelZero;
+ ShuffleMask.push_back(M);
+ }
+}
+
+void DecodePALIGNRMask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ const unsigned NumLaneElts = 16;
+
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ unsigned Base = i + Imm;
+ // If i+imm is out of this lane then we actually need the other source.
+ if (Base >= NumLaneElts) Base += NumElts - NumLaneElts;
+ ShuffleMask.push_back(Base + l);
+ }
+ }
+}
+
+void DecodeVALIGNMask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ // Not all bits of the immediate are used so mask it.
+ assert(isPowerOf2_32(NumElts) && "NumElts should be power of 2");
+ Imm = Imm & (NumElts - 1);
+ for (unsigned i = 0; i != NumElts; ++i)
+ ShuffleMask.push_back(i + Imm);
+}
+
+void DecodePSHUFMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned Size = NumElts * ScalarBits;
+ unsigned NumLanes = Size / 128;
+ if (NumLanes == 0) NumLanes = 1; // Handle MMX
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ uint32_t SplatImm = (Imm & 0xff) * 0x01010101;
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ ShuffleMask.push_back(SplatImm % NumLaneElts + l);
+ SplatImm /= NumLaneElts;
+ }
+ }
+}
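+
+// Example: DecodePSHUFMask(4, 32, 0x1B, Mask) (i.e. pshufd $0x1b) splats the
+// immediate to 0x1B1B1B1B and peels off two bits per element, producing the
+// reversal mask <3, 2, 1, 0>.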
+
+void DecodePSHUFHWMask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ for (unsigned l = 0; l != NumElts; l += 8) {
+ unsigned NewImm = Imm;
+ for (unsigned i = 0, e = 4; i != e; ++i) {
+ ShuffleMask.push_back(l + i);
+ }
+ for (unsigned i = 4, e = 8; i != e; ++i) {
+ ShuffleMask.push_back(l + 4 + (NewImm & 3));
+ NewImm >>= 2;
+ }
+ }
+}
+
+void DecodePSHUFLWMask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ for (unsigned l = 0; l != NumElts; l += 8) {
+ unsigned NewImm = Imm;
+ for (unsigned i = 0, e = 4; i != e; ++i) {
+ ShuffleMask.push_back(l + (NewImm & 3));
+ NewImm >>= 2;
+ }
+ for (unsigned i = 4, e = 8; i != e; ++i) {
+ ShuffleMask.push_back(l + i);
+ }
+ }
+}
+
+void DecodePSWAPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumHalfElts = NumElts / 2;
+
+ for (unsigned l = 0; l != NumHalfElts; ++l)
+ ShuffleMask.push_back(l + NumHalfElts);
+ for (unsigned h = 0; h != NumHalfElts; ++h)
+ ShuffleMask.push_back(h);
+}
+
+void DecodeSHUFPMask(unsigned NumElts, unsigned ScalarBits,
+ unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumLaneElts = 128 / ScalarBits;
+
+ unsigned NewImm = Imm;
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ // Each half of a lane comes from a different source.
+ for (unsigned s = 0; s != NumElts * 2; s += NumElts) {
+ for (unsigned i = 0; i != NumLaneElts / 2; ++i) {
+ ShuffleMask.push_back(NewImm % NumLaneElts + s + l);
+ NewImm /= NumLaneElts;
+ }
+ }
+ if (NumLaneElts == 4) NewImm = Imm; // reload imm
+ }
+}
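+
+// Example: DecodeSHUFPMask(4, 32, 0x4E, Mask) (shufps $0x4e) takes elements
+// 2,3 from the first source and 0,1 from the second: <2, 3, 4, 5>.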
+
+void DecodeUNPCKHMask(unsigned NumElts, unsigned ScalarBits,
+ SmallVectorImpl<int> &ShuffleMask) {
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = (NumElts * ScalarBits) / 128;
+ if (NumLanes == 0) NumLanes = 1; // Handle MMX
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = l + NumLaneElts / 2, e = l + NumLaneElts; i != e; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i + NumElts); // Reads from src/src2
+ }
+ }
+}
+
+void DecodeUNPCKLMask(unsigned NumElts, unsigned ScalarBits,
+ SmallVectorImpl<int> &ShuffleMask) {
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = (NumElts * ScalarBits) / 128;
+ if (NumLanes == 0) NumLanes = 1; // Handle MMX
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = l, e = l + NumLaneElts / 2; i != e; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i + NumElts); // Reads from src/src2
+ }
+ }
+}
+
+void DecodeVectorBroadcast(unsigned NumElts,
+ SmallVectorImpl<int> &ShuffleMask) {
+ ShuffleMask.append(NumElts, 0);
+}
+
+void DecodeSubVectorBroadcast(unsigned DstNumElts, unsigned SrcNumElts,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned Scale = DstNumElts / SrcNumElts;
+
+ for (unsigned i = 0; i != Scale; ++i)
+ for (unsigned j = 0; j != SrcNumElts; ++j)
+ ShuffleMask.push_back(j);
+}
+
+void decodeVSHUF64x2FamilyMask(unsigned NumElts, unsigned ScalarSize,
+ unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElementsInLane = 128 / ScalarSize;
+ unsigned NumLanes = NumElts / NumElementsInLane;
+
+ for (unsigned l = 0; l != NumElts; l += NumElementsInLane) {
+ unsigned Index = (Imm % NumLanes) * NumElementsInLane;
+ Imm /= NumLanes; // Discard the bits we just used.
+ // We actually need the other source.
+ if (l >= (NumElts / 2))
+ Index += NumElts;
+ for (unsigned i = 0; i != NumElementsInLane; ++i)
+ ShuffleMask.push_back(Index + i);
+ }
+}
+
+void DecodeVPERM2X128Mask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned HalfSize = NumElts / 2;
+
+ for (unsigned l = 0; l != 2; ++l) {
+ unsigned HalfMask = Imm >> (l * 4);
+ unsigned HalfBegin = (HalfMask & 0x3) * HalfSize;
+ for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i)
+ ShuffleMask.push_back((HalfMask & 8) ? SM_SentinelZero : (int)i);
+ }
+}
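+
+// Example: DecodeVPERM2X128Mask(4, 0x21, Mask) selects the high half of the
+// first source and the low half of the second: <2, 3, 4, 5>. Setting bit 3
+// or bit 7 of Imm zeroes the corresponding half instead.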
+
+void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
+ SmallVectorImpl<int> &ShuffleMask) {
+ for (int i = 0, e = RawMask.size(); i < e; ++i) {
+ uint64_t M = RawMask[i];
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+ // For 256/512-bit vectors the base of the shuffle is the 128-bit
+ // subvector we're inside.
+ int Base = (i / 16) * 16;
+ // If the high bit (7) of the byte is set, the element is zeroed.
+ if (M & (1 << 7))
+ ShuffleMask.push_back(SM_SentinelZero);
+ else {
+ // Only the least significant 4 bits of the byte are used.
+ int Index = Base + (M & 0xf);
+ ShuffleMask.push_back(Index);
+ }
+ }
+}
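+
+// Example: a PSHUFB control byte of 0x83 has bit 7 set, so that element is
+// zeroed; a control byte of 0x03 at position 20 of a 256-bit op selects byte
+// 3 of the second 128-bit lane, i.e. mask index 16 + 3 = 19.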
+
+void DecodeBLENDMask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ for (unsigned i = 0; i < NumElts; ++i) {
+ // If there are more than 8 elements in the vector, then any immediate blend
+ // mask wraps around.
+ unsigned Bit = i % 8;
+ ShuffleMask.push_back(((Imm >> Bit) & 1) ? NumElts + i : i);
+ }
+}
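+
+// Example: DecodeBLENDMask(8, 0xAA, Mask) picks odd elements from the second
+// source: <0, 9, 2, 11, 4, 13, 6, 15>.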
+
+void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
+ SmallVectorImpl<int> &ShuffleMask) {
+ assert(RawMask.size() == 16 && "Illegal VPPERM shuffle mask size");
+
+ // VPPERM Operation
+ // Bits[4:0] - Byte Index (0 - 31)
+ // Bits[7:5] - Permute Operation
+ //
+ // Permute Operation:
+ // 0 - Source byte (no logical operation).
+ // 1 - Invert source byte.
+ // 2 - Bit reverse of source byte.
+ // 3 - Bit reverse of inverted source byte.
+ // 4 - 00h (zero-fill).
+ // 5 - FFh (ones-fill).
+ // 6 - Most significant bit of source byte replicated in all bit positions.
+ // 7 - Invert most significant bit of source byte and replicate in all
+ //     bit positions.
+ for (int i = 0, e = RawMask.size(); i < e; ++i) {
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+
+ uint64_t M = RawMask[i];
+ uint64_t PermuteOp = (M >> 5) & 0x7;
+ if (PermuteOp == 4) {
+ ShuffleMask.push_back(SM_SentinelZero);
+ continue;
+ }
+ if (PermuteOp != 0) {
+ ShuffleMask.clear();
+ return;
+ }
+
+ uint64_t Index = M & 0x1F;
+ ShuffleMask.push_back((int)Index);
+ }
+}
+
+void DecodeVPERMMask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ for (unsigned l = 0; l != NumElts; l += 4)
+ for (unsigned i = 0; i != 4; ++i)
+ ShuffleMask.push_back(l + ((Imm >> (2 * i)) & 3));
+}
+
+void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits,
+ unsigned NumDstElts, bool IsAnyExtend,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned Scale = DstScalarBits / SrcScalarBits;
+ assert(SrcScalarBits < DstScalarBits &&
+ "Expected zero extension mask to increase scalar size");
+
+ int Sentinel = IsAnyExtend ? SM_SentinelUndef : SM_SentinelZero;
+ for (unsigned i = 0; i != NumDstElts; i++) {
+ ShuffleMask.push_back(i);
+ ShuffleMask.append(Scale - 1, Sentinel);
+ }
+}
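+
+// Example: for pmovzxbw (SrcScalarBits=8, DstScalarBits=16, NumDstElts=8),
+// Scale=2 and the mask interleaves each source byte with a zero:
+// <0, Z, 1, Z, 2, Z, 3, Z, 4, Z, 5, Z, 6, Z, 7, Z>.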
+
+void DecodeZeroMoveLowMask(unsigned NumElts,
+ SmallVectorImpl<int> &ShuffleMask) {
+ ShuffleMask.push_back(0);
+ ShuffleMask.append(NumElts - 1, SM_SentinelZero);
+}
+
+void DecodeScalarMoveMask(unsigned NumElts, bool IsLoad,
+ SmallVectorImpl<int> &ShuffleMask) {
+ // First element comes from the first element of second source.
+ // Remaining elements: Load zero extends / Move copies from first source.
+ ShuffleMask.push_back(NumElts);
+ for (unsigned i = 1; i < NumElts; i++)
+ ShuffleMask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
+}
+
+void DecodeEXTRQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned HalfElts = NumElts / 2;
+
+ // Only the bottom 6 bits are valid for each immediate.
+ Len &= 0x3F;
+ Idx &= 0x3F;
+
+ // We can only decode this bit extraction instruction as a shuffle if both the
+ // length and index work with whole elements.
+ if (0 != (Len % EltSize) || 0 != (Idx % EltSize))
+ return;
+
+ // A length of zero is equivalent to a bit length of 64.
+ if (Len == 0)
+ Len = 64;
+
+ // If the length + index exceeds the bottom 64 bits the result is undefined.
+ if ((Len + Idx) > 64) {
+ ShuffleMask.append(NumElts, SM_SentinelUndef);
+ return;
+ }
+
+ // Convert length and index to work with elements.
+ Len /= EltSize;
+ Idx /= EltSize;
+
+ // EXTRQ: Extract Len elements starting from Idx. Zero pad the remaining
+ // elements of the lower 64-bits. The upper 64-bits are undefined.
+ for (int i = 0; i != Len; ++i)
+ ShuffleMask.push_back(i + Idx);
+ for (int i = Len; i != (int)HalfElts; ++i)
+ ShuffleMask.push_back(SM_SentinelZero);
+ for (int i = HalfElts; i != (int)NumElts; ++i)
+ ShuffleMask.push_back(SM_SentinelUndef);
+}
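+
+// Example: DecodeEXTRQIMask(16, 8, 16, 8, Mask) extracts 2 bytes starting at
+// byte 1: <1, 2, Z, Z, Z, Z, Z, Z, U, U, U, U, U, U, U, U>.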
+
+void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned HalfElts = NumElts / 2;
+
+ // Only the bottom 6 bits are valid for each immediate.
+ Len &= 0x3F;
+ Idx &= 0x3F;
+
+ // We can only decode this bit insertion instruction as a shuffle if both the
+ // length and index work with whole elements.
+ if (0 != (Len % EltSize) || 0 != (Idx % EltSize))
+ return;
+
+ // A length of zero is equivalent to a bit length of 64.
+ if (Len == 0)
+ Len = 64;
+
+ // If the length + index exceeds the bottom 64 bits the result is undefined.
+ if ((Len + Idx) > 64) {
+ ShuffleMask.append(NumElts, SM_SentinelUndef);
+ return;
+ }
+
+ // Convert length and index to work with elements.
+ Len /= EltSize;
+ Idx /= EltSize;
+
+ // INSERTQ: Extract lowest Len elements from lower half of second source and
+ // insert over first source starting at Idx element. The upper 64-bits are
+ // undefined.
+ for (int i = 0; i != Idx; ++i)
+ ShuffleMask.push_back(i);
+ for (int i = 0; i != Len; ++i)
+ ShuffleMask.push_back(i + NumElts);
+ for (int i = Idx + Len; i != (int)HalfElts; ++i)
+ ShuffleMask.push_back(i);
+ for (int i = HalfElts; i != (int)NumElts; ++i)
+ ShuffleMask.push_back(SM_SentinelUndef);
+}
+
+void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits,
+ ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned VecSize = NumElts * ScalarBits;
+ unsigned NumLanes = VecSize / 128;
+ unsigned NumEltsPerLane = NumElts / NumLanes;
+ assert((VecSize == 128 || VecSize == 256 || VecSize == 512) &&
+ "Unexpected vector size");
+ assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size");
+
+ for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+ uint64_t M = RawMask[i];
+ M = (ScalarBits == 64 ? ((M >> 1) & 0x1) : (M & 0x3));
+ unsigned LaneOffset = i & ~(NumEltsPerLane - 1);
+ ShuffleMask.push_back((int)(LaneOffset + M));
+ }
+}
+
+void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z,
+ ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned VecSize = NumElts * ScalarBits;
+ unsigned NumLanes = VecSize / 128;
+ unsigned NumEltsPerLane = NumElts / NumLanes;
+ assert((VecSize == 128 || VecSize == 256) && "Unexpected vector size");
+ assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size");
+ assert((NumElts == RawMask.size()) && "Unexpected mask size");
+
+ for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+
+ // VPERMIL2 Operation.
+ // Bits[3] - Match Bit.
+ // Bits[2:1] - (Per Lane) PD Shuffle Mask.
+ // Bits[2:0] - (Per Lane) PS Shuffle Mask.
+ uint64_t Selector = RawMask[i];
+ unsigned MatchBit = (Selector >> 3) & 0x1;
+
+ // M2Z[0:1] MatchBit
+ // 0Xb X Source selected by Selector index.
+ // 10b 0 Source selected by Selector index.
+ // 10b 1 Zero.
+ // 11b 0 Zero.
+ // 11b 1 Source selected by Selector index.
+ if ((M2Z & 0x2) != 0 && MatchBit != (M2Z & 0x1)) {
+ ShuffleMask.push_back(SM_SentinelZero);
+ continue;
+ }
+
+ int Index = i & ~(NumEltsPerLane - 1);
+ if (ScalarBits == 64)
+ Index += (Selector >> 1) & 0x1;
+ else
+ Index += Selector & 0x3;
+
+ int Src = (Selector >> 2) & 0x1;
+ Index += Src * NumElts;
+ ShuffleMask.push_back(Index);
+ }
+}
+
+void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
+ SmallVectorImpl<int> &ShuffleMask) {
+ uint64_t EltMaskSize = RawMask.size() - 1;
+ for (int i = 0, e = RawMask.size(); i != e; ++i) {
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+ uint64_t M = RawMask[i];
+ M &= EltMaskSize;
+ ShuffleMask.push_back((int)M);
+ }
+}
+
+void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
+ SmallVectorImpl<int> &ShuffleMask) {
+ uint64_t EltMaskSize = (RawMask.size() * 2) - 1;
+ for (int i = 0, e = RawMask.size(); i != e; ++i) {
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+ uint64_t M = RawMask[i];
+ M &= EltMaskSize;
+ ShuffleMask.push_back((int)M);
+ }
+}
+
+} // namespace llvm
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.h b/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.h
new file mode 100644
index 0000000000000..4ef9959f7a278
--- /dev/null
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.h
@@ -0,0 +1,166 @@
+//===-- X86ShuffleDecode.h - X86 shuffle decode logic -----------*-C++-*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Define several functions to decode x86 specific shuffle semantics into a
+// generic vector mask.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_UTILS_X86SHUFFLEDECODE_H
+#define LLVM_LIB_TARGET_X86_UTILS_X86SHUFFLEDECODE_H
+
+#include <cstdint>
+
+//===----------------------------------------------------------------------===//
+// Vector Mask Decoding
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+class APInt;
+template <typename T> class ArrayRef;
+template <typename T> class SmallVectorImpl;
+
+enum { SM_SentinelUndef = -1, SM_SentinelZero = -2 };
+
+/// Decode a 128-bit INSERTPS instruction as a v4f32 shuffle mask.
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+
+/// Insert the bottom Len elements from a second source into a vector
+/// starting at element Idx.
+void DecodeInsertElementMask(unsigned NumElts, unsigned Idx, unsigned Len,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// Decode a MOVHLPS instruction as a v2f64/v4f32 shuffle mask.
+/// i.e. <3,1> or <6,7,2,3>
+void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
+
+/// Decode a MOVLHPS instruction as a v2f64/v4f32 shuffle mask.
+/// i.e. <0,2> or <0,1,4,5>
+void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
+
+void DecodeMOVSLDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask);
+
+void DecodeMOVSHDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask);
+
+void DecodeMOVDDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask);
+
+void DecodePSLLDQMask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask);
+
+void DecodePSRLDQMask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask);
+
+void DecodePALIGNRMask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask);
+
+void DecodeVALIGNMask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// Decodes the shuffle masks for pshufd/pshufw/vpermilpd/vpermilps.
+void DecodePSHUFMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// Decodes the shuffle masks for pshufhw.
+void DecodePSHUFHWMask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// Decodes the shuffle masks for pshuflw.
+void DecodePSHUFLWMask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// Decodes a PSWAPD 3DNow! instruction.
+void DecodePSWAPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask);
+
+/// Decodes the shuffle masks for shufp*.
+void DecodeSHUFPMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// Decodes the shuffle masks for unpckhps/unpckhpd and punpckh*.
+void DecodeUNPCKHMask(unsigned NumElts, unsigned ScalarBits,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// Decodes the shuffle masks for unpcklps/unpcklpd and punpckl*.
+void DecodeUNPCKLMask(unsigned NumElts, unsigned ScalarBits,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// Decodes a broadcast of the first element of a vector.
+void DecodeVectorBroadcast(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask);
+
+/// Decodes a broadcast of a subvector to a larger vector type.
+void DecodeSubVectorBroadcast(unsigned DstNumElts, unsigned SrcNumElts,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// Decode a PSHUFB mask from a raw array of constants such as from
+/// BUILD_VECTOR.
+void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// Decode a BLEND immediate mask into a shuffle mask.
+void DecodeBLENDMask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask);
+
+void DecodeVPERM2X128Mask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// Decode a shuffle of packed values at 128-bit granularity
+/// (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2)
+/// immediate mask into a shuffle mask.
+void decodeVSHUF64x2FamilyMask(unsigned NumElts, unsigned ScalarSize,
+ unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+
+/// Decodes the shuffle masks for VPERMQ/VPERMPD.
+void DecodeVPERMMask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// Decode a VPPERM mask from a raw array of constants such as from
+/// BUILD_VECTOR.
+/// This can only decode basic masks (permutes + zeros), not any of the other
+/// operations that VPPERM can perform.
+void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// Decode a zero extension instruction as a shuffle mask.
+void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits,
+ unsigned NumDstElts, bool IsAnyExtend,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// Decode a move lower and zero upper instruction as a shuffle mask.
+void DecodeZeroMoveLowMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask);
+
+/// Decode a scalar float move instruction as a shuffle mask.
+void DecodeScalarMoveMask(unsigned NumElts, bool IsLoad,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// Decode a SSE4A EXTRQ instruction as a shuffle mask.
+void DecodeEXTRQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// Decode a SSE4A INSERTQ instruction as a shuffle mask.
+void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// Decode a VPERMILPD/VPERMILPS variable mask from a raw array of constants.
+void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits,
+ ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// Decode a VPERMIL2PD/VPERMIL2PS variable mask from a raw array of constants.
+void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z,
+ ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants.
+void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// Decode a VPERMT2 W/D/Q/PS/PD mask from a raw array of constants.
+void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
+ SmallVectorImpl<int> &ShuffleMask);
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index db624378d517b..3bebcc24fd3a4 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -28,7 +28,7 @@ public:
void EmitWinEHHandlerData(SMLoc Loc) override;
void EmitWindowsUnwindTables() override;
void EmitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) override;
- void FinishImpl() override;
+ void finishImpl() override;
};
void X86WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) {
@@ -52,11 +52,11 @@ void X86WinCOFFStreamer::EmitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) {
XTS->emitFPOData(ProcSym, Loc);
}
-void X86WinCOFFStreamer::FinishImpl() {
- EmitFrames(nullptr);
+void X86WinCOFFStreamer::finishImpl() {
+ emitFrames(nullptr);
EmitWindowsUnwindTables();
- MCWinCOFFStreamer::FinishImpl();
+ MCWinCOFFStreamer::finishImpl();
}
}
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
index d5494ef12370f..11251fb2b2ba7 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
@@ -159,7 +159,7 @@ bool X86WinCOFFTargetStreamer::checkInFPOPrologue(SMLoc L) {
MCSymbol *X86WinCOFFTargetStreamer::emitFPOLabel() {
MCSymbol *Label = getContext().createTempSymbol("cfi", true);
- getStreamer().EmitLabel(Label);
+ getStreamer().emitLabel(Label);
return Label;
}
@@ -372,13 +372,13 @@ void FPOStateMachine::emitFrameDataRecord(MCStreamer &OS, MCSymbol *Label) {
OS.emitAbsoluteSymbolDiff(Label, FPO->Begin, 4); // RvaStart
OS.emitAbsoluteSymbolDiff(FPO->End, Label, 4); // CodeSize
- OS.EmitIntValue(LocalSize, 4);
- OS.EmitIntValue(FPO->ParamsSize, 4);
- OS.EmitIntValue(MaxStackSize, 4);
- OS.EmitIntValue(FrameFuncStrTabOff, 4); // FrameFunc
+ OS.emitInt32(LocalSize);
+ OS.emitInt32(FPO->ParamsSize);
+ OS.emitInt32(MaxStackSize);
+ OS.emitInt32(FrameFuncStrTabOff); // FrameFunc
OS.emitAbsoluteSymbolDiff(FPO->PrologueEnd, Label, 2);
- OS.EmitIntValue(SavedRegSize, 2);
- OS.EmitIntValue(CurFlags, 4);
+ OS.emitInt16(SavedRegSize);
+ OS.emitInt32(CurFlags);
}
/// Compute and emit the real CodeView FrameData subsection.
@@ -398,12 +398,12 @@ bool X86WinCOFFTargetStreamer::emitFPOData(const MCSymbol *ProcSym, SMLoc L) {
MCSymbol *FrameBegin = Ctx.createTempSymbol(),
*FrameEnd = Ctx.createTempSymbol();
- OS.EmitIntValue(unsigned(DebugSubsectionKind::FrameData), 4);
+ OS.emitInt32(unsigned(DebugSubsectionKind::FrameData));
OS.emitAbsoluteSymbolDiff(FrameEnd, FrameBegin, 4);
- OS.EmitLabel(FrameBegin);
+ OS.emitLabel(FrameBegin);
// Start with the RVA of the function in question.
- OS.EmitValue(MCSymbolRefExpr::create(FPO->Function,
+ OS.emitValue(MCSymbolRefExpr::create(FPO->Function,
MCSymbolRefExpr::VK_COFF_IMGREL32, Ctx),
4);
@@ -437,8 +437,8 @@ bool X86WinCOFFTargetStreamer::emitFPOData(const MCSymbol *ProcSym, SMLoc L) {
FSM.emitFrameDataRecord(OS, Inst.Label);
}
- OS.EmitValueToAlignment(4, 0);
- OS.EmitLabel(FrameEnd);
+ OS.emitValueToAlignment(4, 0);
+ OS.emitLabel(FrameEnd);
return false;
}