diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86MCInstLower.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86MCInstLower.cpp | 969 |
1 files changed, 473 insertions, 496 deletions
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index 2fc9a2af01d76..9ce2a4637e2ea 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -14,11 +14,12 @@ #include "MCTargetDesc/X86ATTInstPrinter.h" #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86InstComments.h" +#include "MCTargetDesc/X86ShuffleDecode.h" #include "MCTargetDesc/X86TargetStreamer.h" -#include "Utils/X86ShuffleDecode.h" #include "X86AsmPrinter.h" #include "X86RegisterInfo.h" #include "X86ShuffleDecodeConstantPool.h" +#include "X86Subtarget.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/iterator_range.h" @@ -43,6 +44,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -72,9 +74,30 @@ private: } // end anonymous namespace +/// A RAII helper which defines a region of instructions which can't have +/// padding added between them for correctness. +struct NoAutoPaddingScope { + MCStreamer &OS; + const bool OldAllowAutoPadding; + NoAutoPaddingScope(MCStreamer &OS) + : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) { + changeAndComment(false); + } + ~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); } + void changeAndComment(bool b) { + if (b == OS.getAllowAutoPadding()) + return; + OS.setAllowAutoPadding(b); + if (b) + OS.emitRawComment("autopadding"); + else + OS.emitRawComment("noautopadding"); + } +}; + // Emit a minimal sequence of nops spanning NumBytes bytes. -static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, - const MCSubtargetInfo &STI); +static void emitX86Nops(MCStreamer &OS, unsigned NumBytes, + const X86Subtarget *Subtarget); void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst, const MCSubtargetInfo &STI, @@ -94,13 +117,13 @@ void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding( MCStreamer &OutStreamer, const MCSubtargetInfo &STI) { if (InShadow && CurrentShadowSize < RequiredShadowSize) { InShadow = false; - EmitNops(OutStreamer, RequiredShadowSize - CurrentShadowSize, - MF->getSubtarget<X86Subtarget>().is64Bit(), STI); + emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize, + &MF->getSubtarget<X86Subtarget>()); } } void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) { - OutStreamer->EmitInstruction(Inst, getSubtargetInfo()); + OutStreamer->emitInstruction(Inst, getSubtargetInfo()); SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get()); } @@ -116,6 +139,10 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol /// operand to an MCSymbol. MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const { + const Triple &TT = TM.getTargetTriple(); + if (MO.isGlobal() && TT.isOSBinFormatELF()) + return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal()); + const DataLayout &DL = MF.getDataLayout(); assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && "Isn't a symbol reference"); @@ -272,7 +299,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, // local labels. This is only safe when the symbols are in the same // section so we are restricting it to jumptable references. MCSymbol *Label = Ctx.createTempSymbol(); - AsmPrinter.OutStreamer->EmitAssignment(Label, Expr); + AsmPrinter.OutStreamer->emitAssignment(Label, Expr); Expr = MCSymbolRefExpr::create(Label, Ctx); } break; @@ -482,6 +509,26 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { "LEA has segment specified!"); break; + case X86::MULX32Hrr: + case X86::MULX32Hrm: + case X86::MULX64Hrr: + case X86::MULX64Hrm: { + // Turn into regular MULX by duplicating the destination. + unsigned NewOpc; + switch (OutMI.getOpcode()) { + default: llvm_unreachable("Invalid opcode"); + case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break; + case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break; + case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break; + case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break; + } + OutMI.setOpcode(NewOpc); + // Duplicate the destination. + unsigned DestReg = OutMI.getOperand(0).getReg(); + OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg)); + break; + } + // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B // if one of the registers is extended, but other isn't. case X86::VMOVZPQILo2PQIrr: @@ -929,6 +976,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering, const MachineInstr &MI) { + NoAutoPaddingScope NoPadScope(*OutStreamer); bool Is64Bits = MI.getOpcode() == X86::TLS_addr64 || MI.getOpcode() == X86::TLS_base_addr64; MCContext &Ctx = OutStreamer->getContext(); @@ -1034,29 +1082,26 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering, /// Return the longest nop which can be efficiently decoded for the given /// target cpu. 15-bytes is the longest single NOP instruction, but some /// platforms can't decode the longest forms efficiently. -static unsigned MaxLongNopLength(const MCSubtargetInfo &STI) { - uint64_t MaxNopLength = 10; - if (STI.getFeatureBits()[X86::ProcIntelSLM]) - MaxNopLength = 7; - else if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP]) - MaxNopLength = 15; - else if (STI.getFeatureBits()[X86::FeatureFast11ByteNOP]) - MaxNopLength = 11; - return MaxNopLength; +static unsigned maxLongNopLength(const X86Subtarget *Subtarget) { + if (Subtarget->getFeatureBits()[X86::ProcIntelSLM]) + return 7; + if (Subtarget->getFeatureBits()[X86::FeatureFast15ByteNOP]) + return 15; + if (Subtarget->getFeatureBits()[X86::FeatureFast11ByteNOP]) + return 11; + if (Subtarget->getFeatureBits()[X86::FeatureNOPL] || Subtarget->is64Bit()) + return 10; + if (Subtarget->is32Bit()) + return 2; + return 1; } /// Emit the largest nop instruction smaller than or equal to \p NumBytes /// bytes. Return the size of nop emitted. -static unsigned EmitNop(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, - const MCSubtargetInfo &STI) { - if (!Is64Bit) { - // TODO Do additional checking if the CPU supports multi-byte nops. - OS.EmitInstruction(MCInstBuilder(X86::NOOP), STI); - return 1; - } - +static unsigned emitNop(MCStreamer &OS, unsigned NumBytes, + const X86Subtarget *Subtarget) { // Cap a single nop emission at the profitable value for the target - NumBytes = std::min(NumBytes, MaxLongNopLength(STI)); + NumBytes = std::min(NumBytes, maxLongNopLength(Subtarget)); unsigned NopSize; unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg; @@ -1125,25 +1170,26 @@ static unsigned EmitNop(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U); NopSize += NumPrefixes; for (unsigned i = 0; i != NumPrefixes; ++i) - OS.EmitBytes("\x66"); + OS.emitBytes("\x66"); switch (Opc) { default: llvm_unreachable("Unexpected opcode"); case X86::NOOP: - OS.EmitInstruction(MCInstBuilder(Opc), STI); + OS.emitInstruction(MCInstBuilder(Opc), *Subtarget); break; case X86::XCHG16ar: - OS.EmitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX), STI); + OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX), + *Subtarget); break; case X86::NOOPL: case X86::NOOPW: - OS.EmitInstruction(MCInstBuilder(Opc) + OS.emitInstruction(MCInstBuilder(Opc) .addReg(BaseReg) .addImm(ScaleVal) .addReg(IndexReg) .addImm(Displacement) .addReg(SegmentReg), - STI); + *Subtarget); break; } assert(NopSize <= NumBytes && "We overemitted?"); @@ -1151,39 +1197,16 @@ static unsigned EmitNop(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, } /// Emit the optimal amount of multi-byte nops on X86. -static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, - const MCSubtargetInfo &STI) { +static void emitX86Nops(MCStreamer &OS, unsigned NumBytes, + const X86Subtarget *Subtarget) { unsigned NopsToEmit = NumBytes; (void)NopsToEmit; while (NumBytes) { - NumBytes -= EmitNop(OS, NumBytes, Is64Bit, STI); + NumBytes -= emitNop(OS, NumBytes, Subtarget); assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!"); } } -/// A RAII helper which defines a region of instructions which can't have -/// padding added between them for correctness. -struct NoAutoPaddingScope { - MCStreamer &OS; - const bool OldAllowAutoPadding; - NoAutoPaddingScope(MCStreamer &OS) - : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) { - changeAndComment(false); - } - ~NoAutoPaddingScope() { - changeAndComment(OldAllowAutoPadding); - } - void changeAndComment(bool b) { - if (b == OS.getAllowAutoPadding()) - return; - OS.setAllowAutoPadding(b); - if (b) - OS.emitRawComment("autopadding"); - else - OS.emitRawComment("noautopadding"); - } -}; - void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI, X86MCInstLower &MCIL) { assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64"); @@ -1192,8 +1215,7 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI, StatepointOpers SOpers(&MI); if (unsigned PatchBytes = SOpers.getNumPatchBytes()) { - EmitNops(*OutStreamer, PatchBytes, Subtarget->is64Bit(), - getSubtargetInfo()); + emitX86Nops(*OutStreamer, PatchBytes, Subtarget); } else { // Lower call target and choose correct opcode const MachineOperand &CallTarget = SOpers.getCallTarget(); @@ -1220,8 +1242,8 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI, break; case MachineOperand::MO_Register: // FIXME: Add retpoline support and remove this. - if (Subtarget->useRetpolineIndirectCalls()) - report_fatal_error("Lowering register statepoints with retpoline not " + if (Subtarget->useIndirectThunkCalls()) + report_fatal_error("Lowering register statepoints with thunks not " "yet implemented."); CallTargetMCOp = MCOperand::createReg(CallTarget.getReg()); CallOpcode = X86::CALL64r; @@ -1235,14 +1257,14 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI, MCInst CallInst; CallInst.setOpcode(CallOpcode); CallInst.addOperand(CallTargetMCOp); - OutStreamer->EmitInstruction(CallInst, getSubtargetInfo()); + OutStreamer->emitInstruction(CallInst, getSubtargetInfo()); } // Record our statepoint node in the same section used by STACKMAP // and PATCHPOINT auto &Ctx = OutStreamer->getContext(); MCSymbol *MILabel = Ctx.createTempSymbol(); - OutStreamer->EmitLabel(MILabel); + OutStreamer->emitLabel(MILabel); SM.recordStatepoint(*MILabel, MI); } @@ -1262,7 +1284,7 @@ void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI, auto &Ctx = OutStreamer->getContext(); MCSymbol *FaultingLabel = Ctx.createTempSymbol(); - OutStreamer->EmitLabel(FaultingLabel); + OutStreamer->emitLabel(FaultingLabel); assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!"); FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel); @@ -1280,7 +1302,7 @@ void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI, MI.addOperand(MaybeOperand.getValue()); OutStreamer->AddComment("on-fault: " + HandlerLabel->getName()); - OutStreamer->EmitInstruction(MI, getSubtargetInfo()); + OutStreamer->emitInstruction(MI, getSubtargetInfo()); } void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI, @@ -1317,7 +1339,17 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI, CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo()); if (Code.size() < MinSize) { - if (MinSize == 2 && Opcode == X86::PUSH64r) { + if (MinSize == 2 && Subtarget->is32Bit() && + Subtarget->isTargetWindowsMSVC() && + (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) { + // For compatibilty reasons, when targetting MSVC, is is important to + // generate a 'legacy' NOP in the form of a 8B FF MOV EDI, EDI. Some tools + // rely specifically on this pattern to be able to patch a function. + // This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE. + OutStreamer->emitInstruction( + MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI), + *Subtarget); + } else if (MinSize == 2 && Opcode == X86::PUSH64r) { // This is an optimization that lets us get away without emitting a nop in // many cases. // @@ -1325,14 +1357,13 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI, // bytes too, so the check on MinSize is important. MCI.setOpcode(X86::PUSH64rmr); } else { - unsigned NopSize = EmitNop(*OutStreamer, MinSize, Subtarget->is64Bit(), - getSubtargetInfo()); + unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget); assert(NopSize == MinSize && "Could not implement MinSize!"); (void)NopSize; } } - OutStreamer->EmitInstruction(MCI, getSubtargetInfo()); + OutStreamer->emitInstruction(MCI, getSubtargetInfo()); } // Lower a stackmap of the form: @@ -1342,7 +1373,7 @@ void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) { auto &Ctx = OutStreamer->getContext(); MCSymbol *MILabel = Ctx.createTempSymbol(); - OutStreamer->EmitLabel(MILabel); + OutStreamer->emitLabel(MILabel); SM.recordStackMap(*MILabel, MI); unsigned NumShadowBytes = MI.getOperand(1).getImm(); @@ -1361,7 +1392,7 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI, auto &Ctx = OutStreamer->getContext(); MCSymbol *MILabel = Ctx.createTempSymbol(); - OutStreamer->EmitLabel(MILabel); + OutStreamer->emitLabel(MILabel); SM.recordPatchPoint(*MILabel, MI); PatchPointOpers opers(&MI); @@ -1399,9 +1430,9 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI, EmitAndCountInstruction( MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp)); // FIXME: Add retpoline support and remove this. - if (Subtarget->useRetpolineIndirectCalls()) + if (Subtarget->useIndirectThunkCalls()) report_fatal_error( - "Lowering patchpoint with retpoline not yet implemented."); + "Lowering patchpoint with thunks not yet implemented."); EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg)); } @@ -1410,8 +1441,7 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI, assert(NumBytes >= EncodedBytes && "Patchpoint can't request size less than the length of a call."); - EmitNops(*OutStreamer, NumBytes - EncodedBytes, Subtarget->is64Bit(), - getSubtargetInfo()); + emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget); } void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, @@ -1442,13 +1472,13 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, // First we emit the label and the jump. auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true); OutStreamer->AddComment("# XRay Custom Event Log"); - OutStreamer->EmitCodeAlignment(2); - OutStreamer->EmitLabel(CurSled); + OutStreamer->emitCodeAlignment(2); + OutStreamer->emitLabel(CurSled); // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as // an operand (computed as an offset from the jmp instruction). // FIXME: Find another less hacky way do force the relative jump. - OutStreamer->EmitBinaryData("\xeb\x0f"); + OutStreamer->emitBinaryData("\xeb\x0f"); // The default C calling convention will place two arguments into %rcx and // %rdx -- so we only work with those. @@ -1471,7 +1501,7 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, EmitAndCountInstruction( MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I])); } else { - EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo()); + emitX86Nops(*OutStreamer, 4, Subtarget); } } @@ -1500,14 +1530,14 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, if (UsedMask[I]) EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I])); else - EmitNops(*OutStreamer, 1, Subtarget->is64Bit(), getSubtargetInfo()); + emitX86Nops(*OutStreamer, 1, Subtarget); OutStreamer->AddComment("xray custom event end."); - // Record the sled version. Older versions of this sled were spelled - // differently, so we let the runtime handle the different offsets we're - // using. - recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 1); + // Record the sled version. Version 0 of this sled was spelled differently, so + // we let the runtime handle the different offsets we're using. Version 2 + // changed the absolute address to a PC-relative address. + recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2); } void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI, @@ -1538,13 +1568,13 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI, // First we emit the label and the jump. auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true); OutStreamer->AddComment("# XRay Typed Event Log"); - OutStreamer->EmitCodeAlignment(2); - OutStreamer->EmitLabel(CurSled); + OutStreamer->emitCodeAlignment(2); + OutStreamer->emitLabel(CurSled); // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as // an operand (computed as an offset from the jmp instruction). // FIXME: Find another less hacky way do force the relative jump. - OutStreamer->EmitBinaryData("\xeb\x14"); + OutStreamer->emitBinaryData("\xeb\x14"); // An x86-64 convention may place three arguments into %rcx, %rdx, and R8, // so we'll work with those. Or we may be called via SystemV, in which case @@ -1569,7 +1599,7 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI, EmitAndCountInstruction( MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I])); } else { - EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo()); + emitX86Nops(*OutStreamer, 4, Subtarget); } } @@ -1603,12 +1633,12 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI, if (UsedMask[I]) EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I])); else - EmitNops(*OutStreamer, 1, Subtarget->is64Bit(), getSubtargetInfo()); + emitX86Nops(*OutStreamer, 1, Subtarget); OutStreamer->AddComment("xray typed event end."); // Record the sled version. - recordSled(CurSled, MI, SledKind::TYPED_EVENT, 0); + recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2); } void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI, @@ -1623,7 +1653,7 @@ void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI, .getValueAsString() .getAsInteger(10, Num)) return; - EmitNops(*OutStreamer, Num, Subtarget->is64Bit(), getSubtargetInfo()); + emitX86Nops(*OutStreamer, Num, Subtarget); return; } // We want to emit the following pattern: @@ -1640,15 +1670,15 @@ void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI, // call <relative offset, 32-bits> // 5 bytes // auto CurSled = OutContext.createTempSymbol("xray_sled_", true); - OutStreamer->EmitCodeAlignment(2); - OutStreamer->EmitLabel(CurSled); + OutStreamer->emitCodeAlignment(2); + OutStreamer->emitLabel(CurSled); // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as // an operand (computed as an offset from the jmp instruction). // FIXME: Find another less hacky way do force the relative jump. - OutStreamer->EmitBytes("\xeb\x09"); - EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo()); - recordSled(CurSled, MI, SledKind::FUNCTION_ENTER); + OutStreamer->emitBytes("\xeb\x09"); + emitX86Nops(*OutStreamer, 9, Subtarget); + recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2); } void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI, @@ -1670,17 +1700,17 @@ void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI, // // This just makes sure that the alignment for the next instruction is 2. auto CurSled = OutContext.createTempSymbol("xray_sled_", true); - OutStreamer->EmitCodeAlignment(2); - OutStreamer->EmitLabel(CurSled); + OutStreamer->emitCodeAlignment(2); + OutStreamer->emitLabel(CurSled); unsigned OpCode = MI.getOperand(0).getImm(); MCInst Ret; Ret.setOpcode(OpCode); for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end())) if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO)) Ret.addOperand(MaybeOperand.getValue()); - OutStreamer->EmitInstruction(Ret, getSubtargetInfo()); - EmitNops(*OutStreamer, 10, Subtarget->is64Bit(), getSubtargetInfo()); - recordSled(CurSled, MI, SledKind::FUNCTION_EXIT); + OutStreamer->emitInstruction(Ret, getSubtargetInfo()); + emitX86Nops(*OutStreamer, 10, Subtarget); + recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2); } void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, @@ -1694,17 +1724,17 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual // tail call much like how we have it in PATCHABLE_RET. auto CurSled = OutContext.createTempSymbol("xray_sled_", true); - OutStreamer->EmitCodeAlignment(2); - OutStreamer->EmitLabel(CurSled); + OutStreamer->emitCodeAlignment(2); + OutStreamer->emitLabel(CurSled); auto Target = OutContext.createTempSymbol(); // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as // an operand (computed as an offset from the jmp instruction). // FIXME: Find another less hacky way do force the relative jump. - OutStreamer->EmitBytes("\xeb\x09"); - EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo()); - OutStreamer->EmitLabel(Target); - recordSled(CurSled, MI, SledKind::TAIL_CALL); + OutStreamer->emitBytes("\xeb\x09"); + emitX86Nops(*OutStreamer, 9, Subtarget); + OutStreamer->emitLabel(Target); + recordSled(CurSled, MI, SledKind::TAIL_CALL, 2); unsigned OpCode = MI.getOperand(0).getImm(); OpCode = convertTailJumpOpcode(OpCode); @@ -1717,7 +1747,7 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end())) if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO)) TC.addOperand(MaybeOperand.getValue()); - OutStreamer->EmitInstruction(TC, getSubtargetInfo()); + OutStreamer->emitInstruction(TC, getSubtargetInfo()); } // Returns instruction preceding MBBI in MachineFunction. @@ -1961,281 +1991,9 @@ static unsigned getRegisterWidth(const MCOperandInfo &Info) { llvm_unreachable("Unknown register class!"); } -void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { - X86MCInstLower MCInstLowering(*MF, *this); - const X86RegisterInfo *RI = - MF->getSubtarget<X86Subtarget>().getRegisterInfo(); - - // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that - // are compressed from EVEX encoding to VEX encoding. - if (TM.Options.MCOptions.ShowMCEncoding) { - if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX) - OutStreamer->AddComment("EVEX TO VEX Compression ", false); - } - +static void addConstantComments(const MachineInstr *MI, + MCStreamer &OutStreamer) { switch (MI->getOpcode()) { - case TargetOpcode::DBG_VALUE: - llvm_unreachable("Should be handled target independently"); - - // Emit nothing here but a comment if we can. - case X86::Int_MemBarrier: - OutStreamer->emitRawComment("MEMBARRIER"); - return; - - case X86::EH_RETURN: - case X86::EH_RETURN64: { - // Lower these as normal, but add some comments. - Register Reg = MI->getOperand(0).getReg(); - OutStreamer->AddComment(StringRef("eh_return, addr: %") + - X86ATTInstPrinter::getRegisterName(Reg)); - break; - } - case X86::CLEANUPRET: { - // Lower these as normal, but add some comments. - OutStreamer->AddComment("CLEANUPRET"); - break; - } - - case X86::CATCHRET: { - // Lower these as normal, but add some comments. - OutStreamer->AddComment("CATCHRET"); - break; - } - - case X86::TAILJMPr: - case X86::TAILJMPm: - case X86::TAILJMPd: - case X86::TAILJMPd_CC: - case X86::TAILJMPr64: - case X86::TAILJMPm64: - case X86::TAILJMPd64: - case X86::TAILJMPd64_CC: - case X86::TAILJMPr64_REX: - case X86::TAILJMPm64_REX: - // Lower these as normal, but add some comments. - OutStreamer->AddComment("TAILCALL"); - break; - - case X86::TLS_addr32: - case X86::TLS_addr64: - case X86::TLS_base_addr32: - case X86::TLS_base_addr64: - return LowerTlsAddr(MCInstLowering, *MI); - - // Loading/storing mask pairs requires two kmov operations. The second one of these - // needs a 2 byte displacement relative to the specified address (with 32 bit spill - // size). The pairs of 1bit masks up to 16 bit masks all use the same spill size, - // they all are stored using MASKPAIR16STORE, loaded using MASKPAIR16LOAD. - // - // The displacement value might wrap around in theory, thus the asserts in both - // cases. - case X86::MASKPAIR16LOAD: { - int64_t Disp = MI->getOperand(1 + X86::AddrDisp).getImm(); - assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement"); - Register Reg = MI->getOperand(0).getReg(); - Register Reg0 = RI->getSubReg(Reg, X86::sub_mask_0); - Register Reg1 = RI->getSubReg(Reg, X86::sub_mask_1); - - // Load the first mask register - MCInstBuilder MIB = MCInstBuilder(X86::KMOVWkm); - MIB.addReg(Reg0); - for (int i = 0; i < X86::AddrNumOperands; ++i) { - auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i)); - MIB.addOperand(Op.getValue()); - } - EmitAndCountInstruction(MIB); - - // Load the second mask register of the pair - MIB = MCInstBuilder(X86::KMOVWkm); - MIB.addReg(Reg1); - for (int i = 0; i < X86::AddrNumOperands; ++i) { - if (i == X86::AddrDisp) { - MIB.addImm(Disp + 2); - } else { - auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i)); - MIB.addOperand(Op.getValue()); - } - } - EmitAndCountInstruction(MIB); - return; - } - - case X86::MASKPAIR16STORE: { - int64_t Disp = MI->getOperand(X86::AddrDisp).getImm(); - assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement"); - Register Reg = MI->getOperand(X86::AddrNumOperands).getReg(); - Register Reg0 = RI->getSubReg(Reg, X86::sub_mask_0); - Register Reg1 = RI->getSubReg(Reg, X86::sub_mask_1); - - // Store the first mask register - MCInstBuilder MIB = MCInstBuilder(X86::KMOVWmk); - for (int i = 0; i < X86::AddrNumOperands; ++i) - MIB.addOperand(MCInstLowering.LowerMachineOperand(MI, MI->getOperand(i)).getValue()); - MIB.addReg(Reg0); - EmitAndCountInstruction(MIB); - - // Store the second mask register of the pair - MIB = MCInstBuilder(X86::KMOVWmk); - for (int i = 0; i < X86::AddrNumOperands; ++i) { - if (i == X86::AddrDisp) { - MIB.addImm(Disp + 2); - } else { - auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(0 + i)); - MIB.addOperand(Op.getValue()); - } - } - MIB.addReg(Reg1); - EmitAndCountInstruction(MIB); - return; - } - - case X86::MOVPC32r: { - // This is a pseudo op for a two instruction sequence with a label, which - // looks like: - // call "L1$pb" - // "L1$pb": - // popl %esi - - // Emit the call. - MCSymbol *PICBase = MF->getPICBaseSymbol(); - // FIXME: We would like an efficient form for this, so we don't have to do a - // lot of extra uniquing. - EmitAndCountInstruction( - MCInstBuilder(X86::CALLpcrel32) - .addExpr(MCSymbolRefExpr::create(PICBase, OutContext))); - - const X86FrameLowering *FrameLowering = - MF->getSubtarget<X86Subtarget>().getFrameLowering(); - bool hasFP = FrameLowering->hasFP(*MF); - - // TODO: This is needed only if we require precise CFA. - bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() && - !OutStreamer->getDwarfFrameInfos().back().End; - - int stackGrowth = -RI->getSlotSize(); - - if (HasActiveDwarfFrame && !hasFP) { - OutStreamer->EmitCFIAdjustCfaOffset(-stackGrowth); - } - - // Emit the label. - OutStreamer->EmitLabel(PICBase); - - // popl $reg - EmitAndCountInstruction( - MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg())); - - if (HasActiveDwarfFrame && !hasFP) { - OutStreamer->EmitCFIAdjustCfaOffset(stackGrowth); - } - return; - } - - case X86::ADD32ri: { - // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri. - if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS) - break; - - // Okay, we have something like: - // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL) - - // For this, we want to print something like: - // MYGLOBAL + (. - PICBASE) - // However, we can't generate a ".", so just emit a new label here and refer - // to it. - MCSymbol *DotSym = OutContext.createTempSymbol(); - OutStreamer->EmitLabel(DotSym); - - // Now that we have emitted the label, lower the complex operand expression. - MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2)); - - const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext); - const MCExpr *PICBase = - MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext); - DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext); - - DotExpr = MCBinaryExpr::createAdd( - MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext); - - EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri) - .addReg(MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()) - .addExpr(DotExpr)); - return; - } - case TargetOpcode::STATEPOINT: - return LowerSTATEPOINT(*MI, MCInstLowering); - - case TargetOpcode::FAULTING_OP: - return LowerFAULTING_OP(*MI, MCInstLowering); - - case TargetOpcode::FENTRY_CALL: - return LowerFENTRY_CALL(*MI, MCInstLowering); - - case TargetOpcode::PATCHABLE_OP: - return LowerPATCHABLE_OP(*MI, MCInstLowering); - - case TargetOpcode::STACKMAP: - return LowerSTACKMAP(*MI); - - case TargetOpcode::PATCHPOINT: - return LowerPATCHPOINT(*MI, MCInstLowering); - - case TargetOpcode::PATCHABLE_FUNCTION_ENTER: - return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering); - - case TargetOpcode::PATCHABLE_RET: - return LowerPATCHABLE_RET(*MI, MCInstLowering); - - case TargetOpcode::PATCHABLE_TAIL_CALL: - return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering); - - case TargetOpcode::PATCHABLE_EVENT_CALL: - return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering); - - case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL: - return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering); - - case X86::MORESTACK_RET: - EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget))); - return; - - case X86::MORESTACK_RET_RESTORE_R10: - // Return, then restore R10. - EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget))); - EmitAndCountInstruction( - MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX)); - return; - - case X86::SEH_PushReg: - case X86::SEH_SaveReg: - case X86::SEH_SaveXMM: - case X86::SEH_StackAlloc: - case X86::SEH_StackAlign: - case X86::SEH_SetFrame: - case X86::SEH_PushFrame: - case X86::SEH_EndPrologue: - EmitSEHInstruction(MI); - return; - - case X86::SEH_Epilogue: { - assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); - MachineBasicBlock::const_iterator MBBI(MI); - // Check if preceded by a call and emit nop if so. - for (MBBI = PrevCrossBBInst(MBBI); - MBBI != MachineBasicBlock::const_iterator(); - MBBI = PrevCrossBBInst(MBBI)) { - // Conservatively assume that pseudo instructions don't emit code and keep - // looking for a call. We may emit an unnecessary nop in some cases. - if (!MBBI->isPseudo()) { - if (MBBI->isCall()) - EmitAndCountInstruction(MCInstBuilder(X86::NOOP)); - break; - } - } - return; - } - // Lower PSHUFB and VPERMILP normally but add a comment if we can find // a constant shuffle mask. We won't be able to do this at the MC layer // because the mask isn't an immediate. @@ -2251,30 +2009,19 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { case X86::VPSHUFBZrm: case X86::VPSHUFBZrmk: case X86::VPSHUFBZrmkz: { - if (!OutStreamer->isVerboseAsm()) - break; - unsigned SrcIdx, MaskIdx; - switch (MI->getOpcode()) { - default: llvm_unreachable("Invalid opcode"); - case X86::PSHUFBrm: - case X86::VPSHUFBrm: - case X86::VPSHUFBYrm: - case X86::VPSHUFBZ128rm: - case X86::VPSHUFBZ256rm: - case X86::VPSHUFBZrm: - SrcIdx = 1; MaskIdx = 5; break; - case X86::VPSHUFBZ128rmkz: - case X86::VPSHUFBZ256rmkz: - case X86::VPSHUFBZrmkz: - SrcIdx = 2; MaskIdx = 6; break; - case X86::VPSHUFBZ128rmk: - case X86::VPSHUFBZ256rmk: - case X86::VPSHUFBZrmk: - SrcIdx = 3; MaskIdx = 7; break; + unsigned SrcIdx = 1; + if (X86II::isKMasked(MI->getDesc().TSFlags)) { + // Skip mask operand. + ++SrcIdx; + if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) { + // Skip passthru operand. + ++SrcIdx; + } } + unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp; - assert(MI->getNumOperands() >= 6 && - "We should always have at least 6 operands!"); + assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) && + "Unexpected number of operands!"); const MachineOperand &MaskOp = MI->getOperand(MaskIdx); if (auto *C = getConstantFromPool(*MI, MaskOp)) { @@ -2282,7 +2029,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallVector<int, 64> Mask; DecodePSHUFBMask(C, Width, Mask); if (!Mask.empty()) - OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask)); + OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask)); } break; } @@ -2309,9 +2056,6 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { case X86::VPERMILPDZrm: case X86::VPERMILPDZrmk: case X86::VPERMILPDZrmkz: { - if (!OutStreamer->isVerboseAsm()) - break; - unsigned SrcIdx, MaskIdx; unsigned ElSize; switch (MI->getOpcode()) { default: llvm_unreachable("Invalid opcode"); @@ -2320,33 +2064,42 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { case X86::VPERMILPSZ128rm: case X86::VPERMILPSZ256rm: case X86::VPERMILPSZrm: - SrcIdx = 1; MaskIdx = 5; ElSize = 32; break; case X86::VPERMILPSZ128rmkz: case X86::VPERMILPSZ256rmkz: case X86::VPERMILPSZrmkz: - SrcIdx = 2; MaskIdx = 6; ElSize = 32; break; case X86::VPERMILPSZ128rmk: case X86::VPERMILPSZ256rmk: case X86::VPERMILPSZrmk: - SrcIdx = 3; MaskIdx = 7; ElSize = 32; break; + ElSize = 32; + break; case X86::VPERMILPDrm: case X86::VPERMILPDYrm: case X86::VPERMILPDZ128rm: case X86::VPERMILPDZ256rm: case X86::VPERMILPDZrm: - SrcIdx = 1; MaskIdx = 5; ElSize = 64; break; case X86::VPERMILPDZ128rmkz: case X86::VPERMILPDZ256rmkz: case X86::VPERMILPDZrmkz: - SrcIdx = 2; MaskIdx = 6; ElSize = 64; break; case X86::VPERMILPDZ128rmk: case X86::VPERMILPDZ256rmk: case X86::VPERMILPDZrmk: - SrcIdx = 3; MaskIdx = 7; ElSize = 64; break; + ElSize = 64; + break; + } + + unsigned SrcIdx = 1; + if (X86II::isKMasked(MI->getDesc().TSFlags)) { + // Skip mask operand. + ++SrcIdx; + if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) { + // Skip passthru operand. + ++SrcIdx; + } } + unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp; - assert(MI->getNumOperands() >= 6 && - "We should always have at least 6 operands!"); + assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) && + "Unexpected number of operands!"); const MachineOperand &MaskOp = MI->getOperand(MaskIdx); if (auto *C = getConstantFromPool(*MI, MaskOp)) { @@ -2354,7 +2107,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallVector<int, 16> Mask; DecodeVPERMILPMask(C, ElSize, Width, Mask); if (!Mask.empty()) - OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask)); + OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask)); } break; } @@ -2363,10 +2116,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { case X86::VPERMIL2PSrm: case X86::VPERMIL2PDYrm: case X86::VPERMIL2PSYrm: { - if (!OutStreamer->isVerboseAsm()) - break; - assert(MI->getNumOperands() >= 8 && - "We should always have at least 8 operands!"); + assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) && + "Unexpected number of operands!"); const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1); if (!CtrlOp.isImm()) @@ -2379,47 +2130,43 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break; } - const MachineOperand &MaskOp = MI->getOperand(6); + const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp); if (auto *C = getConstantFromPool(*MI, MaskOp)) { unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]); SmallVector<int, 16> Mask; DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask); if (!Mask.empty()) - OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask)); + OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask)); } break; } case X86::VPPERMrrm: { - if (!OutStreamer->isVerboseAsm()) - break; - assert(MI->getNumOperands() >= 7 && - "We should always have at least 7 operands!"); + assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands) && + "Unexpected number of operands!"); - const MachineOperand &MaskOp = MI->getOperand(6); + const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp); if (auto *C = getConstantFromPool(*MI, MaskOp)) { unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]); SmallVector<int, 16> Mask; DecodeVPPERMMask(C, Width, Mask); if (!Mask.empty()) - OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask)); + OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask)); } break; } case X86::MMX_MOVQ64rm: { - if (!OutStreamer->isVerboseAsm()) - break; - if (MI->getNumOperands() <= 4) - break; - if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) { + assert(MI->getNumOperands() == (1 + X86::AddrNumOperands) && + "Unexpected number of operands!"); + if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) { std::string Comment; raw_string_ostream CS(Comment); const MachineOperand &DstOp = MI->getOperand(0); CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = "; if (auto *CF = dyn_cast<ConstantFP>(C)) { CS << "0x" << CF->getValueAPF().bitcastToAPInt().toString(16, false); - OutStreamer->AddComment(CS.str()); + OutStreamer.AddComment(CS.str()); } } break; @@ -2470,11 +2217,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { case X86::VBROADCASTI64X2Z128rm: case X86::VBROADCASTI64X2rm: case X86::VBROADCASTI64X4rm: - if (!OutStreamer->isVerboseAsm()) - break; - if (MI->getNumOperands() <= 4) - break; - if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) { + assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) && + "Unexpected number of operands!"); + if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) { int NumLanes = 1; // Override NumLanes for the broadcast instructions. switch (MI->getOpcode()) { @@ -2516,7 +2261,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { } } CS << "]"; - OutStreamer->AddComment(CS.str()); + OutStreamer.AddComment(CS.str()); } else if (auto *CV = dyn_cast<ConstantVector>(C)) { CS << "<"; for (int l = 0; l != NumLanes; ++l) { @@ -2528,80 +2273,79 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { } } CS << ">"; - OutStreamer->AddComment(CS.str()); + OutStreamer.AddComment(CS.str()); } } break; + case X86::MOVDDUPrm: case X86::VMOVDDUPrm: case X86::VMOVDDUPZ128rm: case X86::VBROADCASTSSrm: case X86::VBROADCASTSSYrm: - case X86::VBROADCASTSSZ128m: - case X86::VBROADCASTSSZ256m: - case X86::VBROADCASTSSZm: + case X86::VBROADCASTSSZ128rm: + case X86::VBROADCASTSSZ256rm: + case X86::VBROADCASTSSZrm: case X86::VBROADCASTSDYrm: - case X86::VBROADCASTSDZ256m: - case X86::VBROADCASTSDZm: + case X86::VBROADCASTSDZ256rm: + case X86::VBROADCASTSDZrm: case X86::VPBROADCASTBrm: case X86::VPBROADCASTBYrm: - case X86::VPBROADCASTBZ128m: - case X86::VPBROADCASTBZ256m: - case X86::VPBROADCASTBZm: + case X86::VPBROADCASTBZ128rm: + case X86::VPBROADCASTBZ256rm: + case X86::VPBROADCASTBZrm: case X86::VPBROADCASTDrm: case X86::VPBROADCASTDYrm: - case X86::VPBROADCASTDZ128m: - case X86::VPBROADCASTDZ256m: - case X86::VPBROADCASTDZm: + case X86::VPBROADCASTDZ128rm: + case X86::VPBROADCASTDZ256rm: + case X86::VPBROADCASTDZrm: case X86::VPBROADCASTQrm: case X86::VPBROADCASTQYrm: - case X86::VPBROADCASTQZ128m: - case X86::VPBROADCASTQZ256m: - case X86::VPBROADCASTQZm: + case X86::VPBROADCASTQZ128rm: + case X86::VPBROADCASTQZ256rm: + case X86::VPBROADCASTQZrm: case X86::VPBROADCASTWrm: case X86::VPBROADCASTWYrm: - case X86::VPBROADCASTWZ128m: - case X86::VPBROADCASTWZ256m: - case X86::VPBROADCASTWZm: - if (!OutStreamer->isVerboseAsm()) - break; - if (MI->getNumOperands() <= 4) - break; - if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) { + case X86::VPBROADCASTWZ128rm: + case X86::VPBROADCASTWZ256rm: + case X86::VPBROADCASTWZrm: + assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) && + "Unexpected number of operands!"); + if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) { int NumElts; switch (MI->getOpcode()) { default: llvm_unreachable("Invalid opcode"); - case X86::MOVDDUPrm: NumElts = 2; break; - case X86::VMOVDDUPrm: NumElts = 2; break; - case X86::VMOVDDUPZ128rm: NumElts = 2; break; - case X86::VBROADCASTSSrm: NumElts = 4; break; - case X86::VBROADCASTSSYrm: NumElts = 8; break; - case X86::VBROADCASTSSZ128m: NumElts = 4; break; - case X86::VBROADCASTSSZ256m: NumElts = 8; break; - case X86::VBROADCASTSSZm: NumElts = 16; break; - case X86::VBROADCASTSDYrm: NumElts = 4; break; - case X86::VBROADCASTSDZ256m: NumElts = 4; break; - case X86::VBROADCASTSDZm: NumElts = 8; break; - case X86::VPBROADCASTBrm: NumElts = 16; break; - case X86::VPBROADCASTBYrm: NumElts = 32; break; - case X86::VPBROADCASTBZ128m: NumElts = 16; break; - case X86::VPBROADCASTBZ256m: NumElts = 32; break; - case X86::VPBROADCASTBZm: NumElts = 64; break; - case X86::VPBROADCASTDrm: NumElts = 4; break; - case X86::VPBROADCASTDYrm: NumElts = 8; break; - case X86::VPBROADCASTDZ128m: NumElts = 4; break; - case X86::VPBROADCASTDZ256m: NumElts = 8; break; - case X86::VPBROADCASTDZm: NumElts = 16; break; - case X86::VPBROADCASTQrm: NumElts = 2; break; - case X86::VPBROADCASTQYrm: NumElts = 4; break; - case X86::VPBROADCASTQZ128m: NumElts = 2; break; - case X86::VPBROADCASTQZ256m: NumElts = 4; break; - case X86::VPBROADCASTQZm: NumElts = 8; break; - case X86::VPBROADCASTWrm: NumElts = 8; break; - case X86::VPBROADCASTWYrm: NumElts = 16; break; - case X86::VPBROADCASTWZ128m: NumElts = 8; break; - case X86::VPBROADCASTWZ256m: NumElts = 16; break; - case X86::VPBROADCASTWZm: NumElts = 32; break; + case X86::MOVDDUPrm: NumElts = 2; break; + case X86::VMOVDDUPrm: NumElts = 2; break; + case X86::VMOVDDUPZ128rm: NumElts = 2; break; + case X86::VBROADCASTSSrm: NumElts = 4; break; + case X86::VBROADCASTSSYrm: NumElts = 8; break; + case X86::VBROADCASTSSZ128rm: NumElts = 4; break; + case X86::VBROADCASTSSZ256rm: NumElts = 8; break; + case X86::VBROADCASTSSZrm: NumElts = 16; break; + case X86::VBROADCASTSDYrm: NumElts = 4; break; + case X86::VBROADCASTSDZ256rm: NumElts = 4; break; + case X86::VBROADCASTSDZrm: NumElts = 8; break; + case X86::VPBROADCASTBrm: NumElts = 16; break; + case X86::VPBROADCASTBYrm: NumElts = 32; break; + case X86::VPBROADCASTBZ128rm: NumElts = 16; break; + case X86::VPBROADCASTBZ256rm: NumElts = 32; break; + case X86::VPBROADCASTBZrm: NumElts = 64; break; + case X86::VPBROADCASTDrm: NumElts = 4; break; + case X86::VPBROADCASTDYrm: NumElts = 8; break; + case X86::VPBROADCASTDZ128rm: NumElts = 4; break; + case X86::VPBROADCASTDZ256rm: NumElts = 8; break; + case X86::VPBROADCASTDZrm: NumElts = 16; break; + case X86::VPBROADCASTQrm: NumElts = 2; break; + case X86::VPBROADCASTQYrm: NumElts = 4; break; + case X86::VPBROADCASTQZ128rm: NumElts = 2; break; + case X86::VPBROADCASTQZ256rm: NumElts = 4; break; + case X86::VPBROADCASTQZrm: NumElts = 8; break; + case X86::VPBROADCASTWrm: NumElts = 8; break; + case X86::VPBROADCASTWYrm: NumElts = 16; break; + case X86::VPBROADCASTWZ128rm: NumElts = 8; break; + case X86::VPBROADCASTWZ256rm: NumElts = 16; break; + case X86::VPBROADCASTWZrm: NumElts = 32; break; } std::string Comment; @@ -2615,8 +2359,241 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { printConstant(C, CS); } CS << "]"; - OutStreamer->AddComment(CS.str()); + OutStreamer.AddComment(CS.str()); + } + } +} + +void X86AsmPrinter::emitInstruction(const MachineInstr *MI) { + X86MCInstLower MCInstLowering(*MF, *this); + const X86RegisterInfo *RI = + MF->getSubtarget<X86Subtarget>().getRegisterInfo(); + + // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that + // are compressed from EVEX encoding to VEX encoding. + if (TM.Options.MCOptions.ShowMCEncoding) { + if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX) + OutStreamer->AddComment("EVEX TO VEX Compression ", false); + } + + // Add comments for values loaded from constant pool. + if (OutStreamer->isVerboseAsm()) + addConstantComments(MI, *OutStreamer); + + switch (MI->getOpcode()) { + case TargetOpcode::DBG_VALUE: + llvm_unreachable("Should be handled target independently"); + + // Emit nothing here but a comment if we can. + case X86::Int_MemBarrier: + OutStreamer->emitRawComment("MEMBARRIER"); + return; + + case X86::EH_RETURN: + case X86::EH_RETURN64: { + // Lower these as normal, but add some comments. + Register Reg = MI->getOperand(0).getReg(); + OutStreamer->AddComment(StringRef("eh_return, addr: %") + + X86ATTInstPrinter::getRegisterName(Reg)); + break; + } + case X86::CLEANUPRET: { + // Lower these as normal, but add some comments. + OutStreamer->AddComment("CLEANUPRET"); + break; + } + + case X86::CATCHRET: { + // Lower these as normal, but add some comments. + OutStreamer->AddComment("CATCHRET"); + break; + } + + case X86::ENDBR32: + case X86::ENDBR64: { + // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for + // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be + // non-empty. If MI is the initial ENDBR, place the + // __patchable_function_entries label after ENDBR. + if (CurrentPatchableFunctionEntrySym && + CurrentPatchableFunctionEntrySym == CurrentFnBegin && + MI == &MF->front().front()) { + MCInst Inst; + MCInstLowering.Lower(MI, Inst); + EmitAndCountInstruction(Inst); + CurrentPatchableFunctionEntrySym = createTempSymbol("patch"); + OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym); + return; + } + break; + } + + case X86::TAILJMPr: + case X86::TAILJMPm: + case X86::TAILJMPd: + case X86::TAILJMPd_CC: + case X86::TAILJMPr64: + case X86::TAILJMPm64: + case X86::TAILJMPd64: + case X86::TAILJMPd64_CC: + case X86::TAILJMPr64_REX: + case X86::TAILJMPm64_REX: + // Lower these as normal, but add some comments. + OutStreamer->AddComment("TAILCALL"); + break; + + case X86::TLS_addr32: + case X86::TLS_addr64: + case X86::TLS_base_addr32: + case X86::TLS_base_addr64: + return LowerTlsAddr(MCInstLowering, *MI); + + case X86::MOVPC32r: { + // This is a pseudo op for a two instruction sequence with a label, which + // looks like: + // call "L1$pb" + // "L1$pb": + // popl %esi + + // Emit the call. + MCSymbol *PICBase = MF->getPICBaseSymbol(); + // FIXME: We would like an efficient form for this, so we don't have to do a + // lot of extra uniquing. + EmitAndCountInstruction( + MCInstBuilder(X86::CALLpcrel32) + .addExpr(MCSymbolRefExpr::create(PICBase, OutContext))); + + const X86FrameLowering *FrameLowering = + MF->getSubtarget<X86Subtarget>().getFrameLowering(); + bool hasFP = FrameLowering->hasFP(*MF); + + // TODO: This is needed only if we require precise CFA. + bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() && + !OutStreamer->getDwarfFrameInfos().back().End; + + int stackGrowth = -RI->getSlotSize(); + + if (HasActiveDwarfFrame && !hasFP) { + OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth); + } + + // Emit the label. + OutStreamer->emitLabel(PICBase); + + // popl $reg + EmitAndCountInstruction( + MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg())); + + if (HasActiveDwarfFrame && !hasFP) { + OutStreamer->emitCFIAdjustCfaOffset(stackGrowth); + } + return; + } + + case X86::ADD32ri: { + // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri. + if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS) + break; + + // Okay, we have something like: + // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL) + + // For this, we want to print something like: + // MYGLOBAL + (. - PICBASE) + // However, we can't generate a ".", so just emit a new label here and refer + // to it. + MCSymbol *DotSym = OutContext.createTempSymbol(); + OutStreamer->emitLabel(DotSym); + + // Now that we have emitted the label, lower the complex operand expression. + MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2)); + + const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext); + const MCExpr *PICBase = + MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext); + DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext); + + DotExpr = MCBinaryExpr::createAdd( + MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext); + + EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addExpr(DotExpr)); + return; + } + case TargetOpcode::STATEPOINT: + return LowerSTATEPOINT(*MI, MCInstLowering); + + case TargetOpcode::FAULTING_OP: + return LowerFAULTING_OP(*MI, MCInstLowering); + + case TargetOpcode::FENTRY_CALL: + return LowerFENTRY_CALL(*MI, MCInstLowering); + + case TargetOpcode::PATCHABLE_OP: + return LowerPATCHABLE_OP(*MI, MCInstLowering); + + case TargetOpcode::STACKMAP: + return LowerSTACKMAP(*MI); + + case TargetOpcode::PATCHPOINT: + return LowerPATCHPOINT(*MI, MCInstLowering); + + case TargetOpcode::PATCHABLE_FUNCTION_ENTER: + return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering); + + case TargetOpcode::PATCHABLE_RET: + return LowerPATCHABLE_RET(*MI, MCInstLowering); + + case TargetOpcode::PATCHABLE_TAIL_CALL: + return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering); + + case TargetOpcode::PATCHABLE_EVENT_CALL: + return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering); + + case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL: + return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering); + + case X86::MORESTACK_RET: + EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget))); + return; + + case X86::MORESTACK_RET_RESTORE_R10: + // Return, then restore R10. + EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget))); + EmitAndCountInstruction( + MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX)); + return; + + case X86::SEH_PushReg: + case X86::SEH_SaveReg: + case X86::SEH_SaveXMM: + case X86::SEH_StackAlloc: + case X86::SEH_StackAlign: + case X86::SEH_SetFrame: + case X86::SEH_PushFrame: + case X86::SEH_EndPrologue: + EmitSEHInstruction(MI); + return; + + case X86::SEH_Epilogue: { + assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); + MachineBasicBlock::const_iterator MBBI(MI); + // Check if preceded by a call and emit nop if so. + for (MBBI = PrevCrossBBInst(MBBI); + MBBI != MachineBasicBlock::const_iterator(); + MBBI = PrevCrossBBInst(MBBI)) { + // Conservatively assume that pseudo instructions don't emit code and keep + // looking for a call. We may emit an unnecessary nop in some cases. + if (!MBBI->isPseudo()) { + if (MBBI->isCall()) + EmitAndCountInstruction(MCInstBuilder(X86::NOOP)); + break; + } } + return; + } } MCInst TmpInst; @@ -2633,7 +2610,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { // after it. SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo()); // Then emit the call - OutStreamer->EmitInstruction(TmpInst, getSubtargetInfo()); + OutStreamer->emitInstruction(TmpInst, getSubtargetInfo()); return; } |