Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp  980
1 file changed, 469 insertions, 511 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp
index f5caaaae4d84..9ce2a4637e2e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -14,11 +14,12 @@
 #include "MCTargetDesc/X86ATTInstPrinter.h"
 #include "MCTargetDesc/X86BaseInfo.h"
 #include "MCTargetDesc/X86InstComments.h"
+#include "MCTargetDesc/X86ShuffleDecode.h"
 #include "MCTargetDesc/X86TargetStreamer.h"
-#include "Utils/X86ShuffleDecode.h"
 #include "X86AsmPrinter.h"
 #include "X86RegisterInfo.h"
 #include "X86ShuffleDecodeConstantPool.h"
+#include "X86Subtarget.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/iterator_range.h"
@@ -43,6 +44,7 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCSymbolELF.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
 
@@ -72,9 +74,30 @@ private:
 
 } // end anonymous namespace
 
+/// A RAII helper which defines a region of instructions which can't have
+/// padding added between them for correctness.
+struct NoAutoPaddingScope {
+  MCStreamer &OS;
+  const bool OldAllowAutoPadding;
+  NoAutoPaddingScope(MCStreamer &OS)
+      : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
+    changeAndComment(false);
+  }
+  ~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); }
+  void changeAndComment(bool b) {
+    if (b == OS.getAllowAutoPadding())
+      return;
+    OS.setAllowAutoPadding(b);
+    if (b)
+      OS.emitRawComment("autopadding");
+    else
+      OS.emitRawComment("noautopadding");
+  }
+};
+
 // Emit a minimal sequence of nops spanning NumBytes bytes.
-static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
-                     const MCSubtargetInfo &STI);
+static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
+                        const X86Subtarget *Subtarget);
 
 void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
                                                  const MCSubtargetInfo &STI,
@@ -94,13 +117,13 @@ void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
     MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
   if (InShadow && CurrentShadowSize < RequiredShadowSize) {
     InShadow = false;
-    EmitNops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
-             MF->getSubtarget<X86Subtarget>().is64Bit(), STI);
+    emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
+                &MF->getSubtarget<X86Subtarget>());
   }
 }
 
 void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
-  OutStreamer->EmitInstruction(Inst, getSubtargetInfo());
+  OutStreamer->emitInstruction(Inst, getSubtargetInfo());
   SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
 }
 
@@ -116,6 +139,10 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
 /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
 /// operand to an MCSymbol.
MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const { +  const Triple &TT = TM.getTargetTriple(); +  if (MO.isGlobal() && TT.isOSBinFormatELF()) +    return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal()); +    const DataLayout &DL = MF.getDataLayout();    assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&           "Isn't a symbol reference"); @@ -272,7 +299,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,        // local labels. This is only safe when the symbols are in the same        // section so we are restricting it to jumptable references.        MCSymbol *Label = Ctx.createTempSymbol(); -      AsmPrinter.OutStreamer->EmitAssignment(Label, Expr); +      AsmPrinter.OutStreamer->emitAssignment(Label, Expr);        Expr = MCSymbolRefExpr::create(Label, Ctx);      }      break; @@ -482,6 +509,26 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {             "LEA has segment specified!");      break; +  case X86::MULX32Hrr: +  case X86::MULX32Hrm: +  case X86::MULX64Hrr: +  case X86::MULX64Hrm: { +    // Turn into regular MULX by duplicating the destination. +    unsigned NewOpc; +    switch (OutMI.getOpcode()) { +    default: llvm_unreachable("Invalid opcode"); +    case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break; +    case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break; +    case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break; +    case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break; +    } +    OutMI.setOpcode(NewOpc); +    // Duplicate the destination. +    unsigned DestReg = OutMI.getOperand(0).getReg(); +    OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg)); +    break; +  } +    // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B    // if one of the registers is extended, but other isn't.    case X86::VMOVZPQILo2PQIrr: @@ -929,6 +976,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {  void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,                                   const MachineInstr &MI) { +  NoAutoPaddingScope NoPadScope(*OutStreamer);    bool Is64Bits = MI.getOpcode() == X86::TLS_addr64 ||                    MI.getOpcode() == X86::TLS_base_addr64;    MCContext &Ctx = OutStreamer->getContext(); @@ -1034,29 +1082,26 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,  /// Return the longest nop which can be efficiently decoded for the given  /// target cpu.  15-bytes is the longest single NOP instruction, but some  /// platforms can't decode the longest forms efficiently. -static unsigned MaxLongNopLength(const MCSubtargetInfo &STI) { -  uint64_t MaxNopLength = 10; -  if (STI.getFeatureBits()[X86::ProcIntelSLM]) -    MaxNopLength = 7; -  else if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP]) -    MaxNopLength = 15; -  else if (STI.getFeatureBits()[X86::FeatureFast11ByteNOP]) -    MaxNopLength = 11; -  return MaxNopLength; +static unsigned maxLongNopLength(const X86Subtarget *Subtarget) { +  if (Subtarget->getFeatureBits()[X86::ProcIntelSLM]) +    return 7; +  if (Subtarget->getFeatureBits()[X86::FeatureFast15ByteNOP]) +    return 15; +  if (Subtarget->getFeatureBits()[X86::FeatureFast11ByteNOP]) +    return 11; +  if (Subtarget->getFeatureBits()[X86::FeatureNOPL] || Subtarget->is64Bit()) +    return 10; +  if (Subtarget->is32Bit()) +    return 2; +  return 1;  }  /// Emit the largest nop instruction smaller than or equal to \p NumBytes  /// bytes.  Return the size of nop emitted. 
-static unsigned EmitNop(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, -                        const MCSubtargetInfo &STI) { -  if (!Is64Bit) { -    // TODO Do additional checking if the CPU supports multi-byte nops. -    OS.EmitInstruction(MCInstBuilder(X86::NOOP), STI); -    return 1; -  } - +static unsigned emitNop(MCStreamer &OS, unsigned NumBytes, +                        const X86Subtarget *Subtarget) {    // Cap a single nop emission at the profitable value for the target -  NumBytes = std::min(NumBytes, MaxLongNopLength(STI)); +  NumBytes = std::min(NumBytes, maxLongNopLength(Subtarget));    unsigned NopSize;    unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg; @@ -1125,25 +1170,26 @@ static unsigned EmitNop(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,    unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);    NopSize += NumPrefixes;    for (unsigned i = 0; i != NumPrefixes; ++i) -    OS.EmitBytes("\x66"); +    OS.emitBytes("\x66");    switch (Opc) {    default: llvm_unreachable("Unexpected opcode");    case X86::NOOP: -    OS.EmitInstruction(MCInstBuilder(Opc), STI); +    OS.emitInstruction(MCInstBuilder(Opc), *Subtarget);      break;    case X86::XCHG16ar: -    OS.EmitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX), STI); +    OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX), +                       *Subtarget);      break;    case X86::NOOPL:    case X86::NOOPW: -    OS.EmitInstruction(MCInstBuilder(Opc) +    OS.emitInstruction(MCInstBuilder(Opc)                             .addReg(BaseReg)                             .addImm(ScaleVal)                             .addReg(IndexReg)                             .addImm(Displacement)                             .addReg(SegmentReg), -                       STI); +                       *Subtarget);      break;    }    assert(NopSize <= NumBytes && "We overemitted?"); @@ -1151,39 +1197,16 @@ static unsigned EmitNop(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,  }  /// Emit the optimal amount of multi-byte nops on X86. -static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, -                     const MCSubtargetInfo &STI) { +static void emitX86Nops(MCStreamer &OS, unsigned NumBytes, +                        const X86Subtarget *Subtarget) {    unsigned NopsToEmit = NumBytes;    (void)NopsToEmit;    while (NumBytes) { -    NumBytes -= EmitNop(OS, NumBytes, Is64Bit, STI); +    NumBytes -= emitNop(OS, NumBytes, Subtarget);      assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");    }  } -/// A RAII helper which defines a region of instructions which can't have -/// padding added between them for correctness. 
-struct NoAutoPaddingScope {
-  MCStreamer &OS;
-  const bool OldAllowAutoPadding;
-  NoAutoPaddingScope(MCStreamer &OS)
-    : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
-    changeAndComment(false);
-  }
-  ~NoAutoPaddingScope() {
-    changeAndComment(OldAllowAutoPadding);
-  }
-  void changeAndComment(bool b) {
-    if (b == OS.getAllowAutoPadding())
-      return;
-    OS.setAllowAutoPadding(b);
-    if (b)
-      OS.emitRawComment("autopadding");
-    else
-      OS.emitRawComment("noautopadding");
-  }
-};
-
 void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
                                     X86MCInstLower &MCIL) {
   assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");
@@ -1192,8 +1215,7 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
 
   StatepointOpers SOpers(&MI);
   if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
-    EmitNops(*OutStreamer, PatchBytes, Subtarget->is64Bit(),
-             getSubtargetInfo());
+    emitX86Nops(*OutStreamer, PatchBytes, Subtarget);
   } else {
     // Lower call target and choose correct opcode
     const MachineOperand &CallTarget = SOpers.getCallTarget();
@@ -1235,14 +1257,14 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
     MCInst CallInst;
     CallInst.setOpcode(CallOpcode);
     CallInst.addOperand(CallTargetMCOp);
-    OutStreamer->EmitInstruction(CallInst, getSubtargetInfo());
+    OutStreamer->emitInstruction(CallInst, getSubtargetInfo());
   }
 
   // Record our statepoint node in the same section used by STACKMAP
   // and PATCHPOINT
   auto &Ctx = OutStreamer->getContext();
   MCSymbol *MILabel = Ctx.createTempSymbol();
-  OutStreamer->EmitLabel(MILabel);
+  OutStreamer->emitLabel(MILabel);
   SM.recordStatepoint(*MILabel, MI);
 }
 
@@ -1262,7 +1284,7 @@ void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
   auto &Ctx = OutStreamer->getContext();
   MCSymbol *FaultingLabel = Ctx.createTempSymbol();
-  OutStreamer->EmitLabel(FaultingLabel);
+  OutStreamer->emitLabel(FaultingLabel);
 
   assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
 
   FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);
@@ -1280,7 +1302,7 @@ void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
       MI.addOperand(MaybeOperand.getValue());
 
   OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
-  OutStreamer->EmitInstruction(MI, getSubtargetInfo());
+  OutStreamer->emitInstruction(MI, getSubtargetInfo());
 }
 
 void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
@@ -1317,7 +1339,17 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
   CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo());
 
   if (Code.size() < MinSize) {
-    if (MinSize == 2 && Opcode == X86::PUSH64r) {
+    if (MinSize == 2 && Subtarget->is32Bit() &&
+        Subtarget->isTargetWindowsMSVC() &&
+        (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) {
+      // For compatibility reasons, when targeting MSVC, it is important to
+      // generate a 'legacy' NOP in the form of an 8B FF MOV EDI, EDI. Some tools
+      // rely specifically on this pattern to be able to patch a function.
+      // This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE.
+      OutStreamer->emitInstruction( +          MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI), +          *Subtarget); +    } else if (MinSize == 2 && Opcode == X86::PUSH64r) {        // This is an optimization that lets us get away without emitting a nop in        // many cases.        // @@ -1325,14 +1357,13 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,        // bytes too, so the check on MinSize is important.        MCI.setOpcode(X86::PUSH64rmr);      } else { -      unsigned NopSize = EmitNop(*OutStreamer, MinSize, Subtarget->is64Bit(), -                                 getSubtargetInfo()); +      unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget);        assert(NopSize == MinSize && "Could not implement MinSize!");        (void)NopSize;      }    } -  OutStreamer->EmitInstruction(MCI, getSubtargetInfo()); +  OutStreamer->emitInstruction(MCI, getSubtargetInfo());  }  // Lower a stackmap of the form: @@ -1342,7 +1373,7 @@ void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {    auto &Ctx = OutStreamer->getContext();    MCSymbol *MILabel = Ctx.createTempSymbol(); -  OutStreamer->EmitLabel(MILabel); +  OutStreamer->emitLabel(MILabel);    SM.recordStackMap(*MILabel, MI);    unsigned NumShadowBytes = MI.getOperand(1).getImm(); @@ -1361,7 +1392,7 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,    auto &Ctx = OutStreamer->getContext();    MCSymbol *MILabel = Ctx.createTempSymbol(); -  OutStreamer->EmitLabel(MILabel); +  OutStreamer->emitLabel(MILabel);    SM.recordPatchPoint(*MILabel, MI);    PatchPointOpers opers(&MI); @@ -1410,8 +1441,7 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,    assert(NumBytes >= EncodedBytes &&           "Patchpoint can't request size less than the length of a call."); -  EmitNops(*OutStreamer, NumBytes - EncodedBytes, Subtarget->is64Bit(), -           getSubtargetInfo()); +  emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget);  }  void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, @@ -1442,13 +1472,13 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,    // First we emit the label and the jump.    auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);    OutStreamer->AddComment("# XRay Custom Event Log"); -  OutStreamer->EmitCodeAlignment(2); -  OutStreamer->EmitLabel(CurSled); +  OutStreamer->emitCodeAlignment(2); +  OutStreamer->emitLabel(CurSled);    // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as    // an operand (computed as an offset from the jmp instruction).    // FIXME: Find another less hacky way do force the relative jump. -  OutStreamer->EmitBinaryData("\xeb\x0f"); +  OutStreamer->emitBinaryData("\xeb\x0f");    // The default C calling convention will place two arguments into %rcx and    // %rdx -- so we only work with those. 
@@ -1471,7 +1501,7 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,          EmitAndCountInstruction(              MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));        } else { -        EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo()); +        emitX86Nops(*OutStreamer, 4, Subtarget);        }      } @@ -1500,14 +1530,14 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,      if (UsedMask[I])        EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));      else -      EmitNops(*OutStreamer, 1, Subtarget->is64Bit(), getSubtargetInfo()); +      emitX86Nops(*OutStreamer, 1, Subtarget);    OutStreamer->AddComment("xray custom event end."); -  // Record the sled version. Older versions of this sled were spelled -  // differently, so we let the runtime handle the different offsets we're -  // using. -  recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 1); +  // Record the sled version. Version 0 of this sled was spelled differently, so +  // we let the runtime handle the different offsets we're using. Version 2 +  // changed the absolute address to a PC-relative address. +  recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2);  }  void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI, @@ -1538,13 +1568,13 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,    // First we emit the label and the jump.    auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);    OutStreamer->AddComment("# XRay Typed Event Log"); -  OutStreamer->EmitCodeAlignment(2); -  OutStreamer->EmitLabel(CurSled); +  OutStreamer->emitCodeAlignment(2); +  OutStreamer->emitLabel(CurSled);    // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as    // an operand (computed as an offset from the jmp instruction).    // FIXME: Find another less hacky way do force the relative jump. -  OutStreamer->EmitBinaryData("\xeb\x14"); +  OutStreamer->emitBinaryData("\xeb\x14");    // An x86-64 convention may place three arguments into %rcx, %rdx, and R8,    // so we'll work with those. Or we may be called via SystemV, in which case @@ -1569,7 +1599,7 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,          EmitAndCountInstruction(              MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));        } else { -        EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo()); +        emitX86Nops(*OutStreamer, 4, Subtarget);        }      } @@ -1603,12 +1633,12 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,      if (UsedMask[I])        EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));      else -      EmitNops(*OutStreamer, 1, Subtarget->is64Bit(), getSubtargetInfo()); +      emitX86Nops(*OutStreamer, 1, Subtarget);    OutStreamer->AddComment("xray typed event end.");    // Record the sled version. 
-  recordSled(CurSled, MI, SledKind::TYPED_EVENT, 0); +  recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2);  }  void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI, @@ -1623,7 +1653,7 @@ void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,              .getValueAsString()              .getAsInteger(10, Num))        return; -    EmitNops(*OutStreamer, Num, Subtarget->is64Bit(), getSubtargetInfo()); +    emitX86Nops(*OutStreamer, Num, Subtarget);      return;    }    // We want to emit the following pattern: @@ -1640,15 +1670,15 @@ void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,    //   call <relative offset, 32-bits>   // 5 bytes    //    auto CurSled = OutContext.createTempSymbol("xray_sled_", true); -  OutStreamer->EmitCodeAlignment(2); -  OutStreamer->EmitLabel(CurSled); +  OutStreamer->emitCodeAlignment(2); +  OutStreamer->emitLabel(CurSled);    // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as    // an operand (computed as an offset from the jmp instruction).    // FIXME: Find another less hacky way do force the relative jump. -  OutStreamer->EmitBytes("\xeb\x09"); -  EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo()); -  recordSled(CurSled, MI, SledKind::FUNCTION_ENTER); +  OutStreamer->emitBytes("\xeb\x09"); +  emitX86Nops(*OutStreamer, 9, Subtarget); +  recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2);  }  void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI, @@ -1670,17 +1700,17 @@ void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,    //    // This just makes sure that the alignment for the next instruction is 2.    auto CurSled = OutContext.createTempSymbol("xray_sled_", true); -  OutStreamer->EmitCodeAlignment(2); -  OutStreamer->EmitLabel(CurSled); +  OutStreamer->emitCodeAlignment(2); +  OutStreamer->emitLabel(CurSled);    unsigned OpCode = MI.getOperand(0).getImm();    MCInst Ret;    Ret.setOpcode(OpCode);    for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end()))      if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))        Ret.addOperand(MaybeOperand.getValue()); -  OutStreamer->EmitInstruction(Ret, getSubtargetInfo()); -  EmitNops(*OutStreamer, 10, Subtarget->is64Bit(), getSubtargetInfo()); -  recordSled(CurSled, MI, SledKind::FUNCTION_EXIT); +  OutStreamer->emitInstruction(Ret, getSubtargetInfo()); +  emitX86Nops(*OutStreamer, 10, Subtarget); +  recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2);  }  void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, @@ -1694,17 +1724,17 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,    // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual    // tail call much like how we have it in PATCHABLE_RET.    auto CurSled = OutContext.createTempSymbol("xray_sled_", true); -  OutStreamer->EmitCodeAlignment(2); -  OutStreamer->EmitLabel(CurSled); +  OutStreamer->emitCodeAlignment(2); +  OutStreamer->emitLabel(CurSled);    auto Target = OutContext.createTempSymbol();    // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as    // an operand (computed as an offset from the jmp instruction).    // FIXME: Find another less hacky way do force the relative jump. 
-  OutStreamer->EmitBytes("\xeb\x09"); -  EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo()); -  OutStreamer->EmitLabel(Target); -  recordSled(CurSled, MI, SledKind::TAIL_CALL); +  OutStreamer->emitBytes("\xeb\x09"); +  emitX86Nops(*OutStreamer, 9, Subtarget); +  OutStreamer->emitLabel(Target); +  recordSled(CurSled, MI, SledKind::TAIL_CALL, 2);    unsigned OpCode = MI.getOperand(0).getImm();    OpCode = convertTailJumpOpcode(OpCode); @@ -1717,7 +1747,7 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,    for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end()))      if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))        TC.addOperand(MaybeOperand.getValue()); -  OutStreamer->EmitInstruction(TC, getSubtargetInfo()); +  OutStreamer->emitInstruction(TC, getSubtargetInfo());  }  // Returns instruction preceding MBBI in MachineFunction. @@ -1961,300 +1991,9 @@ static unsigned getRegisterWidth(const MCOperandInfo &Info) {    llvm_unreachable("Unknown register class!");  } -void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { -  X86MCInstLower MCInstLowering(*MF, *this); -  const X86RegisterInfo *RI = -      MF->getSubtarget<X86Subtarget>().getRegisterInfo(); - -  // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that -  // are compressed from EVEX encoding to VEX encoding. -  if (TM.Options.MCOptions.ShowMCEncoding) { -    if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX) -      OutStreamer->AddComment("EVEX TO VEX Compression ", false); -  } - +static void addConstantComments(const MachineInstr *MI, +                                MCStreamer &OutStreamer) {    switch (MI->getOpcode()) { -  case TargetOpcode::DBG_VALUE: -    llvm_unreachable("Should be handled target independently"); - -  // Emit nothing here but a comment if we can. -  case X86::Int_MemBarrier: -    OutStreamer->emitRawComment("MEMBARRIER"); -    return; - -  case X86::EH_RETURN: -  case X86::EH_RETURN64: { -    // Lower these as normal, but add some comments. -    Register Reg = MI->getOperand(0).getReg(); -    OutStreamer->AddComment(StringRef("eh_return, addr: %") + -                            X86ATTInstPrinter::getRegisterName(Reg)); -    break; -  } -  case X86::CLEANUPRET: { -    // Lower these as normal, but add some comments. -    OutStreamer->AddComment("CLEANUPRET"); -    break; -  } - -  case X86::CATCHRET: { -    // Lower these as normal, but add some comments. -    OutStreamer->AddComment("CATCHRET"); -    break; -  } - -  case X86::ENDBR32: -  case X86::ENDBR64: { -    // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for -    // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be -    // non-empty. If MI is the initial ENDBR, place the -    // __patchable_function_entries label after ENDBR. 
-    if (CurrentPatchableFunctionEntrySym && -        CurrentPatchableFunctionEntrySym == CurrentFnBegin && -        MI == &MF->front().front()) { -      MCInst Inst; -      MCInstLowering.Lower(MI, Inst); -      EmitAndCountInstruction(Inst); -      CurrentPatchableFunctionEntrySym = createTempSymbol("patch"); -      OutStreamer->EmitLabel(CurrentPatchableFunctionEntrySym); -      return; -    } -    break; -  } - -  case X86::TAILJMPr: -  case X86::TAILJMPm: -  case X86::TAILJMPd: -  case X86::TAILJMPd_CC: -  case X86::TAILJMPr64: -  case X86::TAILJMPm64: -  case X86::TAILJMPd64: -  case X86::TAILJMPd64_CC: -  case X86::TAILJMPr64_REX: -  case X86::TAILJMPm64_REX: -    // Lower these as normal, but add some comments. -    OutStreamer->AddComment("TAILCALL"); -    break; - -  case X86::TLS_addr32: -  case X86::TLS_addr64: -  case X86::TLS_base_addr32: -  case X86::TLS_base_addr64: -    return LowerTlsAddr(MCInstLowering, *MI); - -  // Loading/storing mask pairs requires two kmov operations. The second one of these -  // needs a 2 byte displacement relative to the specified address (with 32 bit spill -  // size). The pairs of 1bit masks up to 16 bit masks all use the same spill size, -  // they all are stored using MASKPAIR16STORE, loaded using MASKPAIR16LOAD. -  // -  // The displacement value might wrap around in theory, thus the asserts in both -  // cases. -  case X86::MASKPAIR16LOAD: { -    int64_t Disp = MI->getOperand(1 + X86::AddrDisp).getImm(); -    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement"); -    Register Reg = MI->getOperand(0).getReg(); -    Register Reg0 = RI->getSubReg(Reg, X86::sub_mask_0); -    Register Reg1 = RI->getSubReg(Reg, X86::sub_mask_1); - -    // Load the first mask register -    MCInstBuilder MIB = MCInstBuilder(X86::KMOVWkm); -    MIB.addReg(Reg0); -    for (int i = 0; i < X86::AddrNumOperands; ++i) { -      auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i)); -      MIB.addOperand(Op.getValue()); -    } -    EmitAndCountInstruction(MIB); - -    // Load the second mask register of the pair -    MIB = MCInstBuilder(X86::KMOVWkm); -    MIB.addReg(Reg1); -    for (int i = 0; i < X86::AddrNumOperands; ++i) { -      if (i == X86::AddrDisp) { -        MIB.addImm(Disp + 2); -      } else { -        auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i)); -        MIB.addOperand(Op.getValue()); -      } -    } -    EmitAndCountInstruction(MIB); -    return; -  } - -  case X86::MASKPAIR16STORE: { -    int64_t Disp = MI->getOperand(X86::AddrDisp).getImm(); -    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement"); -    Register Reg = MI->getOperand(X86::AddrNumOperands).getReg(); -    Register Reg0 = RI->getSubReg(Reg, X86::sub_mask_0); -    Register Reg1 = RI->getSubReg(Reg, X86::sub_mask_1); - -    // Store the first mask register -    MCInstBuilder MIB = MCInstBuilder(X86::KMOVWmk); -    for (int i = 0; i < X86::AddrNumOperands; ++i) -      MIB.addOperand(MCInstLowering.LowerMachineOperand(MI, MI->getOperand(i)).getValue()); -    MIB.addReg(Reg0); -    EmitAndCountInstruction(MIB); - -    // Store the second mask register of the pair -    MIB = MCInstBuilder(X86::KMOVWmk); -    for (int i = 0; i < X86::AddrNumOperands; ++i) { -      if (i == X86::AddrDisp) { -        MIB.addImm(Disp + 2); -      } else { -        auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(0 + i)); -        MIB.addOperand(Op.getValue()); -      } -    } -    MIB.addReg(Reg1); -    
EmitAndCountInstruction(MIB); -    return; -  } - -  case X86::MOVPC32r: { -    // This is a pseudo op for a two instruction sequence with a label, which -    // looks like: -    //     call "L1$pb" -    // "L1$pb": -    //     popl %esi - -    // Emit the call. -    MCSymbol *PICBase = MF->getPICBaseSymbol(); -    // FIXME: We would like an efficient form for this, so we don't have to do a -    // lot of extra uniquing. -    EmitAndCountInstruction( -        MCInstBuilder(X86::CALLpcrel32) -            .addExpr(MCSymbolRefExpr::create(PICBase, OutContext))); - -    const X86FrameLowering *FrameLowering = -        MF->getSubtarget<X86Subtarget>().getFrameLowering(); -    bool hasFP = FrameLowering->hasFP(*MF); - -    // TODO: This is needed only if we require precise CFA. -    bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() && -                               !OutStreamer->getDwarfFrameInfos().back().End; - -    int stackGrowth = -RI->getSlotSize(); - -    if (HasActiveDwarfFrame && !hasFP) { -      OutStreamer->EmitCFIAdjustCfaOffset(-stackGrowth); -    } - -    // Emit the label. -    OutStreamer->EmitLabel(PICBase); - -    // popl $reg -    EmitAndCountInstruction( -        MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg())); - -    if (HasActiveDwarfFrame && !hasFP) { -      OutStreamer->EmitCFIAdjustCfaOffset(stackGrowth); -    } -    return; -  } - -  case X86::ADD32ri: { -    // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri. -    if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS) -      break; - -    // Okay, we have something like: -    //  EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL) - -    // For this, we want to print something like: -    //   MYGLOBAL + (. - PICBASE) -    // However, we can't generate a ".", so just emit a new label here and refer -    // to it. -    MCSymbol *DotSym = OutContext.createTempSymbol(); -    OutStreamer->EmitLabel(DotSym); - -    // Now that we have emitted the label, lower the complex operand expression. 
-    MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2)); - -    const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext); -    const MCExpr *PICBase = -        MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext); -    DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext); - -    DotExpr = MCBinaryExpr::createAdd( -        MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext); - -    EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri) -                                .addReg(MI->getOperand(0).getReg()) -                                .addReg(MI->getOperand(1).getReg()) -                                .addExpr(DotExpr)); -    return; -  } -  case TargetOpcode::STATEPOINT: -    return LowerSTATEPOINT(*MI, MCInstLowering); - -  case TargetOpcode::FAULTING_OP: -    return LowerFAULTING_OP(*MI, MCInstLowering); - -  case TargetOpcode::FENTRY_CALL: -    return LowerFENTRY_CALL(*MI, MCInstLowering); - -  case TargetOpcode::PATCHABLE_OP: -    return LowerPATCHABLE_OP(*MI, MCInstLowering); - -  case TargetOpcode::STACKMAP: -    return LowerSTACKMAP(*MI); - -  case TargetOpcode::PATCHPOINT: -    return LowerPATCHPOINT(*MI, MCInstLowering); - -  case TargetOpcode::PATCHABLE_FUNCTION_ENTER: -    return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering); - -  case TargetOpcode::PATCHABLE_RET: -    return LowerPATCHABLE_RET(*MI, MCInstLowering); - -  case TargetOpcode::PATCHABLE_TAIL_CALL: -    return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering); - -  case TargetOpcode::PATCHABLE_EVENT_CALL: -    return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering); - -  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL: -    return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering); - -  case X86::MORESTACK_RET: -    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget))); -    return; - -  case X86::MORESTACK_RET_RESTORE_R10: -    // Return, then restore R10. -    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget))); -    EmitAndCountInstruction( -        MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX)); -    return; - -  case X86::SEH_PushReg: -  case X86::SEH_SaveReg: -  case X86::SEH_SaveXMM: -  case X86::SEH_StackAlloc: -  case X86::SEH_StackAlign: -  case X86::SEH_SetFrame: -  case X86::SEH_PushFrame: -  case X86::SEH_EndPrologue: -    EmitSEHInstruction(MI); -    return; - -  case X86::SEH_Epilogue: { -    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); -    MachineBasicBlock::const_iterator MBBI(MI); -    // Check if preceded by a call and emit nop if so. -    for (MBBI = PrevCrossBBInst(MBBI); -         MBBI != MachineBasicBlock::const_iterator(); -         MBBI = PrevCrossBBInst(MBBI)) { -      // Conservatively assume that pseudo instructions don't emit code and keep -      // looking for a call. We may emit an unnecessary nop in some cases. -      if (!MBBI->isPseudo()) { -        if (MBBI->isCall()) -          EmitAndCountInstruction(MCInstBuilder(X86::NOOP)); -        break; -      } -    } -    return; -  } -    // Lower PSHUFB and VPERMILP normally but add a comment if we can find    // a constant shuffle mask. We won't be able to do this at the MC layer    // because the mask isn't an immediate. 
@@ -2270,30 +2009,19 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {    case X86::VPSHUFBZrm:    case X86::VPSHUFBZrmk:    case X86::VPSHUFBZrmkz: { -    if (!OutStreamer->isVerboseAsm()) -      break; -    unsigned SrcIdx, MaskIdx; -    switch (MI->getOpcode()) { -    default: llvm_unreachable("Invalid opcode"); -    case X86::PSHUFBrm: -    case X86::VPSHUFBrm: -    case X86::VPSHUFBYrm: -    case X86::VPSHUFBZ128rm: -    case X86::VPSHUFBZ256rm: -    case X86::VPSHUFBZrm: -      SrcIdx = 1; MaskIdx = 5; break; -    case X86::VPSHUFBZ128rmkz: -    case X86::VPSHUFBZ256rmkz: -    case X86::VPSHUFBZrmkz: -      SrcIdx = 2; MaskIdx = 6; break; -    case X86::VPSHUFBZ128rmk: -    case X86::VPSHUFBZ256rmk: -    case X86::VPSHUFBZrmk: -      SrcIdx = 3; MaskIdx = 7; break; +    unsigned SrcIdx = 1; +    if (X86II::isKMasked(MI->getDesc().TSFlags)) { +      // Skip mask operand. +      ++SrcIdx; +      if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) { +        // Skip passthru operand. +        ++SrcIdx; +      }      } +    unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp; -    assert(MI->getNumOperands() >= 6 && -           "We should always have at least 6 operands!"); +    assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) && +           "Unexpected number of operands!");      const MachineOperand &MaskOp = MI->getOperand(MaskIdx);      if (auto *C = getConstantFromPool(*MI, MaskOp)) { @@ -2301,7 +2029,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {        SmallVector<int, 64> Mask;        DecodePSHUFBMask(C, Width, Mask);        if (!Mask.empty()) -        OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask)); +        OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));      }      break;    } @@ -2328,9 +2056,6 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {    case X86::VPERMILPDZrm:    case X86::VPERMILPDZrmk:    case X86::VPERMILPDZrmkz: { -    if (!OutStreamer->isVerboseAsm()) -      break; -    unsigned SrcIdx, MaskIdx;      unsigned ElSize;      switch (MI->getOpcode()) {      default: llvm_unreachable("Invalid opcode"); @@ -2339,33 +2064,42 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {      case X86::VPERMILPSZ128rm:      case X86::VPERMILPSZ256rm:      case X86::VPERMILPSZrm: -      SrcIdx = 1; MaskIdx = 5; ElSize = 32; break;      case X86::VPERMILPSZ128rmkz:      case X86::VPERMILPSZ256rmkz:      case X86::VPERMILPSZrmkz: -      SrcIdx = 2; MaskIdx = 6; ElSize = 32; break;      case X86::VPERMILPSZ128rmk:      case X86::VPERMILPSZ256rmk:      case X86::VPERMILPSZrmk: -      SrcIdx = 3; MaskIdx = 7; ElSize = 32; break; +      ElSize = 32; +      break;      case X86::VPERMILPDrm:      case X86::VPERMILPDYrm:      case X86::VPERMILPDZ128rm:      case X86::VPERMILPDZ256rm:      case X86::VPERMILPDZrm: -      SrcIdx = 1; MaskIdx = 5; ElSize = 64; break;      case X86::VPERMILPDZ128rmkz:      case X86::VPERMILPDZ256rmkz:      case X86::VPERMILPDZrmkz: -      SrcIdx = 2; MaskIdx = 6; ElSize = 64; break;      case X86::VPERMILPDZ128rmk:      case X86::VPERMILPDZ256rmk:      case X86::VPERMILPDZrmk: -      SrcIdx = 3; MaskIdx = 7; ElSize = 64; break; +      ElSize = 64; +      break;      } -    assert(MI->getNumOperands() >= 6 && -           "We should always have at least 6 operands!"); +    unsigned SrcIdx = 1; +    if (X86II::isKMasked(MI->getDesc().TSFlags)) { +      // Skip mask operand. 
+      ++SrcIdx; +      if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) { +        // Skip passthru operand. +        ++SrcIdx; +      } +    } +    unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp; + +    assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) && +           "Unexpected number of operands!");      const MachineOperand &MaskOp = MI->getOperand(MaskIdx);      if (auto *C = getConstantFromPool(*MI, MaskOp)) { @@ -2373,7 +2107,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {        SmallVector<int, 16> Mask;        DecodeVPERMILPMask(C, ElSize, Width, Mask);        if (!Mask.empty()) -        OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask)); +        OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));      }      break;    } @@ -2382,10 +2116,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {    case X86::VPERMIL2PSrm:    case X86::VPERMIL2PDYrm:    case X86::VPERMIL2PSYrm: { -    if (!OutStreamer->isVerboseAsm()) -      break; -    assert(MI->getNumOperands() >= 8 && -           "We should always have at least 8 operands!"); +    assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) && +           "Unexpected number of operands!");      const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);      if (!CtrlOp.isImm()) @@ -2398,47 +2130,43 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {      case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;      } -    const MachineOperand &MaskOp = MI->getOperand(6); +    const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);      if (auto *C = getConstantFromPool(*MI, MaskOp)) {        unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);        SmallVector<int, 16> Mask;        DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);        if (!Mask.empty()) -        OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask)); +        OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));      }      break;    }    case X86::VPPERMrrm: { -    if (!OutStreamer->isVerboseAsm()) -      break; -    assert(MI->getNumOperands() >= 7 && -           "We should always have at least 7 operands!"); +    assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands) && +           "Unexpected number of operands!"); -    const MachineOperand &MaskOp = MI->getOperand(6); +    const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);      if (auto *C = getConstantFromPool(*MI, MaskOp)) {        unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);        SmallVector<int, 16> Mask;        DecodeVPPERMMask(C, Width, Mask);        if (!Mask.empty()) -        OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask)); +        OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));      }      break;    }    case X86::MMX_MOVQ64rm: { -    if (!OutStreamer->isVerboseAsm()) -      break; -    if (MI->getNumOperands() <= 4) -      break; -    if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) { +    assert(MI->getNumOperands() == (1 + X86::AddrNumOperands) && +           "Unexpected number of operands!"); +    if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {        std::string Comment;        raw_string_ostream CS(Comment);        const MachineOperand &DstOp = MI->getOperand(0);        CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";        if (auto *CF = dyn_cast<ConstantFP>(C)) {          CS << "0x" << 
CF->getValueAPF().bitcastToAPInt().toString(16, false); -        OutStreamer->AddComment(CS.str()); +        OutStreamer.AddComment(CS.str());        }      }      break; @@ -2489,11 +2217,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {    case X86::VBROADCASTI64X2Z128rm:    case X86::VBROADCASTI64X2rm:    case X86::VBROADCASTI64X4rm: -    if (!OutStreamer->isVerboseAsm()) -      break; -    if (MI->getNumOperands() <= 4) -      break; -    if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) { +    assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) && +           "Unexpected number of operands!"); +    if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {        int NumLanes = 1;        // Override NumLanes for the broadcast instructions.        switch (MI->getOpcode()) { @@ -2535,7 +2261,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {            }          }          CS << "]"; -        OutStreamer->AddComment(CS.str()); +        OutStreamer.AddComment(CS.str());        } else if (auto *CV = dyn_cast<ConstantVector>(C)) {          CS << "<";          for (int l = 0; l != NumLanes; ++l) { @@ -2547,80 +2273,79 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {            }          }          CS << ">"; -        OutStreamer->AddComment(CS.str()); +        OutStreamer.AddComment(CS.str());        }      }      break; +    case X86::MOVDDUPrm:    case X86::VMOVDDUPrm:    case X86::VMOVDDUPZ128rm:    case X86::VBROADCASTSSrm:    case X86::VBROADCASTSSYrm: -  case X86::VBROADCASTSSZ128m: -  case X86::VBROADCASTSSZ256m: -  case X86::VBROADCASTSSZm: +  case X86::VBROADCASTSSZ128rm: +  case X86::VBROADCASTSSZ256rm: +  case X86::VBROADCASTSSZrm:    case X86::VBROADCASTSDYrm: -  case X86::VBROADCASTSDZ256m: -  case X86::VBROADCASTSDZm: +  case X86::VBROADCASTSDZ256rm: +  case X86::VBROADCASTSDZrm:    case X86::VPBROADCASTBrm:    case X86::VPBROADCASTBYrm: -  case X86::VPBROADCASTBZ128m: -  case X86::VPBROADCASTBZ256m: -  case X86::VPBROADCASTBZm: +  case X86::VPBROADCASTBZ128rm: +  case X86::VPBROADCASTBZ256rm: +  case X86::VPBROADCASTBZrm:    case X86::VPBROADCASTDrm:    case X86::VPBROADCASTDYrm: -  case X86::VPBROADCASTDZ128m: -  case X86::VPBROADCASTDZ256m: -  case X86::VPBROADCASTDZm: +  case X86::VPBROADCASTDZ128rm: +  case X86::VPBROADCASTDZ256rm: +  case X86::VPBROADCASTDZrm:    case X86::VPBROADCASTQrm:    case X86::VPBROADCASTQYrm: -  case X86::VPBROADCASTQZ128m: -  case X86::VPBROADCASTQZ256m: -  case X86::VPBROADCASTQZm: +  case X86::VPBROADCASTQZ128rm: +  case X86::VPBROADCASTQZ256rm: +  case X86::VPBROADCASTQZrm:    case X86::VPBROADCASTWrm:    case X86::VPBROADCASTWYrm: -  case X86::VPBROADCASTWZ128m: -  case X86::VPBROADCASTWZ256m: -  case X86::VPBROADCASTWZm: -    if (!OutStreamer->isVerboseAsm()) -      break; -    if (MI->getNumOperands() <= 4) -      break; -    if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) { +  case X86::VPBROADCASTWZ128rm: +  case X86::VPBROADCASTWZ256rm: +  case X86::VPBROADCASTWZrm: +    assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) && +           "Unexpected number of operands!"); +    if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {        int NumElts;        switch (MI->getOpcode()) {        default: llvm_unreachable("Invalid opcode"); -      case X86::MOVDDUPrm:         NumElts = 2;  break; -      case X86::VMOVDDUPrm:        NumElts = 2;  break; -      case X86::VMOVDDUPZ128rm:    NumElts = 2;  break; -      case 
X86::VBROADCASTSSrm:    NumElts = 4;  break; -      case X86::VBROADCASTSSYrm:   NumElts = 8;  break; -      case X86::VBROADCASTSSZ128m: NumElts = 4;  break; -      case X86::VBROADCASTSSZ256m: NumElts = 8;  break; -      case X86::VBROADCASTSSZm:    NumElts = 16; break; -      case X86::VBROADCASTSDYrm:   NumElts = 4;  break; -      case X86::VBROADCASTSDZ256m: NumElts = 4;  break; -      case X86::VBROADCASTSDZm:    NumElts = 8;  break; -      case X86::VPBROADCASTBrm:    NumElts = 16; break; -      case X86::VPBROADCASTBYrm:   NumElts = 32; break; -      case X86::VPBROADCASTBZ128m: NumElts = 16; break; -      case X86::VPBROADCASTBZ256m: NumElts = 32; break; -      case X86::VPBROADCASTBZm:    NumElts = 64; break; -      case X86::VPBROADCASTDrm:    NumElts = 4;  break; -      case X86::VPBROADCASTDYrm:   NumElts = 8;  break; -      case X86::VPBROADCASTDZ128m: NumElts = 4;  break; -      case X86::VPBROADCASTDZ256m: NumElts = 8;  break; -      case X86::VPBROADCASTDZm:    NumElts = 16; break; -      case X86::VPBROADCASTQrm:    NumElts = 2;  break; -      case X86::VPBROADCASTQYrm:   NumElts = 4;  break; -      case X86::VPBROADCASTQZ128m: NumElts = 2;  break; -      case X86::VPBROADCASTQZ256m: NumElts = 4;  break; -      case X86::VPBROADCASTQZm:    NumElts = 8;  break; -      case X86::VPBROADCASTWrm:    NumElts = 8;  break; -      case X86::VPBROADCASTWYrm:   NumElts = 16; break; -      case X86::VPBROADCASTWZ128m: NumElts = 8;  break; -      case X86::VPBROADCASTWZ256m: NumElts = 16; break; -      case X86::VPBROADCASTWZm:    NumElts = 32; break; +      case X86::MOVDDUPrm:          NumElts = 2;  break; +      case X86::VMOVDDUPrm:         NumElts = 2;  break; +      case X86::VMOVDDUPZ128rm:     NumElts = 2;  break; +      case X86::VBROADCASTSSrm:     NumElts = 4;  break; +      case X86::VBROADCASTSSYrm:    NumElts = 8;  break; +      case X86::VBROADCASTSSZ128rm: NumElts = 4;  break; +      case X86::VBROADCASTSSZ256rm: NumElts = 8;  break; +      case X86::VBROADCASTSSZrm:    NumElts = 16; break; +      case X86::VBROADCASTSDYrm:    NumElts = 4;  break; +      case X86::VBROADCASTSDZ256rm: NumElts = 4;  break; +      case X86::VBROADCASTSDZrm:    NumElts = 8;  break; +      case X86::VPBROADCASTBrm:     NumElts = 16; break; +      case X86::VPBROADCASTBYrm:    NumElts = 32; break; +      case X86::VPBROADCASTBZ128rm: NumElts = 16; break; +      case X86::VPBROADCASTBZ256rm: NumElts = 32; break; +      case X86::VPBROADCASTBZrm:    NumElts = 64; break; +      case X86::VPBROADCASTDrm:     NumElts = 4;  break; +      case X86::VPBROADCASTDYrm:    NumElts = 8;  break; +      case X86::VPBROADCASTDZ128rm: NumElts = 4;  break; +      case X86::VPBROADCASTDZ256rm: NumElts = 8;  break; +      case X86::VPBROADCASTDZrm:    NumElts = 16; break; +      case X86::VPBROADCASTQrm:     NumElts = 2;  break; +      case X86::VPBROADCASTQYrm:    NumElts = 4;  break; +      case X86::VPBROADCASTQZ128rm: NumElts = 2;  break; +      case X86::VPBROADCASTQZ256rm: NumElts = 4;  break; +      case X86::VPBROADCASTQZrm:    NumElts = 8;  break; +      case X86::VPBROADCASTWrm:     NumElts = 8;  break; +      case X86::VPBROADCASTWYrm:    NumElts = 16; break; +      case X86::VPBROADCASTWZ128rm: NumElts = 8;  break; +      case X86::VPBROADCASTWZ256rm: NumElts = 16; break; +      case X86::VPBROADCASTWZrm:    NumElts = 32; break;        }        std::string Comment; @@ -2634,8 +2359,241 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {          printConstant(C, CS);        }        CS 
<< "]"; -      OutStreamer->AddComment(CS.str()); +      OutStreamer.AddComment(CS.str()); +    } +  } +} + +void X86AsmPrinter::emitInstruction(const MachineInstr *MI) { +  X86MCInstLower MCInstLowering(*MF, *this); +  const X86RegisterInfo *RI = +      MF->getSubtarget<X86Subtarget>().getRegisterInfo(); + +  // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that +  // are compressed from EVEX encoding to VEX encoding. +  if (TM.Options.MCOptions.ShowMCEncoding) { +    if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX) +      OutStreamer->AddComment("EVEX TO VEX Compression ", false); +  } + +  // Add comments for values loaded from constant pool. +  if (OutStreamer->isVerboseAsm()) +    addConstantComments(MI, *OutStreamer); + +  switch (MI->getOpcode()) { +  case TargetOpcode::DBG_VALUE: +    llvm_unreachable("Should be handled target independently"); + +  // Emit nothing here but a comment if we can. +  case X86::Int_MemBarrier: +    OutStreamer->emitRawComment("MEMBARRIER"); +    return; + +  case X86::EH_RETURN: +  case X86::EH_RETURN64: { +    // Lower these as normal, but add some comments. +    Register Reg = MI->getOperand(0).getReg(); +    OutStreamer->AddComment(StringRef("eh_return, addr: %") + +                            X86ATTInstPrinter::getRegisterName(Reg)); +    break; +  } +  case X86::CLEANUPRET: { +    // Lower these as normal, but add some comments. +    OutStreamer->AddComment("CLEANUPRET"); +    break; +  } + +  case X86::CATCHRET: { +    // Lower these as normal, but add some comments. +    OutStreamer->AddComment("CATCHRET"); +    break; +  } + +  case X86::ENDBR32: +  case X86::ENDBR64: { +    // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for +    // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be +    // non-empty. If MI is the initial ENDBR, place the +    // __patchable_function_entries label after ENDBR. +    if (CurrentPatchableFunctionEntrySym && +        CurrentPatchableFunctionEntrySym == CurrentFnBegin && +        MI == &MF->front().front()) { +      MCInst Inst; +      MCInstLowering.Lower(MI, Inst); +      EmitAndCountInstruction(Inst); +      CurrentPatchableFunctionEntrySym = createTempSymbol("patch"); +      OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym); +      return;      } +    break; +  } + +  case X86::TAILJMPr: +  case X86::TAILJMPm: +  case X86::TAILJMPd: +  case X86::TAILJMPd_CC: +  case X86::TAILJMPr64: +  case X86::TAILJMPm64: +  case X86::TAILJMPd64: +  case X86::TAILJMPd64_CC: +  case X86::TAILJMPr64_REX: +  case X86::TAILJMPm64_REX: +    // Lower these as normal, but add some comments. +    OutStreamer->AddComment("TAILCALL"); +    break; + +  case X86::TLS_addr32: +  case X86::TLS_addr64: +  case X86::TLS_base_addr32: +  case X86::TLS_base_addr64: +    return LowerTlsAddr(MCInstLowering, *MI); + +  case X86::MOVPC32r: { +    // This is a pseudo op for a two instruction sequence with a label, which +    // looks like: +    //     call "L1$pb" +    // "L1$pb": +    //     popl %esi + +    // Emit the call. +    MCSymbol *PICBase = MF->getPICBaseSymbol(); +    // FIXME: We would like an efficient form for this, so we don't have to do a +    // lot of extra uniquing. 
+    EmitAndCountInstruction( +        MCInstBuilder(X86::CALLpcrel32) +            .addExpr(MCSymbolRefExpr::create(PICBase, OutContext))); + +    const X86FrameLowering *FrameLowering = +        MF->getSubtarget<X86Subtarget>().getFrameLowering(); +    bool hasFP = FrameLowering->hasFP(*MF); + +    // TODO: This is needed only if we require precise CFA. +    bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() && +                               !OutStreamer->getDwarfFrameInfos().back().End; + +    int stackGrowth = -RI->getSlotSize(); + +    if (HasActiveDwarfFrame && !hasFP) { +      OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth); +    } + +    // Emit the label. +    OutStreamer->emitLabel(PICBase); + +    // popl $reg +    EmitAndCountInstruction( +        MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg())); + +    if (HasActiveDwarfFrame && !hasFP) { +      OutStreamer->emitCFIAdjustCfaOffset(stackGrowth); +    } +    return; +  } + +  case X86::ADD32ri: { +    // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri. +    if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS) +      break; + +    // Okay, we have something like: +    //  EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL) + +    // For this, we want to print something like: +    //   MYGLOBAL + (. - PICBASE) +    // However, we can't generate a ".", so just emit a new label here and refer +    // to it. +    MCSymbol *DotSym = OutContext.createTempSymbol(); +    OutStreamer->emitLabel(DotSym); + +    // Now that we have emitted the label, lower the complex operand expression. +    MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2)); + +    const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext); +    const MCExpr *PICBase = +        MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext); +    DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext); + +    DotExpr = MCBinaryExpr::createAdd( +        MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext); + +    EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri) +                                .addReg(MI->getOperand(0).getReg()) +                                .addReg(MI->getOperand(1).getReg()) +                                .addExpr(DotExpr)); +    return; +  } +  case TargetOpcode::STATEPOINT: +    return LowerSTATEPOINT(*MI, MCInstLowering); + +  case TargetOpcode::FAULTING_OP: +    return LowerFAULTING_OP(*MI, MCInstLowering); + +  case TargetOpcode::FENTRY_CALL: +    return LowerFENTRY_CALL(*MI, MCInstLowering); + +  case TargetOpcode::PATCHABLE_OP: +    return LowerPATCHABLE_OP(*MI, MCInstLowering); + +  case TargetOpcode::STACKMAP: +    return LowerSTACKMAP(*MI); + +  case TargetOpcode::PATCHPOINT: +    return LowerPATCHPOINT(*MI, MCInstLowering); + +  case TargetOpcode::PATCHABLE_FUNCTION_ENTER: +    return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering); + +  case TargetOpcode::PATCHABLE_RET: +    return LowerPATCHABLE_RET(*MI, MCInstLowering); + +  case TargetOpcode::PATCHABLE_TAIL_CALL: +    return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering); + +  case TargetOpcode::PATCHABLE_EVENT_CALL: +    return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering); + +  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL: +    return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering); + +  case X86::MORESTACK_RET: +    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget))); +    return; + +  case X86::MORESTACK_RET_RESTORE_R10: +    // Return, then restore R10. 
+    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget))); +    EmitAndCountInstruction( +        MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX)); +    return; + +  case X86::SEH_PushReg: +  case X86::SEH_SaveReg: +  case X86::SEH_SaveXMM: +  case X86::SEH_StackAlloc: +  case X86::SEH_StackAlign: +  case X86::SEH_SetFrame: +  case X86::SEH_PushFrame: +  case X86::SEH_EndPrologue: +    EmitSEHInstruction(MI); +    return; + +  case X86::SEH_Epilogue: { +    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); +    MachineBasicBlock::const_iterator MBBI(MI); +    // Check if preceded by a call and emit nop if so. +    for (MBBI = PrevCrossBBInst(MBBI); +         MBBI != MachineBasicBlock::const_iterator(); +         MBBI = PrevCrossBBInst(MBBI)) { +      // Conservatively assume that pseudo instructions don't emit code and keep +      // looking for a call. We may emit an unnecessary nop in some cases. +      if (!MBBI->isPseudo()) { +        if (MBBI->isCall()) +          EmitAndCountInstruction(MCInstBuilder(X86::NOOP)); +        break; +      } +    } +    return; +  }    }    MCInst TmpInst; @@ -2652,7 +2610,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {      // after it.      SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());      // Then emit the call -    OutStreamer->EmitInstruction(TmpInst, getSubtargetInfo()); +    OutStreamer->emitInstruction(TmpInst, getSubtargetInfo());      return;    }  | 
