Diffstat (limited to 'lib/Target/X86/X86MCInstLower.cpp')
-rw-r--r-- | lib/Target/X86/X86MCInstLower.cpp | 635
1 file changed, 431 insertions, 204 deletions
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index e1ca558f0f2c9..906e3427b2ff8 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -20,6 +20,7 @@
 #include "Utils/X86ShuffleDecode.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineOperand.h"
@@ -35,9 +36,15 @@
 #include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSectionELF.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
 using namespace llvm;
 
 namespace {
@@ -72,47 +79,33 @@ private:
 static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
                      const MCSubtargetInfo &STI);
 
-namespace llvm {
-  X86AsmPrinter::StackMapShadowTracker::StackMapShadowTracker(TargetMachine &TM)
-    : TM(TM), InShadow(false), RequiredShadowSize(0), CurrentShadowSize(0) {}
-
-  X86AsmPrinter::StackMapShadowTracker::~StackMapShadowTracker() {}
-
-  void
-  X86AsmPrinter::StackMapShadowTracker::startFunction(MachineFunction &F) {
-    MF = &F;
-    CodeEmitter.reset(TM.getTarget().createMCCodeEmitter(
-        *MF->getSubtarget().getInstrInfo(),
-        *MF->getSubtarget().getRegisterInfo(), MF->getContext()));
-  }
-
-  void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
-                                                   const MCSubtargetInfo &STI) {
-    if (InShadow) {
-      SmallString<256> Code;
-      SmallVector<MCFixup, 4> Fixups;
-      raw_svector_ostream VecOS(Code);
-      CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI);
-      CurrentShadowSize += Code.size();
-      if (CurrentShadowSize >= RequiredShadowSize)
-        InShadow = false; // The shadow is big enough. Stop counting.
-    }
+void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
+                                                 const MCSubtargetInfo &STI,
+                                                 MCCodeEmitter *CodeEmitter) {
+  if (InShadow) {
+    SmallString<256> Code;
+    SmallVector<MCFixup, 4> Fixups;
+    raw_svector_ostream VecOS(Code);
+    CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI);
+    CurrentShadowSize += Code.size();
+    if (CurrentShadowSize >= RequiredShadowSize)
+      InShadow = false; // The shadow is big enough. Stop counting.
   }
+}
 
-  void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
+void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
     MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
-    if (InShadow && CurrentShadowSize < RequiredShadowSize) {
-      InShadow = false;
-      EmitNops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
-               MF->getSubtarget<X86Subtarget>().is64Bit(), STI);
-    }
+  if (InShadow && CurrentShadowSize < RequiredShadowSize) {
+    InShadow = false;
+    EmitNops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
+             MF->getSubtarget<X86Subtarget>().is64Bit(), STI);
   }
+}
 
-  void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
-    OutStreamer->EmitInstruction(Inst, getSubtargetInfo());
-    SMShadowTracker.count(Inst, getSubtargetInfo());
-  }
-} // end llvm namespace
+void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
+  OutStreamer->EmitInstruction(Inst, getSubtargetInfo());
+  SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
+}
 
 X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
                                X86AsmPrinter &asmprinter)
@@ -140,12 +133,8 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
     // Handle dllimport linkage.
     Name += "__imp_";
     break;
-  case X86II::MO_DARWIN_STUB:
-    Suffix = "$stub";
-    break;
   case X86II::MO_DARWIN_NONLAZY:
   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
-  case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
     Suffix = "$non_lazy_ptr";
     break;
   }
@@ -153,8 +142,6 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
   if (!Suffix.empty())
     Name += DL.getPrivateGlobalPrefix();
 
-  unsigned PrefixLen = Name.size();
-
   if (MO.isGlobal()) {
     const GlobalValue *GV = MO.getGlobal();
     AsmPrinter.getNameWithPrefix(Name, GV);
@@ -164,14 +151,11 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
     assert(Suffix.empty());
     Sym = MO.getMBB()->getSymbol();
   }
-  unsigned OrigLen = Name.size() - PrefixLen;
 
   Name += Suffix;
   if (!Sym)
     Sym = Ctx.getOrCreateSymbol(Name);
 
-  StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen);
-
   // If the target flags on the operand changes the name of the symbol, do that
   // before we return the symbol.
   switch (MO.getTargetFlags()) {
@@ -189,36 +173,6 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
     }
     break;
   }
-  case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: {
-    MachineModuleInfoImpl::StubValueTy &StubSym =
-      getMachOMMI().getHiddenGVStubEntry(Sym);
-    if (!StubSym.getPointer()) {
-      assert(MO.isGlobal() && "Extern symbol not handled yet");
-      StubSym =
-        MachineModuleInfoImpl::
-        StubValueTy(AsmPrinter.getSymbol(MO.getGlobal()),
-                    !MO.getGlobal()->hasInternalLinkage());
-    }
-    break;
-  }
-  case X86II::MO_DARWIN_STUB: {
-    MachineModuleInfoImpl::StubValueTy &StubSym =
-      getMachOMMI().getFnStubEntry(Sym);
-    if (StubSym.getPointer())
-      return Sym;
-
-    if (MO.isGlobal()) {
-      StubSym =
-        MachineModuleInfoImpl::
-        StubValueTy(AsmPrinter.getSymbol(MO.getGlobal()),
-                    !MO.getGlobal()->hasInternalLinkage());
-    } else {
-      StubSym =
-        MachineModuleInfoImpl::
-        StubValueTy(Ctx.getOrCreateSymbol(OrigName), false);
-    }
-    break;
-  }
   }
 
   return Sym;
@@ -237,7 +191,6 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
   // These affect the name of the symbol, not any suffix.
   case X86II::MO_DARWIN_NONLAZY:
   case X86II::MO_DLLIMPORT:
-  case X86II::MO_DARWIN_STUB:
     break;
 
   case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break;
@@ -265,14 +218,13 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
   case X86II::MO_PLT: RefKind = MCSymbolRefExpr::VK_PLT; break;
   case X86II::MO_PIC_BASE_OFFSET:
   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
-  case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
     Expr = MCSymbolRefExpr::create(Sym, Ctx);
     // Subtract the pic base.
     Expr = MCBinaryExpr::createSub(Expr,
                            MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx),
                                    Ctx);
     if (MO.isJTI()) {
-      assert(MAI.doesSetDirectiveSuppressesReloc());
+      assert(MAI.doesSetDirectiveSuppressReloc());
       // If .set directive is supported, use it to reduce the number of
       // relocations the assembler will generate for differences between
       // local labels. This is only safe when the symbols are in the same
@@ -653,50 +605,81 @@ ReSimplify:
   // MOV64ao8, MOV64o8a
   // XCHG16ar, XCHG32ar, XCHG64ar
   case X86::MOV8mr_NOREX:
-  case X86::MOV8mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8o32a); break;
+  case X86::MOV8mr:
   case X86::MOV8rm_NOREX:
-  case X86::MOV8rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8ao32); break;
-  case X86::MOV16mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16o32a); break;
-  case X86::MOV16rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16ao32); break;
-  case X86::MOV32mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32o32a); break;
-  case X86::MOV32rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32ao32); break;
-
-  case X86::ADC8ri: SimplifyShortImmForm(OutMI, X86::ADC8i8); break;
-  case X86::ADC16ri: SimplifyShortImmForm(OutMI, X86::ADC16i16); break;
-  case X86::ADC32ri: SimplifyShortImmForm(OutMI, X86::ADC32i32); break;
-  case X86::ADC64ri32: SimplifyShortImmForm(OutMI, X86::ADC64i32); break;
-  case X86::ADD8ri: SimplifyShortImmForm(OutMI, X86::ADD8i8); break;
-  case X86::ADD16ri: SimplifyShortImmForm(OutMI, X86::ADD16i16); break;
-  case X86::ADD32ri: SimplifyShortImmForm(OutMI, X86::ADD32i32); break;
-  case X86::ADD64ri32: SimplifyShortImmForm(OutMI, X86::ADD64i32); break;
-  case X86::AND8ri: SimplifyShortImmForm(OutMI, X86::AND8i8); break;
-  case X86::AND16ri: SimplifyShortImmForm(OutMI, X86::AND16i16); break;
-  case X86::AND32ri: SimplifyShortImmForm(OutMI, X86::AND32i32); break;
-  case X86::AND64ri32: SimplifyShortImmForm(OutMI, X86::AND64i32); break;
-  case X86::CMP8ri: SimplifyShortImmForm(OutMI, X86::CMP8i8); break;
-  case X86::CMP16ri: SimplifyShortImmForm(OutMI, X86::CMP16i16); break;
-  case X86::CMP32ri: SimplifyShortImmForm(OutMI, X86::CMP32i32); break;
-  case X86::CMP64ri32: SimplifyShortImmForm(OutMI, X86::CMP64i32); break;
-  case X86::OR8ri: SimplifyShortImmForm(OutMI, X86::OR8i8); break;
-  case X86::OR16ri: SimplifyShortImmForm(OutMI, X86::OR16i16); break;
-  case X86::OR32ri: SimplifyShortImmForm(OutMI, X86::OR32i32); break;
-  case X86::OR64ri32: SimplifyShortImmForm(OutMI, X86::OR64i32); break;
-  case X86::SBB8ri: SimplifyShortImmForm(OutMI, X86::SBB8i8); break;
-  case X86::SBB16ri: SimplifyShortImmForm(OutMI, X86::SBB16i16); break;
-  case X86::SBB32ri: SimplifyShortImmForm(OutMI, X86::SBB32i32); break;
-  case X86::SBB64ri32: SimplifyShortImmForm(OutMI, X86::SBB64i32); break;
-  case X86::SUB8ri: SimplifyShortImmForm(OutMI, X86::SUB8i8); break;
-  case X86::SUB16ri: SimplifyShortImmForm(OutMI, X86::SUB16i16); break;
-  case X86::SUB32ri: SimplifyShortImmForm(OutMI, X86::SUB32i32); break;
-  case X86::SUB64ri32: SimplifyShortImmForm(OutMI, X86::SUB64i32); break;
-  case X86::TEST8ri: SimplifyShortImmForm(OutMI, X86::TEST8i8); break;
-  case X86::TEST16ri: SimplifyShortImmForm(OutMI, X86::TEST16i16); break;
-  case X86::TEST32ri: SimplifyShortImmForm(OutMI, X86::TEST32i32); break;
-  case X86::TEST64ri32: SimplifyShortImmForm(OutMI, X86::TEST64i32); break;
-  case X86::XOR8ri: SimplifyShortImmForm(OutMI, X86::XOR8i8); break;
-  case X86::XOR16ri: SimplifyShortImmForm(OutMI, X86::XOR16i16); break;
-  case X86::XOR32ri: SimplifyShortImmForm(OutMI, X86::XOR32i32); break;
-  case X86::XOR64ri32: SimplifyShortImmForm(OutMI, X86::XOR64i32); break;
+  case X86::MOV8rm:
+  case X86::MOV16mr:
+  case X86::MOV16rm:
+  case X86::MOV32mr:
+  case X86::MOV32rm: {
+    unsigned NewOpc;
+    switch (OutMI.getOpcode()) {
+    default: llvm_unreachable("Invalid opcode");
+    case X86::MOV8mr_NOREX:
+    case X86::MOV8mr:  NewOpc = X86::MOV8o32a; break;
+    case X86::MOV8rm_NOREX:
+    case X86::MOV8rm:  NewOpc = X86::MOV8ao32; break;
+    case X86::MOV16mr: NewOpc = X86::MOV16o32a; break;
+    case X86::MOV16rm: NewOpc = X86::MOV16ao32; break;
+    case X86::MOV32mr: NewOpc = X86::MOV32o32a; break;
+    case X86::MOV32rm: NewOpc = X86::MOV32ao32; break;
+    }
+    SimplifyShortMoveForm(AsmPrinter, OutMI, NewOpc);
+    break;
+  }
+
+  case X86::ADC8ri: case X86::ADC16ri: case X86::ADC32ri: case X86::ADC64ri32:
+  case X86::ADD8ri: case X86::ADD16ri: case X86::ADD32ri: case X86::ADD64ri32:
+  case X86::AND8ri: case X86::AND16ri: case X86::AND32ri: case X86::AND64ri32:
+  case X86::CMP8ri: case X86::CMP16ri: case X86::CMP32ri: case X86::CMP64ri32:
+  case X86::OR8ri: case X86::OR16ri: case X86::OR32ri: case X86::OR64ri32:
+  case X86::SBB8ri: case X86::SBB16ri: case X86::SBB32ri: case X86::SBB64ri32:
+  case X86::SUB8ri: case X86::SUB16ri: case X86::SUB32ri: case X86::SUB64ri32:
+  case X86::TEST8ri:case X86::TEST16ri:case X86::TEST32ri:case X86::TEST64ri32:
+  case X86::XOR8ri: case X86::XOR16ri: case X86::XOR32ri: case X86::XOR64ri32: {
+    unsigned NewOpc;
+    switch (OutMI.getOpcode()) {
+    default: llvm_unreachable("Invalid opcode");
+    case X86::ADC8ri: NewOpc = X86::ADC8i8; break;
+    case X86::ADC16ri: NewOpc = X86::ADC16i16; break;
+    case X86::ADC32ri: NewOpc = X86::ADC32i32; break;
+    case X86::ADC64ri32: NewOpc = X86::ADC64i32; break;
+    case X86::ADD8ri: NewOpc = X86::ADD8i8; break;
+    case X86::ADD16ri: NewOpc = X86::ADD16i16; break;
+    case X86::ADD32ri: NewOpc = X86::ADD32i32; break;
+    case X86::ADD64ri32: NewOpc = X86::ADD64i32; break;
+    case X86::AND8ri: NewOpc = X86::AND8i8; break;
+    case X86::AND16ri: NewOpc = X86::AND16i16; break;
+    case X86::AND32ri: NewOpc = X86::AND32i32; break;
+    case X86::AND64ri32: NewOpc = X86::AND64i32; break;
+    case X86::CMP8ri: NewOpc = X86::CMP8i8; break;
+    case X86::CMP16ri: NewOpc = X86::CMP16i16; break;
+    case X86::CMP32ri: NewOpc = X86::CMP32i32; break;
+    case X86::CMP64ri32: NewOpc = X86::CMP64i32; break;
+    case X86::OR8ri: NewOpc = X86::OR8i8; break;
+    case X86::OR16ri: NewOpc = X86::OR16i16; break;
+    case X86::OR32ri: NewOpc = X86::OR32i32; break;
+    case X86::OR64ri32: NewOpc = X86::OR64i32; break;
+    case X86::SBB8ri: NewOpc = X86::SBB8i8; break;
+    case X86::SBB16ri: NewOpc = X86::SBB16i16; break;
+    case X86::SBB32ri: NewOpc = X86::SBB32i32; break;
+    case X86::SBB64ri32: NewOpc = X86::SBB64i32; break;
+    case X86::SUB8ri: NewOpc = X86::SUB8i8; break;
+    case X86::SUB16ri: NewOpc = X86::SUB16i16; break;
+    case X86::SUB32ri: NewOpc = X86::SUB32i32; break;
+    case X86::SUB64ri32: NewOpc = X86::SUB64i32; break;
+    case X86::TEST8ri: NewOpc = X86::TEST8i8; break;
+    case X86::TEST16ri: NewOpc = X86::TEST16i16; break;
+    case X86::TEST32ri: NewOpc = X86::TEST32i32; break;
+    case X86::TEST64ri32: NewOpc = X86::TEST64i32; break;
+    case X86::XOR8ri: NewOpc = X86::XOR8i8; break;
+    case X86::XOR16ri: NewOpc = X86::XOR16i16; break;
+    case X86::XOR32ri: NewOpc = X86::XOR32i32; break;
+    case X86::XOR64ri32: NewOpc = X86::XOR64i32; break;
+    }
+    SimplifyShortImmForm(OutMI, NewOpc);
+    break;
+  }
 
   // Try to shrink some forms of movsx.
   case X86::MOVSX16rr8:
@@ -785,55 +768,77 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
                                   .addExpr(tlsRef));
 }
 
-/// \brief Emit the optimal amount of multi-byte nops on X86.
-static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, const MCSubtargetInfo &STI) {
+/// \brief Emit the largest nop instruction smaller than or equal to \p NumBytes
+/// bytes. Return the size of nop emitted.
+static unsigned EmitNop(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
+                        const MCSubtargetInfo &STI) {
   // This works only for 64bit. For 32bit we have to do additional checking if
   // the CPU supports multi-byte nops.
   assert(Is64Bit && "EmitNops only supports X86-64");
-  while (NumBytes) {
-    unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
-    Opc = IndexReg = Displacement = SegmentReg = 0;
-    BaseReg = X86::RAX; ScaleVal = 1;
-    switch (NumBytes) {
-    case 0: llvm_unreachable("Zero nops?"); break;
-    case 1: NumBytes -= 1; Opc = X86::NOOP; break;
-    case 2: NumBytes -= 2; Opc = X86::XCHG16ar; break;
-    case 3: NumBytes -= 3; Opc = X86::NOOPL; break;
-    case 4: NumBytes -= 4; Opc = X86::NOOPL; Displacement = 8; break;
-    case 5: NumBytes -= 5; Opc = X86::NOOPL; Displacement = 8;
-            IndexReg = X86::RAX; break;
-    case 6: NumBytes -= 6; Opc = X86::NOOPW; Displacement = 8;
-            IndexReg = X86::RAX; break;
-    case 7: NumBytes -= 7; Opc = X86::NOOPL; Displacement = 512; break;
-    case 8: NumBytes -= 8; Opc = X86::NOOPL; Displacement = 512;
-            IndexReg = X86::RAX; break;
-    case 9: NumBytes -= 9; Opc = X86::NOOPW; Displacement = 512;
-            IndexReg = X86::RAX; break;
-    default: NumBytes -= 10; Opc = X86::NOOPW; Displacement = 512;
-             IndexReg = X86::RAX; SegmentReg = X86::CS; break;
-    }
-    unsigned NumPrefixes = std::min(NumBytes, 5U);
-    NumBytes -= NumPrefixes;
-    for (unsigned i = 0; i != NumPrefixes; ++i)
-      OS.EmitBytes("\x66");
+  unsigned NopSize;
+  unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
+  Opc = IndexReg = Displacement = SegmentReg = 0;
+  BaseReg = X86::RAX;
+  ScaleVal = 1;
+  switch (NumBytes) {
+  case 0: llvm_unreachable("Zero nops?"); break;
+  case 1: NopSize = 1; Opc = X86::NOOP; break;
+  case 2: NopSize = 2; Opc = X86::XCHG16ar; break;
+  case 3: NopSize = 3; Opc = X86::NOOPL; break;
+  case 4: NopSize = 4; Opc = X86::NOOPL; Displacement = 8; break;
+  case 5: NopSize = 5; Opc = X86::NOOPL; Displacement = 8;
+          IndexReg = X86::RAX; break;
+  case 6: NopSize = 6; Opc = X86::NOOPW; Displacement = 8;
+          IndexReg = X86::RAX; break;
+  case 7: NopSize = 7; Opc = X86::NOOPL; Displacement = 512; break;
+  case 8: NopSize = 8; Opc = X86::NOOPL; Displacement = 512;
+          IndexReg = X86::RAX; break;
+  case 9: NopSize = 9; Opc = X86::NOOPW; Displacement = 512;
+          IndexReg = X86::RAX; break;
+  default: NopSize = 10; Opc = X86::NOOPW; Displacement = 512;
+           IndexReg = X86::RAX; SegmentReg = X86::CS; break;
+  }
 
-    switch (Opc) {
-    default: llvm_unreachable("Unexpected opcode"); break;
-    case X86::NOOP:
-      OS.EmitInstruction(MCInstBuilder(Opc), STI);
-      break;
-    case X86::XCHG16ar:
-      OS.EmitInstruction(MCInstBuilder(Opc).addReg(X86::AX), STI);
-      break;
-    case X86::NOOPL:
-    case X86::NOOPW:
-      OS.EmitInstruction(MCInstBuilder(Opc).addReg(BaseReg)
-                         .addImm(ScaleVal).addReg(IndexReg)
-                         .addImm(Displacement).addReg(SegmentReg), STI);
-      break;
-    }
-  } // while (NumBytes)
+  unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
+  NopSize += NumPrefixes;
+  for (unsigned i = 0; i != NumPrefixes; ++i)
+    OS.EmitBytes("\x66");
+
+  switch (Opc) {
+  default:
+    llvm_unreachable("Unexpected opcode");
+    break;
+  case X86::NOOP:
+    OS.EmitInstruction(MCInstBuilder(Opc), STI);
+    break;
+  case X86::XCHG16ar:
+    OS.EmitInstruction(MCInstBuilder(Opc).addReg(X86::AX), STI);
+    break;
+  case X86::NOOPL:
+  case X86::NOOPW:
+    OS.EmitInstruction(MCInstBuilder(Opc)
+                           .addReg(BaseReg)
+                           .addImm(ScaleVal)
+                           .addReg(IndexReg)
+                           .addImm(Displacement)
+                           .addReg(SegmentReg),
+                       STI);
+    break;
+  }
+  assert(NopSize <= NumBytes && "We overemitted?");
+  return NopSize;
+}
+
+/// \brief Emit the optimal amount of multi-byte nops on X86.
+static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
+                     const MCSubtargetInfo &STI) {
+  unsigned NopsToEmit = NumBytes;
+  (void)NopsToEmit;
+  while (NumBytes) {
+    NumBytes -= EmitNop(OS, NumBytes, Is64Bit, STI);
+    assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
+  }
 }
 
 void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
@@ -891,10 +896,10 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
 
 void X86AsmPrinter::LowerFAULTING_LOAD_OP(const MachineInstr &MI,
                                           X86MCInstLower &MCIL) {
-  // FAULTING_LOAD_OP <def>, <handler label>, <load opcode>, <load operands>
+  // FAULTING_LOAD_OP <def>, <MBB handler>, <load opcode>, <load operands>
 
   unsigned LoadDefRegister = MI.getOperand(0).getReg();
-  MCSymbol *HandlerLabel = MI.getOperand(1).getMCSymbol();
+  MCSymbol *HandlerLabel = MI.getOperand(1).getMBB()->getSymbol();
   unsigned LoadOpcode = MI.getOperand(2).getImm();
   unsigned LoadOperandsBeginIdx = 3;
 
@@ -915,6 +920,43 @@ void X86AsmPrinter::LowerFAULTING_LOAD_OP(const MachineInstr &MI,
   OutStreamer->EmitInstruction(LoadMI, getSubtargetInfo());
 }
 
+void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
+                                      X86MCInstLower &MCIL) {
+  // PATCHABLE_OP minsize, opcode, operands
+
+  unsigned MinSize = MI.getOperand(0).getImm();
+  unsigned Opcode = MI.getOperand(1).getImm();
+
+  MCInst MCI;
+  MCI.setOpcode(Opcode);
+  for (auto &MO : make_range(MI.operands_begin() + 2, MI.operands_end()))
+    if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
+      MCI.addOperand(MaybeOperand.getValue());
+
+  SmallString<256> Code;
+  SmallVector<MCFixup, 4> Fixups;
+  raw_svector_ostream VecOS(Code);
+  CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo());
+
+  if (Code.size() < MinSize) {
+    if (MinSize == 2 && Opcode == X86::PUSH64r) {
+      // This is an optimization that lets us get away without emitting a nop in
+      // many cases.
+      //
+      // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %R9) takes two
+      // bytes too, so the check on MinSize is important.
+      MCI.setOpcode(X86::PUSH64rmr);
+    } else {
+      unsigned NopSize = EmitNop(*OutStreamer, MinSize, Subtarget->is64Bit(),
+                                 getSubtargetInfo());
+      assert(NopSize == MinSize && "Could not implement MinSize!");
+      (void) NopSize;
+    }
+  }
+
+  OutStreamer->EmitInstruction(MCI, getSubtargetInfo());
+}
+
 // Lower a stackmap of the form:
 // <id>, <shadowBytes>, ...
 void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
@@ -982,14 +1024,107 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
                                getSubtargetInfo());
 }
 
+void X86AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
+                               SledKind Kind) {
+  auto Fn = MI.getParent()->getParent()->getFunction();
+  auto Attr = Fn->getFnAttribute("function-instrument");
+  bool AlwaysInstrument =
+      Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always";
+  Sleds.emplace_back(
+      XRayFunctionEntry{Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn});
+}
+
+void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
+                                                  X86MCInstLower &MCIL) {
+  // We want to emit the following pattern:
+  //
+  //   .Lxray_sled_N:
+  //     .palign 2, ...
+  //     jmp .tmpN
+  //     # 9 bytes worth of noops
+  //   .tmpN
+  //
+  // We need the 9 bytes because at runtime, we'd be patching over the full 11
+  // bytes with the following pattern:
+  //
+  //   mov %r10, <function id, 32-bit> // 6 bytes
+  //   call <relative offset, 32-bits> // 5 bytes
+  //
+  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
+  OutStreamer->EmitLabel(CurSled);
+  OutStreamer->EmitCodeAlignment(4);
+  auto Target = OutContext.createTempSymbol();
+
+  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
+  // an operand (computed as an offset from the jmp instruction).
+  // FIXME: Find another less hacky way do force the relative jump.
+  OutStreamer->EmitBytes("\xeb\x09");
+  EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo());
+  OutStreamer->EmitLabel(Target);
+  recordSled(CurSled, MI, SledKind::FUNCTION_ENTER);
+}
+
+void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
+                                       X86MCInstLower &MCIL) {
+  // Since PATCHABLE_RET takes the opcode of the return statement as an
+  // argument, we use that to emit the correct form of the RET that we want.
+  // i.e. when we see this:
+  //
+  //   PATCHABLE_RET X86::RET ...
+  //
+  // We should emit the RET followed by sleds.
+  //
+  //   .Lxray_sled_N:
+  //     ret  # or equivalent instruction
+  //     # 10 bytes worth of noops
+  //
+  // This just makes sure that the alignment for the next instruction is 2.
+  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
+  OutStreamer->EmitLabel(CurSled);
+  unsigned OpCode = MI.getOperand(0).getImm();
+  MCInst Ret;
+  Ret.setOpcode(OpCode);
+  for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end()))
+    if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
+      Ret.addOperand(MaybeOperand.getValue());
+  OutStreamer->EmitInstruction(Ret, getSubtargetInfo());
+  EmitNops(*OutStreamer, 10, Subtarget->is64Bit(), getSubtargetInfo());
+  recordSled(CurSled, MI, SledKind::FUNCTION_EXIT);
+}
+
+void X86AsmPrinter::EmitXRayTable() {
+  if (Sleds.empty())
+    return;
+  if (Subtarget->isTargetELF()) {
+    auto *Section = OutContext.getELFSection(
+        "xray_instr_map", ELF::SHT_PROGBITS,
+        ELF::SHF_ALLOC | ELF::SHF_GROUP | ELF::SHF_MERGE, 0,
+        CurrentFnSym->getName());
+    auto PrevSection = OutStreamer->getCurrentSectionOnly();
+    OutStreamer->SwitchSection(Section);
+    for (const auto &Sled : Sleds) {
+      OutStreamer->EmitSymbolValue(Sled.Sled, 8);
+      OutStreamer->EmitSymbolValue(CurrentFnSym, 8);
+      auto Kind = static_cast<uint8_t>(Sled.Kind);
+      OutStreamer->EmitBytes(
+          StringRef(reinterpret_cast<const char *>(&Kind), 1));
+      OutStreamer->EmitBytes(
+          StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
+      OutStreamer->EmitZeros(14);
+    }
+    OutStreamer->SwitchSection(PrevSection);
+  }
+  Sleds.clear();
+}
+
 // Returns instruction preceding MBBI in MachineFunction.
 // If MBBI is the first instruction of the first basic block, returns null.
 static MachineBasicBlock::const_iterator
 PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
   const MachineBasicBlock *MBB = MBBI->getParent();
   while (MBBI == MBB->begin()) {
-    if (MBB == MBB->getParent()->begin())
-      return nullptr;
+    if (MBB == &MBB->getParent()->front())
+      return MachineBasicBlock::const_iterator();
     MBB = MBB->getPrevNode();
     MBBI = MBB->end();
   }
@@ -1018,7 +1153,8 @@ static const Constant *getConstantFromPool(const MachineInstr &MI,
 }
 
 static std::string getShuffleComment(const MachineOperand &DstOp,
-                                     const MachineOperand &SrcOp,
+                                     const MachineOperand &SrcOp1,
+                                     const MachineOperand &SrcOp2,
                                      ArrayRef<int> Mask) {
   std::string Comment;
 
@@ -1031,40 +1167,51 @@ static std::string getShuffleComment(const MachineOperand &DstOp,
     return X86ATTInstPrinter::getRegisterName(RegNum);
   };
 
+  // TODO: Add support for specifying an AVX512 style mask register in the comment.
   StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
-  StringRef SrcName = SrcOp.isReg() ? GetRegisterName(SrcOp.getReg()) : "mem";
+  StringRef Src1Name =
+      SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
+  StringRef Src2Name =
+      SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem";
+
+  // One source operand, fix the mask to print all elements in one span.
+  SmallVector<int, 8> ShuffleMask(Mask.begin(), Mask.end());
+  if (Src1Name == Src2Name)
+    for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
+      if (ShuffleMask[i] >= e)
+        ShuffleMask[i] -= e;
 
   raw_string_ostream CS(Comment);
   CS << DstName << " = ";
-  bool NeedComma = false;
-  bool InSrc = false;
-  for (int M : Mask) {
-    // Wrap up any prior entry...
-    if (M == SM_SentinelZero && InSrc) {
-      InSrc = false;
-      CS << "]";
-    }
-    if (NeedComma)
+  for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
+    if (i != 0)
       CS << ",";
-    else
-      NeedComma = true;
-
-    // Print this shuffle...
-    if (M == SM_SentinelZero) {
+    if (ShuffleMask[i] == SM_SentinelZero) {
       CS << "zero";
-    } else {
-      if (!InSrc) {
-        InSrc = true;
-        CS << SrcName << "[";
-      }
-      if (M == SM_SentinelUndef)
+      continue;
+    }
+
+    // Otherwise, it must come from src1 or src2.  Print the span of elements
+    // that comes from this src.
+    bool isSrc1 = ShuffleMask[i] < (int)e;
+    CS << (isSrc1 ? Src1Name : Src2Name) << '[';
+
+    bool IsFirst = true;
+    while (i != e && ShuffleMask[i] != SM_SentinelZero &&
+           (ShuffleMask[i] < (int)e) == isSrc1) {
+      if (!IsFirst)
+        CS << ',';
+      else
+        IsFirst = false;
+      if (ShuffleMask[i] == SM_SentinelUndef)
         CS << "u";
       else
-        CS << M;
+        CS << ShuffleMask[i] % (int)e;
+      ++i;
     }
+    CS << ']';
+    --i; // For loop increments element #.
   }
-  if (InSrc)
-    CS << "]";
   CS.flush();
 
   return Comment;
@@ -1202,12 +1349,21 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
   case TargetOpcode::FAULTING_LOAD_OP:
     return LowerFAULTING_LOAD_OP(*MI, MCInstLowering);
 
+  case TargetOpcode::PATCHABLE_OP:
+    return LowerPATCHABLE_OP(*MI, MCInstLowering);
+
   case TargetOpcode::STACKMAP:
     return LowerSTACKMAP(*MI);
 
   case TargetOpcode::PATCHPOINT:
     return LowerPATCHPOINT(*MI, MCInstLowering);
 
+  case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
+    return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);
+
+  case TargetOpcode::PATCHABLE_RET:
+    return LowerPATCHABLE_RET(*MI, MCInstLowering);
+
   case X86::MORESTACK_RET:
     EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
     return;
@@ -1254,7 +1410,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
   case X86::SEH_Epilogue: {
     MachineBasicBlock::const_iterator MBBI(MI);
     // Check if preceded by a call and emit nop if so.
-    for (MBBI = PrevCrossBBInst(MBBI); MBBI; MBBI = PrevCrossBBInst(MBBI)) {
+    for (MBBI = PrevCrossBBInst(MBBI);
+         MBBI != MachineBasicBlock::const_iterator();
+         MBBI = PrevCrossBBInst(MBBI)) {
       // Conservatively assume that pseudo instructions don't emit code and keep
       // looking for a call. We may emit an unnecessary nop in some cases.
       if (!MBBI->isPseudo()) {
@@ -1313,14 +1471,38 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
       SmallVector<int, 16> Mask;
       DecodePSHUFBMask(C, Mask);
       if (!Mask.empty())
-        OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, Mask));
+        OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask));
    }
    break;
  }
 
-  case X86::VPERMILPSrm:
+  case X86::VPERMILPDrm:
+  case X86::VPERMILPDYrm:
+  case X86::VPERMILPDZ128rm:
+  case X86::VPERMILPDZ256rm:
+  case X86::VPERMILPDZrm: {
+    if (!OutStreamer->isVerboseAsm())
+      break;
+    assert(MI->getNumOperands() > 5 &&
+           "We should always have at least 5 operands!");
+    const MachineOperand &DstOp = MI->getOperand(0);
+    const MachineOperand &SrcOp = MI->getOperand(1);
+    const MachineOperand &MaskOp = MI->getOperand(5);
+
+    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+      SmallVector<int, 8> Mask;
+      DecodeVPERMILPMask(C, 64, Mask);
+      if (!Mask.empty())
+        OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask));
+    }
+    break;
+  }
+
+  case X86::VPERMILPSrm:
   case X86::VPERMILPSYrm:
-  case X86::VPERMILPDYrm: {
+  case X86::VPERMILPSZ128rm:
+  case X86::VPERMILPSZ256rm:
+  case X86::VPERMILPSZrm: {
    if (!OutStreamer->isVerboseAsm())
      break;
    assert(MI->getNumOperands() > 5 &&
@@ -1329,18 +1511,63 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
    const MachineOperand &SrcOp = MI->getOperand(1);
    const MachineOperand &MaskOp = MI->getOperand(5);
 
+    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+      SmallVector<int, 16> Mask;
+      DecodeVPERMILPMask(C, 32, Mask);
+      if (!Mask.empty())
+        OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask));
+    }
+    break;
+  }
+
+  case X86::VPERMIL2PDrm:
+  case X86::VPERMIL2PSrm:
+  case X86::VPERMIL2PDrmY:
+  case X86::VPERMIL2PSrmY: {
+    if (!OutStreamer->isVerboseAsm())
+      break;
+    assert(MI->getNumOperands() > 7 &&
+           "We should always have at least 7 operands!");
+    const MachineOperand &DstOp = MI->getOperand(0);
+    const MachineOperand &SrcOp1 = MI->getOperand(1);
+    const MachineOperand &SrcOp2 = MI->getOperand(2);
+    const MachineOperand &MaskOp = MI->getOperand(6);
+    const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
+
+    if (!CtrlOp.isImm())
+      break;
+
    unsigned ElSize;
    switch (MI->getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
-    case X86::VPERMILPSrm: case X86::VPERMILPSYrm: ElSize = 32; break;
-    case X86::VPERMILPDrm: case X86::VPERMILPDYrm: ElSize = 64; break;
+    case X86::VPERMIL2PSrm: case X86::VPERMIL2PSrmY: ElSize = 32; break;
+    case X86::VPERMIL2PDrm: case X86::VPERMIL2PDrmY: ElSize = 64; break;
+    }
+
+    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+      SmallVector<int, 16> Mask;
+      DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Mask);
+      if (!Mask.empty())
+        OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp1, SrcOp2, Mask));
    }
+    break;
+  }
+
+  case X86::VPPERMrrm: {
+    if (!OutStreamer->isVerboseAsm())
+      break;
+    assert(MI->getNumOperands() > 6 &&
+           "We should always have at least 6 operands!");
+    const MachineOperand &DstOp = MI->getOperand(0);
+    const MachineOperand &SrcOp1 = MI->getOperand(1);
+    const MachineOperand &SrcOp2 = MI->getOperand(2);
+    const MachineOperand &MaskOp = MI->getOperand(6);
 
    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
      SmallVector<int, 16> Mask;
-      DecodeVPERMILPMask(C, ElSize, Mask);
+      DecodeVPPERMMask(C, Mask);
      if (!Mask.empty())
-        OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, Mask));
+        OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp1, SrcOp2, Mask));
    }
    break;
  }
@@ -1413,7 +1640,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
          CS << CI->getZExtValue();
        } else {
          // print multi-word constant as (w0,w1)
-          auto Val = CI->getValue();
+          const auto &Val = CI->getValue();
          CS << "(";
          for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
            if (i > 0)
@@ -1446,7 +1673,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
  // is at the end of the shadow.
  if (MI->isCall()) {
    // Count then size of the call towards the shadow
-    SMShadowTracker.count(TmpInst, getSubtargetInfo());
+    SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
    // Then flush the shadow so that we fill with nops before the call, not
    // after it.
    SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());