Diffstat (limited to 'llvm/lib/Target/X86/X86MCInstLower.cpp')
-rw-r--r--   llvm/lib/Target/X86/X86MCInstLower.cpp   166
1 file changed, 153 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 78098fd6262f..2fc9a2af01d7 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -569,6 +569,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
     if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 0) {
       unsigned NewOpc;
       switch (OutMI.getOpcode()) {
+      default: llvm_unreachable("Invalid opcode");
       case X86::VPCMPBZ128rmi:  NewOpc = X86::VPCMPEQBZ128rm;  break;
       case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPEQBZ128rmk; break;
       case X86::VPCMPBZ128rri:  NewOpc = X86::VPCMPEQBZ128rr;  break;
@@ -640,6 +641,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
     if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 6) {
       unsigned NewOpc;
       switch (OutMI.getOpcode()) {
+      default: llvm_unreachable("Invalid opcode");
       case X86::VPCMPBZ128rmi:  NewOpc = X86::VPCMPGTBZ128rm;  break;
       case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPGTBZ128rmk; break;
       case X86::VPCMPBZ128rri:  NewOpc = X86::VPCMPGTBZ128rr;  break;
@@ -876,6 +878,52 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
   case X86::MOVSX64rr32:
     SimplifyMOVSX(OutMI);
     break;
+
+  case X86::VCMPPDrri:
+  case X86::VCMPPDYrri:
+  case X86::VCMPPSrri:
+  case X86::VCMPPSYrri:
+  case X86::VCMPSDrr:
+  case X86::VCMPSSrr: {
+    // Swap the operands if it will enable a 2 byte VEX encoding.
+    // FIXME: Change the immediate to improve opportunities?
+    if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
+        X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
+      unsigned Imm = MI->getOperand(3).getImm() & 0x7;
+      switch (Imm) {
+      default: break;
+      case 0x00: // EQUAL
+      case 0x03: // UNORDERED
+      case 0x04: // NOT EQUAL
+      case 0x07: // ORDERED
+        std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
+        break;
+      }
+    }
+    break;
+  }
+
+  case X86::VMOVHLPSrr:
+  case X86::VUNPCKHPDrr:
+    // These are not truly commutable so hide them from the default case.
+    break;
+
+  default: {
+    // If the instruction is a commutable arithmetic instruction we might be
+    // able to commute the operands to get a 2 byte VEX prefix.
+    uint64_t TSFlags = MI->getDesc().TSFlags;
+    if (MI->getDesc().isCommutable() &&
+        (TSFlags & X86II::EncodingMask) == X86II::VEX &&
+        (TSFlags & X86II::OpMapMask) == X86II::TB &&
+        (TSFlags & X86II::FormMask) == X86II::MRMSrcReg &&
+        !(TSFlags & X86II::VEX_W) && (TSFlags & X86II::VEX_4V) &&
+        OutMI.getNumOperands() == 3) {
+      if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
+          X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg()))
+        std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
+    }
+    break;
+  }
   }
 }
@@ -983,13 +1031,32 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
   }
 }
 
+/// Return the longest nop which can be efficiently decoded for the given
+/// target cpu.  15-bytes is the longest single NOP instruction, but some
+/// platforms can't decode the longest forms efficiently.
+static unsigned MaxLongNopLength(const MCSubtargetInfo &STI) {
+  uint64_t MaxNopLength = 10;
+  if (STI.getFeatureBits()[X86::ProcIntelSLM])
+    MaxNopLength = 7;
+  else if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP])
+    MaxNopLength = 15;
+  else if (STI.getFeatureBits()[X86::FeatureFast11ByteNOP])
+    MaxNopLength = 11;
+  return MaxNopLength;
+}
+
 /// Emit the largest nop instruction smaller than or equal to \p NumBytes
 /// bytes.  Return the size of nop emitted.
 static unsigned EmitNop(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
                         const MCSubtargetInfo &STI) {
-  // This works only for 64bit. For 32bit we have to do additional checking if
-  // the CPU supports multi-byte nops.
-  assert(Is64Bit && "EmitNops only supports X86-64");
+  if (!Is64Bit) {
+    // TODO Do additional checking if the CPU supports multi-byte nops.
+    OS.EmitInstruction(MCInstBuilder(X86::NOOP), STI);
+    return 1;
+  }
+
+  // Cap a single nop emission at the profitable value for the target
+  NumBytes = std::min(NumBytes, MaxLongNopLength(STI));
 
   unsigned NopSize;
   unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
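A note on the encoding detail the @@ -876 hunk exploits: the 2-byte VEX prefix (0xC5) carries only the R register-extension bit, so a source register in xmm8-xmm15/ymm8-ymm15 read through ModRM.rm forces the 3-byte 0xC4 prefix, while the VEX.vvvv field encodes all sixteen registers in either form. A standalone sketch of the swap condition, distilled from the hunk (the free-standing helper is illustrative, not part of the patch):

    // For an MRMSrcReg VEX_4V instruction, operand 1 is encoded in VEX.vvvv
    // (a full 4-bit field in both prefix forms); operand 2 lands in ModRM.rm,
    // where an extended register needs the VEX.B bit only the 3-byte prefix has.
    static bool shouldCommuteForTwoByteVex(bool Src1IsExtended,
                                           bool Src2IsExtended) {
      return !Src1IsExtended && Src2IsExtended;
    }

The swap is only performed when it is semantically safe: either the opcode is commutable (the default case), or, for VCMP, the predicate is symmetric (EQ, UNORD, NEQ, ORD).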
@@ -1094,10 +1161,35 @@ static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
   }
 }
 
+/// A RAII helper which defines a region of instructions which can't have
+/// padding added between them for correctness.
+struct NoAutoPaddingScope {
+  MCStreamer &OS;
+  const bool OldAllowAutoPadding;
+  NoAutoPaddingScope(MCStreamer &OS)
+      : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
+    changeAndComment(false);
+  }
+  ~NoAutoPaddingScope() {
+    changeAndComment(OldAllowAutoPadding);
+  }
+  void changeAndComment(bool b) {
+    if (b == OS.getAllowAutoPadding())
+      return;
+    OS.setAllowAutoPadding(b);
+    if (b)
+      OS.emitRawComment("autopadding");
+    else
+      OS.emitRawComment("noautopadding");
+  }
+};
+
 void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
                                     X86MCInstLower &MCIL) {
   assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");
 
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   StatepointOpers SOpers(&MI);
   if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
     EmitNops(*OutStreamer, PatchBytes, Subtarget->is64Bit(),
@@ -1148,7 +1240,10 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
 
   // Record our statepoint node in the same section used by STACKMAP
   // and PATCHPOINT
-  SM.recordStatepoint(MI);
+  auto &Ctx = OutStreamer->getContext();
+  MCSymbol *MILabel = Ctx.createTempSymbol();
+  OutStreamer->EmitLabel(MILabel);
+  SM.recordStatepoint(*MILabel, MI);
 }
 
 void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
@@ -1156,6 +1251,8 @@ void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
   // FAULTING_LOAD_OP <def>, <faltinf type>, <MBB handler>,
   //                  <opcode>, <operands>
 
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   Register DefRegister = FaultingMI.getOperand(0).getReg();
   FaultMaps::FaultKind FK =
       static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
@@ -1163,8 +1260,12 @@ void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
   unsigned Opcode = FaultingMI.getOperand(3).getImm();
   unsigned OperandsBeginIdx = 4;
 
+  auto &Ctx = OutStreamer->getContext();
+  MCSymbol *FaultingLabel = Ctx.createTempSymbol();
+  OutStreamer->EmitLabel(FaultingLabel);
+
   assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
-  FM.recordFaultingOp(FK, HandlerLabel);
+  FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);
 
   MCInst MI;
   MI.setOpcode(Opcode);
@@ -1199,6 +1300,8 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
                                       X86MCInstLower &MCIL) {
   // PATCHABLE_OP minsize, opcode, operands
 
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   unsigned MinSize = MI.getOperand(0).getImm();
   unsigned Opcode = MI.getOperand(1).getImm();
 
@@ -1236,7 +1339,12 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
 //                  <id>, <shadowBytes>, ...
 void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
   SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
-  SM.recordStackMap(MI);
+
+  auto &Ctx = OutStreamer->getContext();
+  MCSymbol *MILabel = Ctx.createTempSymbol();
+  OutStreamer->EmitLabel(MILabel);
+
+  SM.recordStackMap(*MILabel, MI);
   unsigned NumShadowBytes = MI.getOperand(1).getImm();
   SMShadowTracker.reset(NumShadowBytes);
 }
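The NoAutoPaddingScope struct introduced above is a conventional save-and-restore RAII guard: the constructor stashes the streamer's current auto-padding setting and turns padding off, and the destructor restores the saved value on every exit path, so the lowering routines need no explicit cleanup. A minimal usage sketch (the function name is a hypothetical stand-in for the call sites in this patch):

    void lowerPseudoWithFixedLayout(MCStreamer &OS) {
      NoAutoPaddingScope NoPadScope(OS); // padding disabled from here on
      // ... emit a label plus the instructions it must stay adjacent to ...
    } // previous setting restored here, even on early return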
@@ -1249,7 +1357,12 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
 
   SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
 
-  SM.recordPatchPoint(MI);
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
+  auto &Ctx = OutStreamer->getContext();
+  MCSymbol *MILabel = Ctx.createTempSymbol();
+  OutStreamer->EmitLabel(MILabel);
+  SM.recordPatchPoint(*MILabel, MI);
 
   PatchPointOpers opers(&MI);
   unsigned ScratchIdx = opers.getNextScratchIdx();
@@ -1305,6 +1418,8 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
                                               X86MCInstLower &MCIL) {
   assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");
 
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   // We want to emit the following pattern, which follows the x86 calling
   // convention to prepare for the trampoline call to be patched in.
   //
@@ -1337,10 +1452,10 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
 
   // The default C calling convention will place two arguments into %rcx and
   // %rdx -- so we only work with those.
-  unsigned DestRegs[] = {X86::RDI, X86::RSI};
+  const Register DestRegs[] = {X86::RDI, X86::RSI};
   bool UsedMask[] = {false, false};
   // Filled out in loop.
-  unsigned SrcRegs[] = {0, 0};
+  Register SrcRegs[] = {0, 0};
 
   // Then we put the operands in the %rdi and %rsi registers. We spill the
   // values in the register before we clobber them, and mark them as used in
@@ -1350,7 +1465,7 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
     if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
       assert(Op->isReg() && "Only support arguments in registers");
-      SrcRegs[I] = Op->getReg();
+      SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
       if (SrcRegs[I] != DestRegs[I]) {
         UsedMask[I] = true;
         EmitAndCountInstruction(
@@ -1361,6 +1476,9 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
     }
 
   // Now that the register values are stashed, mov arguments into place.
+  // FIXME: This doesn't work if one of the later SrcRegs is equal to an
+  // earlier DestReg. We will have already overwritten over the register before
+  // we can copy from it.
   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
     if (SrcRegs[I] != DestRegs[I])
       EmitAndCountInstruction(
@@ -1396,6 +1514,8 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
                                                     X86MCInstLower &MCIL) {
   assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64");
 
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   // We want to emit the following pattern, which follows the x86 calling
   // convention to prepare for the trampoline call to be patched in.
   //
@@ -1429,11 +1549,11 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
   // An x86-64 convention may place three arguments into %rcx, %rdx, and R8,
   // so we'll work with those. Or we may be called via SystemV, in which case
   // we don't have to do any translation.
-  unsigned DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
+  const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
   bool UsedMask[] = {false, false, false};
   // Will fill out src regs in the loop.
-  unsigned SrcRegs[] = {0, 0, 0};
+  Register SrcRegs[] = {0, 0, 0};
 
   // Then we put the operands in the SystemV registers. We spill the values in
   // the registers before we clobber them, and mark them as used in UsedMask.
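Each record call site in this patch now follows the same four-step shape: fetch the MCContext, create a temporary symbol, bind it to the current output position, and hand it to the stackmap machinery, instead of leaving label creation to the StackMaps/FaultMaps code itself. Condensed from the hunks above:

    auto &Ctx = OutStreamer->getContext();      // MCContext owns all symbols
    MCSymbol *MILabel = Ctx.createTempSymbol(); // assembler-local .Ltmp label
    OutStreamer->EmitLabel(MILabel);            // bind it to the current offset
    SM.recordStackMap(*MILabel, MI);            // record against that label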
@@ -1443,7 +1563,7 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
     if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
       // TODO: Is register only support adequate?
       assert(Op->isReg() && "Only supports arguments in registers");
-      SrcRegs[I] = Op->getReg();
+      SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
       if (SrcRegs[I] != DestRegs[I]) {
         UsedMask[I] = true;
         EmitAndCountInstruction(
@@ -1459,6 +1579,9 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
   // is clobbers. We've already added nops to account for the size of mov and
   // push if the register is in the right place, so we only have to worry about
   // emitting movs.
+  // FIXME: This doesn't work if one of the later SrcRegs is equal to an
+  // earlier DestReg. We will have already overwritten over the register before
+  // we can copy from it.
   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
     if (UsedMask[I])
       EmitAndCountInstruction(
@@ -1490,6 +1613,19 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
 
 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
                                                   X86MCInstLower &MCIL) {
+
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
+  const Function &F = MF->getFunction();
+  if (F.hasFnAttribute("patchable-function-entry")) {
+    unsigned Num;
+    if (F.getFnAttribute("patchable-function-entry")
+            .getValueAsString()
+            .getAsInteger(10, Num))
+      return;
+    EmitNops(*OutStreamer, Num, Subtarget->is64Bit(), getSubtargetInfo());
+    return;
+  }
   // We want to emit the following pattern:
   //
   //   .p2align 1, ...
@@ -1517,6 +1653,8 @@ void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
 
 void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
                                        X86MCInstLower &MCIL) {
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   // Since PATCHABLE_RET takes the opcode of the return statement as an
   // argument, we use that to emit the correct form of the RET that we want.
   // i.e. when we see this:
@@ -1547,6 +1685,8 @@ void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
 
 void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
                                              X86MCInstLower &MCIL) {
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   // Like PATCHABLE_RET, we have the actual instruction in the operands to this
   // instruction so we lower that particular instruction and its operands.
   // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
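One subtlety in the new patchable-function-entry handling above: StringRef::getAsInteger returns true on *failure*, so a malformed attribute value makes the lowering bail out silently rather than emit a bogus nop count. Sketched in isolation (assuming the surrounding X86AsmPrinter context from the hunk):

    const Function &F = MF->getFunction();
    if (F.hasFnAttribute("patchable-function-entry")) {
      unsigned Num;
      StringRef Val =
          F.getFnAttribute("patchable-function-entry").getValueAsString();
      if (Val.getAsInteger(10, Num)) // base 10; true means a parse error
        return;                      // ignore the attribute rather than guess
      EmitNops(*OutStreamer, Num, Subtarget->is64Bit(), getSubtargetInfo());
      return;
    }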