Diffstat (limited to 'llvm/lib/Target/X86/X86MCInstLower.cpp')
 llvm/lib/Target/X86/X86MCInstLower.cpp | 166 ++++++++++++++++++++++++++++-----
 1 file changed, 153 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 78098fd6262f..2fc9a2af01d7 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -569,6 +569,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 0) {
unsigned NewOpc;
switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
case X86::VPCMPBZ128rmi: NewOpc = X86::VPCMPEQBZ128rm; break;
case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPEQBZ128rmk; break;
case X86::VPCMPBZ128rri: NewOpc = X86::VPCMPEQBZ128rr; break;
@@ -640,6 +641,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 6) {
unsigned NewOpc;
switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
case X86::VPCMPBZ128rmi: NewOpc = X86::VPCMPGTBZ128rm; break;
case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPGTBZ128rmk; break;
case X86::VPCMPBZ128rri: NewOpc = X86::VPCMPGTBZ128rr; break;
@@ -876,6 +878,52 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
case X86::MOVSX64rr32:
SimplifyMOVSX(OutMI);
break;
+
+ case X86::VCMPPDrri:
+ case X86::VCMPPDYrri:
+ case X86::VCMPPSrri:
+ case X86::VCMPPSYrri:
+ case X86::VCMPSDrr:
+ case X86::VCMPSSrr: {
+ // Swap the operands if it will enable a 2 byte VEX encoding.
+ // FIXME: Change the immediate to improve opportunities?
+ if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
+ X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
+ unsigned Imm = MI->getOperand(3).getImm() & 0x7;
+ switch (Imm) {
+ default: break;
+ case 0x00: // EQUAL
+ case 0x03: // UNORDERED
+ case 0x04: // NOT EQUAL
+ case 0x07: // ORDERED
+ std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
+ break;
+ }
+ }
+ break;
+ }
+
+ case X86::VMOVHLPSrr:
+ case X86::VUNPCKHPDrr:
+ // These are not truly commutable, so hide them from the default case.
+ break;
+
+ default: {
+ // If the instruction is a commutable arithmetic instruction we might be
+ // able to commute the operands to get a 2 byte VEX prefix.
+ uint64_t TSFlags = MI->getDesc().TSFlags;
+ if (MI->getDesc().isCommutable() &&
+ (TSFlags & X86II::EncodingMask) == X86II::VEX &&
+ (TSFlags & X86II::OpMapMask) == X86II::TB &&
+ (TSFlags & X86II::FormMask) == X86II::MRMSrcReg &&
+ !(TSFlags & X86II::VEX_W) && (TSFlags & X86II::VEX_4V) &&
+ OutMI.getNumOperands() == 3) {
+ if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
+ X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg()))
+ std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
+ }
+ break;
+ }
}
}
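
Editor's note on why these swaps pay off: in the VEX MRMSrcReg form, source operand 1 is encoded in the 4-bit VEX.vvvv field, which can name any of xmm0-xmm15, while source operand 2 lands in modrm.rm, whose extension bit (VEX.B) exists only in the 3-byte prefix. Moving the lone extended register into operand 1 therefore permits the 2-byte prefix, saving one byte. For the VCMP* cases, only the four predicates left unchanged by swapping the operands (EQ, UNORD, NEQ, ORD) are eligible. A minimal sketch of the profitability test, using the same helper the patch uses (the function name here is hypothetical):

    // Illustrative only: commuting helps exactly when the extended register
    // (xmm8-xmm15) sits in the modrm.rm slot and the vvvv slot does not
    // need extension.
    static bool swapEnablesTwoByteVEX(unsigned Src1, unsigned Src2) {
      return !X86II::isX86_64ExtendedReg(Src1) &&
             X86II::isX86_64ExtendedReg(Src2);
    }
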
@@ -983,13 +1031,32 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
}
}
+/// Return the longest nop which can be efficiently decoded for the given
+/// target CPU. 15 bytes is the longest single NOP instruction, but some
+/// platforms can't decode the longest forms efficiently.
+static unsigned MaxLongNopLength(const MCSubtargetInfo &STI) {
+ uint64_t MaxNopLength = 10;
+ if (STI.getFeatureBits()[X86::ProcIntelSLM])
+ MaxNopLength = 7;
+ else if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP])
+ MaxNopLength = 15;
+ else if (STI.getFeatureBits()[X86::FeatureFast11ByteNOP])
+ MaxNopLength = 11;
+ return MaxNopLength;
+}
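
A note on how the cap composes: a caller that needs more padding than one instruction can provide simply loops, letting each EmitNop call consume up to the target's preferred maximum. Sketched with hypothetical values, mirroring the EmitNops loop further below:

    // Illustrative only: 24 bytes of padding on a target whose preferred
    // maximum is 10 decomposes into nops of 10, 10 and 4 bytes.
    unsigned Remaining = 24;
    while (Remaining != 0)
      Remaining -= EmitNop(OS, Remaining, /*Is64Bit=*/true, STI);
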
+
/// Emit the largest nop instruction smaller than or equal to \p NumBytes
/// bytes. Return the size of nop emitted.
static unsigned EmitNop(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
const MCSubtargetInfo &STI) {
- // This works only for 64bit. For 32bit we have to do additional checking if
- // the CPU supports multi-byte nops.
- assert(Is64Bit && "EmitNops only supports X86-64");
+ if (!Is64Bit) {
+ // TODO: Do additional checking if the CPU supports multi-byte nops.
+ OS.EmitInstruction(MCInstBuilder(X86::NOOP), STI);
+ return 1;
+ }
+
+ // Cap a single nop emission at the profitable value for the target.
+ NumBytes = std::min(NumBytes, MaxLongNopLength(STI));
unsigned NopSize;
unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
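
For reference, the encodings the selection below chooses among follow the Intel SDM's recommended multi-byte NOP sequences (summarized here; not part of the patch):

    // 1: 90                  2: 66 90                3: 0F 1F 00
    // 4: 0F 1F 40 00         5: 0F 1F 44 00 00       6: 66 0F 1F 44 00 00
    // 7: 0F 1F 80 00 00 00 00
    // 8: 0F 1F 84 00 00 00 00 00
    // Longer forms stack 66 operand-size prefixes on the 8-byte form, which
    // is what makes 11- and 15-byte nops possible but slow to decode on
    // some cores.
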
@@ -1094,10 +1161,35 @@ static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
}
}
+/// A RAII helper which defines a region of instructions which can't have
+/// padding added between them for correctness.
+struct NoAutoPaddingScope {
+ MCStreamer &OS;
+ const bool OldAllowAutoPadding;
+ NoAutoPaddingScope(MCStreamer &OS)
+ : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
+ changeAndComment(false);
+ }
+ ~NoAutoPaddingScope() {
+ changeAndComment(OldAllowAutoPadding);
+ }
+ void changeAndComment(bool b) {
+ if (b == OS.getAllowAutoPadding())
+ return;
+ OS.setAllowAutoPadding(b);
+ if (b)
+ OS.emitRawComment("autopadding");
+ else
+ OS.emitRawComment("noautopadding");
+ }
+};
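
Typical use, mirroring the lowering routines below: constructing the scope at the top of a pseudo expansion suppresses automatic padding for everything the routine streams out, and the destructor restores the previous setting on every exit path. A hypothetical caller, for illustration:

    void lowerPseudo(MCStreamer &OS) {
      NoAutoPaddingScope NoPadScope(OS);
      // ... emit instructions that must stay contiguous;
      // no padding may be inserted between them ...
    } // previous setting restored here, even on early return
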
+
void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
X86MCInstLower &MCIL) {
assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
StatepointOpers SOpers(&MI);
if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
EmitNops(*OutStreamer, PatchBytes, Subtarget->is64Bit(),
@@ -1148,7 +1240,10 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
// Record our statepoint node in the same section used by STACKMAP
// and PATCHPOINT
- SM.recordStatepoint(MI);
+ auto &Ctx = OutStreamer->getContext();
+ MCSymbol *MILabel = Ctx.createTempSymbol();
+ OutStreamer->EmitLabel(MILabel);
+ SM.recordStatepoint(*MILabel, MI);
}
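
The label dance above is the pattern to note: rather than letting the stackmap machinery infer "the current position", a fresh temporary symbol is emitted immediately before the record call and passed in explicitly, so the recorded offset stays pinned to the instruction even if the streamer later inserts padding nearby. The same idiom reappears below for faulting ops, stackmaps and patchpoints; abstracted (recordX stands in for recordStatepoint / recordStackMap / recordPatchPoint):

    MCSymbol *MILabel = OutStreamer->getContext().createTempSymbol();
    OutStreamer->EmitLabel(MILabel);
    Recorder.recordX(*MILabel, MI);
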
void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
@@ -1156,6 +1251,8 @@ void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
// FAULTING_LOAD_OP <def>, <faulting type>, <MBB handler>,
// <opcode>, <operands>
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
Register DefRegister = FaultingMI.getOperand(0).getReg();
FaultMaps::FaultKind FK =
static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
@@ -1163,8 +1260,12 @@ void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
unsigned Opcode = FaultingMI.getOperand(3).getImm();
unsigned OperandsBeginIdx = 4;
+ auto &Ctx = OutStreamer->getContext();
+ MCSymbol *FaultingLabel = Ctx.createTempSymbol();
+ OutStreamer->EmitLabel(FaultingLabel);
+
assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
- FM.recordFaultingOp(FK, HandlerLabel);
+ FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);
MCInst MI;
MI.setOpcode(Opcode);
@@ -1199,6 +1300,8 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
X86MCInstLower &MCIL) {
// PATCHABLE_OP minsize, opcode, operands
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
unsigned MinSize = MI.getOperand(0).getImm();
unsigned Opcode = MI.getOperand(1).getImm();
@@ -1236,7 +1339,12 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
// <id>, <shadowBytes>, ...
void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
- SM.recordStackMap(MI);
+
+ auto &Ctx = OutStreamer->getContext();
+ MCSymbol *MILabel = Ctx.createTempSymbol();
+ OutStreamer->EmitLabel(MILabel);
+
+ SM.recordStackMap(*MILabel, MI);
unsigned NumShadowBytes = MI.getOperand(1).getImm();
SMShadowTracker.reset(NumShadowBytes);
}
@@ -1249,7 +1357,12 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
- SM.recordPatchPoint(MI);
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
+ auto &Ctx = OutStreamer->getContext();
+ MCSymbol *MILabel = Ctx.createTempSymbol();
+ OutStreamer->EmitLabel(MILabel);
+ SM.recordPatchPoint(*MILabel, MI);
PatchPointOpers opers(&MI);
unsigned ScratchIdx = opers.getNextScratchIdx();
@@ -1305,6 +1418,8 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
X86MCInstLower &MCIL) {
assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
// We want to emit the following pattern, which follows the x86 calling
// convention to prepare for the trampoline call to be patched in.
//
@@ -1337,10 +1452,10 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
// The default C calling convention will place two arguments into %rcx and
// %rdx -- so we only work with those.
- unsigned DestRegs[] = {X86::RDI, X86::RSI};
+ const Register DestRegs[] = {X86::RDI, X86::RSI};
bool UsedMask[] = {false, false};
// Filled out in loop.
- unsigned SrcRegs[] = {0, 0};
+ Register SrcRegs[] = {0, 0};
// Then we put the operands in the %rdi and %rsi registers. We spill the
// values in the register before we clobber them, and mark them as used in
@@ -1350,7 +1465,7 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
for (unsigned I = 0; I < MI.getNumOperands(); ++I)
if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
assert(Op->isReg() && "Only support arguments in registers");
- SrcRegs[I] = Op->getReg();
+ SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
if (SrcRegs[I] != DestRegs[I]) {
UsedMask[I] = true;
EmitAndCountInstruction(
@@ -1361,6 +1476,9 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
}
// Now that the register values are stashed, mov arguments into place.
+ // FIXME: This doesn't work if one of the later SrcRegs is equal to an
+ // earlier DestReg. We will have already overwritten the register before
+ // we can copy from it.
for (unsigned I = 0; I < MI.getNumOperands(); ++I)
if (SrcRegs[I] != DestRegs[I])
EmitAndCountInstruction(
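
A concrete (hypothetical) instance of the FIXME above, with two operands: suppose the event arguments arrive as SrcRegs = {RSI, RDI} while DestRegs = {RDI, RSI}. The copy loop then clobbers the second source before reading it:

    // mov %rsi, %rdi   ; places operand 0, but overwrites %rdi ...
    // mov %rdi, %rsi   ; ... so operand 1 now copies the wrong value.
    // Breaking the cycle would need an exchange or a scratch register.

The same hazard applies to the typed-event lowering further below.
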
@@ -1396,6 +1514,8 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
X86MCInstLower &MCIL) {
assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64");
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
// We want to emit the following pattern, which follows the x86 calling
// convention to prepare for the trampoline call to be patched in.
//
@@ -1429,11 +1549,11 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
// An x86-64 convention may place three arguments into %rcx, %rdx, and R8,
// so we'll work with those. Or we may be called via SystemV, in which case
// we don't have to do any translation.
- unsigned DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
+ const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
bool UsedMask[] = {false, false, false};
// Will fill out src regs in the loop.
- unsigned SrcRegs[] = {0, 0, 0};
+ Register SrcRegs[] = {0, 0, 0};
// Then we put the operands in the SystemV registers. We spill the values in
// the registers before we clobber them, and mark them as used in UsedMask.
@@ -1443,7 +1563,7 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
// TODO: Is register-only support adequate?
assert(Op->isReg() && "Only supports arguments in registers");
- SrcRegs[I] = Op->getReg();
+ SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
if (SrcRegs[I] != DestRegs[I]) {
UsedMask[I] = true;
EmitAndCountInstruction(
@@ -1459,6 +1579,9 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
// is clobbered. We've already added nops to account for the size of mov and
// push if the register is in the right place, so we only have to worry about
// emitting movs.
+ // FIXME: This doesn't work if one of the later SrcRegs is equal to an
+ // earlier DestReg. We will have already overwritten the register before
+ // we can copy from it.
for (unsigned I = 0; I < MI.getNumOperands(); ++I)
if (UsedMask[I])
EmitAndCountInstruction(
@@ -1490,6 +1613,19 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
X86MCInstLower &MCIL) {
+
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
+ const Function &F = MF->getFunction();
+ if (F.hasFnAttribute("patchable-function-entry")) {
+ unsigned Num;
+ if (F.getFnAttribute("patchable-function-entry")
+ .getValueAsString()
+ .getAsInteger(10, Num))
+ return;
+ EmitNops(*OutStreamer, Num, Subtarget->is64Bit(), getSubtargetInfo());
+ return;
+ }
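
This early exit wires up the "patchable-function-entry"="N" function attribute (the string form Clang emits for __attribute__((patchable_function_entry(N)))): the value is parsed in base 10, StringRef::getAsInteger returns true on failure so a malformed value is silently skipped, and N is forwarded to EmitNops as a byte count. A sketch of the round trip, assuming a function built with the attribute value 2:

    // IR:  define void @f() "patchable-function-entry"="2" { ... }
    unsigned Num;
    if (!F.getFnAttribute("patchable-function-entry")
             .getValueAsString()
             .getAsInteger(10, Num))   // false => parsed successfully
      EmitNops(*OutStreamer, Num, /*Is64Bit=*/true, getSubtargetInfo());
    // emits two bytes of nop padding (e.g. 66 90) before the prologue
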
// We want to emit the following pattern:
//
// .p2align 1, ...
@@ -1517,6 +1653,8 @@ void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
X86MCInstLower &MCIL) {
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
// Since PATCHABLE_RET takes the opcode of the return statement as an
// argument, we use that to emit the correct form of the RET that we want.
// i.e. when we see this:
@@ -1547,6 +1685,8 @@ void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
X86MCInstLower &MCIL) {
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
// Like PATCHABLE_RET, we have the actual instruction in the operands to this
// instruction so we lower that particular instruction and its operands.
// Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how