Diffstat (limited to 'lib/Target/X86/X86InstrInfo.cpp')
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 582
1 file changed, 431 insertions, 151 deletions
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index dbe45356c42b..c29029daeec9 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -30,7 +30,7 @@ #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" @@ -465,7 +465,7 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI, /// Return true if register is PIC base; i.e.g defined by X86::MOVPC32r. static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { // Don't waste compile time scanning use-def chains of physregs. - if (!TargetRegisterInfo::isVirtualRegister(BaseReg)) + if (!Register::isVirtualRegister(BaseReg)) return false; bool isPICBase = false; for (MachineRegisterInfo::def_instr_iterator I = MRI.def_instr_begin(BaseReg), @@ -480,9 +480,50 @@ static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { } bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA) const { + AAResults *AA) const { switch (MI.getOpcode()) { - default: break; + default: + // This function should only be called for opcodes with the ReMaterializable + // flag set. + llvm_unreachable("Unknown rematerializable operation!"); + break; + + case X86::LOAD_STACK_GUARD: + case X86::AVX1_SETALLONES: + case X86::AVX2_SETALLONES: + case X86::AVX512_128_SET0: + case X86::AVX512_256_SET0: + case X86::AVX512_512_SET0: + case X86::AVX512_512_SETALLONES: + case X86::AVX512_FsFLD0SD: + case X86::AVX512_FsFLD0SS: + case X86::AVX512_FsFLD0F128: + case X86::AVX_SET0: + case X86::FsFLD0SD: + case X86::FsFLD0SS: + case X86::FsFLD0F128: + case X86::KSET0D: + case X86::KSET0Q: + case X86::KSET0W: + case X86::KSET1D: + case X86::KSET1Q: + case X86::KSET1W: + case X86::MMX_SET0: + case X86::MOV32ImmSExti8: + case X86::MOV32r0: + case X86::MOV32r1: + case X86::MOV32r_1: + case X86::MOV32ri64: + case X86::MOV64ImmSExti8: + case X86::V_SET0: + case X86::V_SETALLONES: + case X86::MOV16ri: + case X86::MOV32ri: + case X86::MOV64ri: + case X86::MOV64ri32: + case X86::MOV8ri: + return true; + case X86::MOV8rm: case X86::MOV8rm_NOREX: case X86::MOV16rm: @@ -561,7 +602,7 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, MI.getOperand(1 + X86::AddrIndexReg).isReg() && MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 && MI.isDereferenceableInvariantLoad(AA)) { - unsigned BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg(); + Register BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg(); if (BaseReg == 0 || BaseReg == X86::RIP) return true; // Allow re-materialization of PIC load. @@ -583,7 +624,7 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, // lea fi#, lea GV, etc. are all rematerializable. if (!MI.getOperand(1 + X86::AddrBaseReg).isReg()) return true; - unsigned BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg(); + Register BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg(); if (BaseReg == 0) return true; // Allow re-materialization of lea PICBase + x. @@ -594,10 +635,6 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, return false; } } - - // All other instructions marked M_REMATERIALIZABLE are always trivially - // rematerializable. 
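A change that recurs throughout this diff is the migration from raw unsigned register numbers to the Register wrapper class (Register::isVirtualRegister / Register::isPhysicalRegister instead of the TargetRegisterInfo statics). A minimal sketch of the new spelling, assuming the llvm/CodeGen/Register.h API of this LLVM version; isVirtualBase is an illustrative helper, not part of the patch:

    #include "llvm/CodeGen/Register.h"
    using llvm::Register;

    // Register wraps the raw unsigned id and converts implicitly to and from
    // unsigned, so call sites only change the spelling of the predicates.
    static bool isVirtualBase(Register BaseReg) {
      if (!BaseReg.isValid())            // register number 0: no base register
        return false;
      return Register::isVirtualRegister(BaseReg);
    }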
- return true; } void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, @@ -664,7 +701,7 @@ inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) { } bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, - unsigned Opc, bool AllowSP, unsigned &NewSrc, + unsigned Opc, bool AllowSP, Register &NewSrc, bool &isKill, MachineOperand &ImplicitOp, LiveVariables *LV) const { MachineFunction &MF = *MI.getParent()->getParent(); @@ -675,7 +712,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, RC = Opc != X86::LEA32r ? &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass; } - unsigned SrcReg = Src.getReg(); + Register SrcReg = Src.getReg(); // For both LEA64 and LEA32 the register already has essentially the right // type (32-bit or 64-bit) we may just need to forbid SP. @@ -684,7 +721,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, isKill = Src.isKill(); assert(!Src.isUndef() && "Undef op doesn't need optimization"); - if (TargetRegisterInfo::isVirtualRegister(NewSrc) && + if (Register::isVirtualRegister(NewSrc) && !MF.getRegInfo().constrainRegClass(NewSrc, RC)) return false; @@ -693,7 +730,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, // This is for an LEA64_32r and incoming registers are 32-bit. One way or // another we need to add 64-bit registers to the final MI. - if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + if (Register::isPhysicalRegister(SrcReg)) { ImplicitOp = Src; ImplicitOp.setImplicit(); @@ -740,8 +777,8 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA( return nullptr; unsigned Opcode = X86::LEA64_32r; - unsigned InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass); - unsigned OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass); + Register InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass); + Register OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass); // Build and insert into an implicit UNDEF value. This is OK because // we will be shifting and then extracting the lower 8/16-bits. @@ -751,8 +788,8 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA( // But testing has shown this *does* help performance in 64-bit mode (at // least on modern x86 machines). MachineBasicBlock::iterator MBBI = MI.getIterator(); - unsigned Dest = MI.getOperand(0).getReg(); - unsigned Src = MI.getOperand(1).getReg(); + Register Dest = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); bool IsDead = MI.getOperand(0).isDead(); bool IsKill = MI.getOperand(1).isKill(); unsigned SubReg = Is8BitOp ? X86::sub_8bit : X86::sub_16bit; @@ -794,7 +831,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA( case X86::ADD8rr_DB: case X86::ADD16rr: case X86::ADD16rr_DB: { - unsigned Src2 = MI.getOperand(2).getReg(); + Register Src2 = MI.getOperand(2).getReg(); bool IsKill2 = MI.getOperand(2).isKill(); assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization"); unsigned InRegLEA2 = 0; @@ -888,7 +925,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr; // LEA can't handle RSP. - if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) && + if (Register::isVirtualRegister(Src.getReg()) && !MF.getRegInfo().constrainRegClass(Src.getReg(), &X86::GR64_NOSPRegClass)) return nullptr; @@ -911,7 +948,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // LEA can't handle ESP. 
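classifyLEAReg now hands the chosen source back through a Register out-parameter; its callers in convertToThreeAddress rewrite two-address arithmetic into an LEA so the result no longer has to clobber an input. A hedged sketch of the five-operand x86 address form such an LEA carries (buildAddViaLEA, MBB, MI, TII, DestReg, SrcReg and Imm are placeholder names, not from the patch):

    // x86 address operands are always <base, scale, index, displacement,
    // segment>, so "ADD32ri %dst(tied), %src, Imm" can be re-expressed as:
    static MachineInstr *buildAddViaLEA(MachineBasicBlock &MBB, MachineInstr &MI,
                                        const TargetInstrInfo *TII,
                                        Register DestReg, Register SrcReg,
                                        int64_t Imm) {
      return BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(X86::LEA32r), DestReg)
          .addReg(SrcReg)   // base
          .addImm(1)        // scale
          .addReg(0)        // index: none
          .addImm(Imm)      // displacement
          .addReg(0);       // segment: none
    }

The SUB32ri/SUB64ri32 cases further down reuse the same idea by negating the immediate into the displacement, which is why they now also check that operand 2 really is an immediate before calling getImm().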
bool isKill; - unsigned SrcReg; + Register SrcReg; MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill, ImplicitOp, LV)) @@ -947,7 +984,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r : (Is64Bit ? X86::LEA64_32r : X86::LEA32r); bool isKill; - unsigned SrcReg; + Register SrcReg; MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill, ImplicitOp, LV)) @@ -970,7 +1007,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, : (Is64Bit ? X86::LEA64_32r : X86::LEA32r); bool isKill; - unsigned SrcReg; + Register SrcReg; MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill, ImplicitOp, LV)) @@ -1005,7 +1042,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r; bool isKill; - unsigned SrcReg; + Register SrcReg; MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true, SrcReg, isKill, ImplicitOp, LV)) @@ -1013,7 +1050,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, const MachineOperand &Src2 = MI.getOperand(2); bool isKill2; - unsigned SrcReg2; + Register SrcReg2; MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false, SrcReg2, isKill2, ImplicitOp2, LV)) @@ -1054,7 +1091,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r; bool isKill; - unsigned SrcReg; + Register SrcReg; MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true, SrcReg, isKill, ImplicitOp, LV)) @@ -1085,6 +1122,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return nullptr; case X86::SUB32ri8: case X86::SUB32ri: { + if (!MI.getOperand(2).isImm()) + return nullptr; int64_t Imm = MI.getOperand(2).getImm(); if (!isInt<32>(-Imm)) return nullptr; @@ -1093,7 +1132,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned Opc = Is64Bit ? 
X86::LEA64_32r : X86::LEA32r; bool isKill; - unsigned SrcReg; + Register SrcReg; MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true, SrcReg, isKill, ImplicitOp, LV)) @@ -1111,6 +1150,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::SUB64ri8: case X86::SUB64ri32: { + if (!MI.getOperand(2).isImm()) + return nullptr; int64_t Imm = MI.getOperand(2).getImm(); if (!isInt<32>(-Imm)) return nullptr; @@ -1140,40 +1181,62 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::VMOVUPDZrmk: case X86::VMOVAPDZrmk: case X86::VMOVUPSZ128rmk: case X86::VMOVAPSZ128rmk: case X86::VMOVUPSZ256rmk: case X86::VMOVAPSZ256rmk: - case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk: { + case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk: + case X86::VBROADCASTSDZ256mk: + case X86::VBROADCASTSDZmk: + case X86::VBROADCASTSSZ128mk: + case X86::VBROADCASTSSZ256mk: + case X86::VBROADCASTSSZmk: + case X86::VPBROADCASTDZ128mk: + case X86::VPBROADCASTDZ256mk: + case X86::VPBROADCASTDZmk: + case X86::VPBROADCASTQZ128mk: + case X86::VPBROADCASTQZ256mk: + case X86::VPBROADCASTQZmk: { unsigned Opc; switch (MIOpc) { default: llvm_unreachable("Unreachable!"); - case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break; - case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break; - case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break; - case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break; - case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break; - case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break; - case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break; - case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break; - case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break; - case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break; - case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break; - case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break; - case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break; - case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break; - case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break; - case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break; - case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break; - case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break; - case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break; - case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break; - case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break; - case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break; - case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break; - case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break; - case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break; - case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break; - case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break; - case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break; - case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break; - case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break; + case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break; + case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break; + case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break; + case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break; + case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break; + case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break; + case X86::VMOVDQU32Z128rmk: 
Opc = X86::VPBLENDMDZ128rmk; break; + case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break; + case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break; + case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break; + case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break; + case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break; + case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break; + case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break; + case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break; + case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break; + case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break; + case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break; + case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break; + case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break; + case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break; + case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break; + case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break; + case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break; + case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break; + case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break; + case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break; + case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break; + case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break; + case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break; + case X86::VBROADCASTSDZ256mk: Opc = X86::VBLENDMPDZ256rmbk; break; + case X86::VBROADCASTSDZmk: Opc = X86::VBLENDMPDZrmbk; break; + case X86::VBROADCASTSSZ128mk: Opc = X86::VBLENDMPSZ128rmbk; break; + case X86::VBROADCASTSSZ256mk: Opc = X86::VBLENDMPSZ256rmbk; break; + case X86::VBROADCASTSSZmk: Opc = X86::VBLENDMPSZrmbk; break; + case X86::VPBROADCASTDZ128mk: Opc = X86::VPBLENDMDZ128rmbk; break; + case X86::VPBROADCASTDZ256mk: Opc = X86::VPBLENDMDZ256rmbk; break; + case X86::VPBROADCASTDZmk: Opc = X86::VPBLENDMDZrmbk; break; + case X86::VPBROADCASTQZ128mk: Opc = X86::VPBLENDMQZ128rmbk; break; + case X86::VPBROADCASTQZ256mk: Opc = X86::VPBLENDMQZ256rmbk; break; + case X86::VPBROADCASTQZmk: Opc = X86::VPBLENDMQZrmbk; break; } NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc)) @@ -1187,6 +1250,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, .add(MI.getOperand(7)); break; } + case X86::VMOVDQU8Z128rrk: case X86::VMOVDQU8Z256rrk: case X86::VMOVDQU8Zrrk: @@ -1683,6 +1747,27 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, OpIdx1, OpIdx2); } + case X86::VCMPSDZrr: + case X86::VCMPSSZrr: + case X86::VCMPPDZrri: + case X86::VCMPPSZrri: + case X86::VCMPPDZ128rri: + case X86::VCMPPSZ128rri: + case X86::VCMPPDZ256rri: + case X86::VCMPPSZ256rri: + case X86::VCMPPDZrrik: + case X86::VCMPPSZrrik: + case X86::VCMPPDZ128rrik: + case X86::VCMPPSZ128rrik: + case X86::VCMPPDZ256rrik: + case X86::VCMPPSZ256rrik: { + unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x1f; + Imm = X86::getSwappedVCMPImm(Imm); + auto &WorkingMI = cloneIfNew(MI); + WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm); + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + OpIdx1, OpIdx2); + } case X86::VPERM2F128rr: case X86::VPERM2I128rr: { // Flip permute source immediate. 
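The table entries added above let convertToThreeAddress turn a masked move or masked broadcast, whose pass-through operand is tied to the destination, into the corresponding blend, which is a genuine three-address instruction. An illustrative subset of the mapping, taken from the switch above (maskedMoveToBlend itself is not a function in the patch):

    // A masked move needs its pass-through in the destination register:
    //     vmovups   (%rdi), %zmm0 {%k1}        ; zmm0 is also the merge source
    // while the blend takes the merge source as a separate operand:
    //     vblendmps (%rdi), %zmm1, %zmm0 {%k1}
    static unsigned maskedMoveToBlend(unsigned Opc) {
      switch (Opc) {
      case X86::VMOVUPSZrmk:        return X86::VBLENDMPSZrmk;     // full-width load
      case X86::VMOVAPDZ256rmk:     return X86::VBLENDMPDZ256rmk;
      case X86::VBROADCASTSSZ128mk: return X86::VBLENDMPSZ128rmbk; // broadcast form
      case X86::VPBROADCASTQZmk:    return X86::VPBLENDMQZrmbk;
      default:                      return 0;  // the real table handles many more
      }
    }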
@@ -1859,7 +1944,7 @@ X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI, // CommutableOpIdx2 is well defined now. Let's choose another commutable // operand and assign its index to CommutableOpIdx1. - unsigned Op2Reg = MI.getOperand(CommutableOpIdx2).getReg(); + Register Op2Reg = MI.getOperand(CommutableOpIdx2).getReg(); unsigned CommutableOpIdx1; for (CommutableOpIdx1 = LastCommutableVecOp; @@ -1889,7 +1974,8 @@ X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI, return true; } -bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, +bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI, + unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { const MCInstrDesc &Desc = MI.getDesc(); if (!Desc.isCommutable()) @@ -1926,17 +2012,23 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, // Ordered/Unordered/Equal/NotEqual tests unsigned Imm = MI.getOperand(3 + OpOffset).getImm() & 0x7; switch (Imm) { + default: + // EVEX versions can be commuted. + if ((Desc.TSFlags & X86II::EncodingMask) == X86II::EVEX) + break; + return false; case 0x00: // EQUAL case 0x03: // UNORDERED case 0x04: // NOT EQUAL case 0x07: // ORDERED - // The indices of the commutable operands are 1 and 2 (or 2 and 3 - // when masked). - // Assign them to the returned operand indices here. - return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1 + OpOffset, - 2 + OpOffset); + break; } - return false; + + // The indices of the commutable operands are 1 and 2 (or 2 and 3 + // when masked). + // Assign them to the returned operand indices here. + return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1 + OpOffset, + 2 + OpOffset); } case X86::MOVSSrr: // X86::MOVSDrr is always commutable. MOVSS is only commutable if we can @@ -1990,6 +2082,24 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, case X86::VPTERNLOGQZ256rmbikz: case X86::VPTERNLOGQZrmbikz: return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); + case X86::VPDPWSSDZ128r: + case X86::VPDPWSSDZ128rk: + case X86::VPDPWSSDZ128rkz: + case X86::VPDPWSSDZ256r: + case X86::VPDPWSSDZ256rk: + case X86::VPDPWSSDZ256rkz: + case X86::VPDPWSSDZr: + case X86::VPDPWSSDZrk: + case X86::VPDPWSSDZrkz: + case X86::VPDPWSSDSZ128r: + case X86::VPDPWSSDSZ128rk: + case X86::VPDPWSSDSZ128rkz: + case X86::VPDPWSSDSZ256r: + case X86::VPDPWSSDSZ256rk: + case X86::VPDPWSSDSZ256rkz: + case X86::VPDPWSSDSZr: + case X86::VPDPWSSDSZrk: + case X86::VPDPWSSDSZrkz: case X86::VPMADD52HUQZ128r: case X86::VPMADD52HUQZ128rk: case X86::VPMADD52HUQZ128rkz: @@ -2215,7 +2325,7 @@ unsigned X86::getVPCMPImmForCond(ISD::CondCode CC) { } } -/// Get the VPCMP immediate if the opcodes are swapped. +/// Get the VPCMP immediate if the operands are swapped. unsigned X86::getSwappedVPCMPImm(unsigned Imm) { switch (Imm) { default: llvm_unreachable("Unreachable!"); @@ -2233,7 +2343,7 @@ unsigned X86::getSwappedVPCMPImm(unsigned Imm) { return Imm; } -/// Get the VPCOM immediate if the opcodes are swapped. +/// Get the VPCOM immediate if the operands are swapped. unsigned X86::getSwappedVPCOMImm(unsigned Imm) { switch (Imm) { default: llvm_unreachable("Unreachable!"); @@ -2251,6 +2361,23 @@ unsigned X86::getSwappedVPCOMImm(unsigned Imm) { return Imm; } +/// Get the VCMP immediate if the operands are swapped. +unsigned X86::getSwappedVCMPImm(unsigned Imm) { + // Only need the lower 2 bits to distinquish. 
+ switch (Imm & 0x3) { + default: llvm_unreachable("Unreachable!"); + case 0x00: case 0x03: + // EQ/NE/TRUE/FALSE/ORD/UNORD don't change immediate when commuted. + break; + case 0x01: case 0x02: + // Need to toggle bits 3:0. Bit 4 stays the same. + Imm ^= 0xf; + break; + } + + return Imm; +} + bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const { if (!MI.isTerminator()) return false; @@ -3131,25 +3258,6 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(isKill)); } -void X86InstrInfo::storeRegToAddr( - MachineFunction &MF, unsigned SrcReg, bool isKill, - SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC, - ArrayRef<MachineMemOperand *> MMOs, - SmallVectorImpl<MachineInstr *> &NewMIs) const { - const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16); - bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; - unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget); - DebugLoc DL; - MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); - for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB.add(Addr[i]); - MIB.addReg(SrcReg, getKillRegState(isKill)); - MIB.setMemRefs(MMOs); - NewMIs.push_back(MIB); -} - - void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIdx, @@ -3164,23 +3272,6 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg), FrameIdx); } -void X86InstrInfo::loadRegFromAddr( - MachineFunction &MF, unsigned DestReg, - SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC, - ArrayRef<MachineMemOperand *> MMOs, - SmallVectorImpl<MachineInstr *> &NewMIs) const { - const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16); - bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; - unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget); - DebugLoc DL; - MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); - for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB.add(Addr[i]); - MIB.setMemRefs(MMOs); - NewMIs.push_back(MIB); -} - bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, unsigned &SrcReg2, int &CmpMask, int &CmpValue) const { @@ -3599,8 +3690,9 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, if (!IsCmpZero && !Sub) return false; - bool IsSwapped = (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && - Sub->getOperand(2).getReg() == SrcReg); + bool IsSwapped = + (SrcReg2 != 0 && Sub && Sub->getOperand(1).getReg() == SrcReg2 && + Sub->getOperand(2).getReg() == SrcReg); // Scan forward from the instruction after CmpInstr for uses of EFLAGS. // It is safe to remove CmpInstr if EFLAGS is redefined or killed. @@ -3755,7 +3847,7 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI, MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg != FoldAsLoadDefReg) continue; // Do not fold if we have a subreg use or a def. 
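The new X86::getSwappedVCMPImm exploits the layout of the AVX compare-predicate encoding: within each group of sixteen, the LT/LE-style predicates (low two bits 01 or 10) find their operand-swapped counterpart at the position with bits 3:0 inverted, while EQ/NE/ORD/UNORD/TRUE/FALSE (low bits 00 or 11) are symmetric. A standalone mirror with a few worked values (not LLVM code):

    #include <cassert>

    unsigned swappedVCMPImm(unsigned Imm) {
      if ((Imm & 0x3) == 0x1 || (Imm & 0x3) == 0x2)
        Imm ^= 0xf;      // LT<->GT and LE<->GE families; bit 4 is untouched
      return Imm;        // symmetric predicates map to themselves
    }

    int main() {
      assert(swappedVCMPImm(0x01 /*LT_OS*/)  == 0x0E /*GT_OS*/);  // a<b == b>a
      assert(swappedVCMPImm(0x02 /*LE_OS*/)  == 0x0D /*GE_OS*/);
      assert(swappedVCMPImm(0x05 /*NLT_US*/) == 0x0A /*NGT_US*/);
      assert(swappedVCMPImm(0x00 /*EQ_OQ*/)  == 0x00);            // symmetric
      assert(swappedVCMPImm(0x11 /*LT_OQ*/)  == 0x1E /*GT_OQ*/);
    }

This is what lets findCommutedOpIndices above accept every EVEX VCMP predicate, while the VEX/SSE forms remain commutable only for the four symmetric ones.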
@@ -3785,7 +3877,7 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI, static bool Expand2AddrUndef(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) { assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction."); - unsigned Reg = MIB->getOperand(0).getReg(); + Register Reg = MIB->getOperand(0).getReg(); MIB->setDesc(Desc); // MachineInstr::addOperand() will insert explicit operands before any @@ -3815,7 +3907,7 @@ static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII, bool MinusOne) { MachineBasicBlock &MBB = *MIB->getParent(); DebugLoc DL = MIB->getDebugLoc(); - unsigned Reg = MIB->getOperand(0).getReg(); + Register Reg = MIB->getOperand(0).getReg(); // Insert the XOR. BuildMI(MBB, MIB.getInstr(), DL, TII.get(X86::XOR32rr), Reg) @@ -3891,7 +3983,7 @@ static void expandLoadStackGuard(MachineInstrBuilder &MIB, const TargetInstrInfo &TII) { MachineBasicBlock &MBB = *MIB->getParent(); DebugLoc DL = MIB->getDebugLoc(); - unsigned Reg = MIB->getOperand(0).getReg(); + Register Reg = MIB->getOperand(0).getReg(); const GlobalValue *GV = cast<GlobalValue>((*MIB->memoperands_begin())->getValue()); auto Flags = MachineMemOperand::MOLoad | @@ -3929,7 +4021,7 @@ static bool expandNOVLXLoad(MachineInstrBuilder &MIB, const MCInstrDesc &LoadDesc, const MCInstrDesc &BroadcastDesc, unsigned SubIdx) { - unsigned DestReg = MIB->getOperand(0).getReg(); + Register DestReg = MIB->getOperand(0).getReg(); // Check if DestReg is XMM16-31 or YMM16-31. if (TRI->getEncodingValue(DestReg) < 16) { // We can use a normal VEX encoded load. @@ -3952,7 +4044,7 @@ static bool expandNOVLXStore(MachineInstrBuilder &MIB, const MCInstrDesc &StoreDesc, const MCInstrDesc &ExtractDesc, unsigned SubIdx) { - unsigned SrcReg = MIB->getOperand(X86::AddrNumOperands).getReg(); + Register SrcReg = MIB->getOperand(X86::AddrNumOperands).getReg(); // Check if DestReg is XMM16-31 or YMM16-31. if (TRI->getEncodingValue(SrcReg) < 16) { // We can use a normal VEX encoded store. @@ -4008,12 +4100,13 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case X86::V_SET0: case X86::FsFLD0SS: case X86::FsFLD0SD: + case X86::FsFLD0F128: return Expand2AddrUndef(MIB, get(HasAVX ? 
X86::VXORPSrr : X86::XORPSrr)); case X86::AVX_SET0: { assert(HasAVX && "AVX not supported"); const TargetRegisterInfo *TRI = &getRegisterInfo(); - unsigned SrcReg = MIB->getOperand(0).getReg(); - unsigned XReg = TRI->getSubReg(SrcReg, X86::sub_xmm); + Register SrcReg = MIB->getOperand(0).getReg(); + Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm); MIB->getOperand(0).setReg(XReg); Expand2AddrUndef(MIB, get(X86::VXORPSrr)); MIB.addReg(SrcReg, RegState::ImplicitDefine); @@ -4021,9 +4114,10 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { } case X86::AVX512_128_SET0: case X86::AVX512_FsFLD0SS: - case X86::AVX512_FsFLD0SD: { + case X86::AVX512_FsFLD0SD: + case X86::AVX512_FsFLD0F128: { bool HasVLX = Subtarget.hasVLX(); - unsigned SrcReg = MIB->getOperand(0).getReg(); + Register SrcReg = MIB->getOperand(0).getReg(); const TargetRegisterInfo *TRI = &getRegisterInfo(); if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) return Expand2AddrUndef(MIB, @@ -4037,10 +4131,10 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case X86::AVX512_256_SET0: case X86::AVX512_512_SET0: { bool HasVLX = Subtarget.hasVLX(); - unsigned SrcReg = MIB->getOperand(0).getReg(); + Register SrcReg = MIB->getOperand(0).getReg(); const TargetRegisterInfo *TRI = &getRegisterInfo(); if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) { - unsigned XReg = TRI->getSubReg(SrcReg, X86::sub_xmm); + Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm); MIB->getOperand(0).setReg(XReg); Expand2AddrUndef(MIB, get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr)); @@ -4060,14 +4154,14 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case X86::AVX2_SETALLONES: return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr)); case X86::AVX1_SETALLONES: { - unsigned Reg = MIB->getOperand(0).getReg(); + Register Reg = MIB->getOperand(0).getReg(); // VCMPPSYrri with an immediate 0xf should produce VCMPTRUEPS. MIB->setDesc(get(X86::VCMPPSYrri)); MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xf); return true; } case X86::AVX512_512_SETALLONES: { - unsigned Reg = MIB->getOperand(0).getReg(); + Register Reg = MIB->getOperand(0).getReg(); MIB->setDesc(get(X86::VPTERNLOGDZrri)); // VPTERNLOGD needs 3 register inputs and an immediate. // 0xff will return 1s for any input. @@ -4077,8 +4171,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { } case X86::AVX512_512_SEXT_MASK_32: case X86::AVX512_512_SEXT_MASK_64: { - unsigned Reg = MIB->getOperand(0).getReg(); - unsigned MaskReg = MIB->getOperand(1).getReg(); + Register Reg = MIB->getOperand(0).getReg(); + Register MaskReg = MIB->getOperand(1).getReg(); unsigned MaskState = getRegState(MIB->getOperand(1)); unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64) ? X86::VPTERNLOGQZrrikz : X86::VPTERNLOGDZrrikz; @@ -4115,8 +4209,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr), get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm); case X86::MOV32ri64: { - unsigned Reg = MIB->getOperand(0).getReg(); - unsigned Reg32 = RI.getSubReg(Reg, X86::sub_32bit); + Register Reg = MIB->getOperand(0).getReg(); + Register Reg32 = RI.getSubReg(Reg, X86::sub_32bit); MI.setDesc(get(X86::MOV32ri)); MIB->getOperand(0).setReg(Reg32); MIB.addReg(Reg, RegState::ImplicitDefine); @@ -4251,8 +4345,8 @@ unsigned X86InstrInfo::getPartialRegUpdateClearance( // If MI is marked as reading Reg, the partial register update is wanted. 
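AVX512_512_SETALLONES is expanded in place to VPTERNLOGD, whose 8-bit immediate is the complete truth table over the three source bits; with 0xff every entry is 1, so the result is all-ones no matter what the (undef) inputs hold. A scalar model of that semantics, for illustration only:

    #include <cstdint>

    // Result bit i of VPTERNLOGD is Imm[(a << 2) | (b << 1) | c], where a, b, c
    // are bit i of the three sources.
    uint32_t ternlog32(uint32_t A, uint32_t B, uint32_t C, uint8_t Imm) {
      uint32_t R = 0;
      for (unsigned Bit = 0; Bit < 32; ++Bit) {
        unsigned Idx = (((A >> Bit) & 1) << 2) | (((B >> Bit) & 1) << 1) |
                       ((C >> Bit) & 1);
        R |= uint32_t((Imm >> Idx) & 1) << Bit;
      }
      return R;   // Imm == 0xff gives ~0u for any inputs, hence "SETALLONES"
    }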
const MachineOperand &MO = MI.getOperand(0); - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isVirtualRegister(Reg)) { if (MO.readsReg() || MI.readsVirtualRegister(Reg)) return 0; } else { @@ -4268,7 +4362,10 @@ unsigned X86InstrInfo::getPartialRegUpdateClearance( // Return true for any instruction the copies the high bits of the first source // operand into the unused high bits of the destination operand. -static bool hasUndefRegUpdate(unsigned Opcode, bool ForLoadFold = false) { +static bool hasUndefRegUpdate(unsigned Opcode, unsigned &OpNum, + bool ForLoadFold = false) { + // Set the OpNum parameter to the first source operand. + OpNum = 1; switch (Opcode) { case X86::VCVTSI2SSrr: case X86::VCVTSI2SSrm: @@ -4427,6 +4524,14 @@ static bool hasUndefRegUpdate(unsigned Opcode, bool ForLoadFold = false) { case X86::VSQRTSDZm: case X86::VSQRTSDZm_Int: return true; + case X86::VMOVSSZrrk: + case X86::VMOVSDZrrk: + OpNum = 3; + return true; + case X86::VMOVSSZrrkz: + case X86::VMOVSDZrrkz: + OpNum = 2; + return true; } return false; @@ -4449,14 +4554,11 @@ static bool hasUndefRegUpdate(unsigned Opcode, bool ForLoadFold = false) { unsigned X86InstrInfo::getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum, const TargetRegisterInfo *TRI) const { - if (!hasUndefRegUpdate(MI.getOpcode())) + if (!hasUndefRegUpdate(MI.getOpcode(), OpNum)) return 0; - // Set the OpNum parameter to the first source operand. - OpNum = 1; - const MachineOperand &MO = MI.getOperand(OpNum); - if (MO.isUndef() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + if (MO.isUndef() && Register::isPhysicalRegister(MO.getReg())) { return UndefRegClearance; } return 0; @@ -4464,7 +4566,7 @@ X86InstrInfo::getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum, void X86InstrInfo::breakPartialRegDependency( MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const { - unsigned Reg = MI.getOperand(OpNum).getReg(); + Register Reg = MI.getOperand(OpNum).getReg(); // If MI kills this register, the false dependence is already broken. if (MI.killsRegister(Reg, TRI)) return; @@ -4480,7 +4582,7 @@ void X86InstrInfo::breakPartialRegDependency( } else if (X86::VR256RegClass.contains(Reg)) { // Use vxorps to clear the full ymm register. // It wants to read and write the xmm sub-register. - unsigned XReg = TRI->getSubReg(Reg, X86::sub_xmm); + Register XReg = TRI->getSubReg(Reg, X86::sub_xmm); BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VXORPSrr), XReg) .addReg(XReg, RegState::Undef) .addReg(XReg, RegState::Undef) @@ -4489,7 +4591,7 @@ void X86InstrInfo::breakPartialRegDependency( } else if (X86::GR64RegClass.contains(Reg)) { // Using XOR32rr because it has shorter encoding and zeros up the upper bits // as well. - unsigned XReg = TRI->getSubReg(Reg, X86::sub_32bit); + Register XReg = TRI->getSubReg(Reg, X86::sub_32bit); BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), XReg) .addReg(XReg, RegState::Undef) .addReg(XReg, RegState::Undef) @@ -4538,8 +4640,8 @@ static void updateOperandRegConstraints(MachineFunction &MF, // We only need to update constraints on virtual register operands. 
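hasUndefRegUpdate now reports which operand is the pass-through source (operand 3 for the masked VMOVSSZrrk/VMOVSDZrrk forms, operand 2 for the zero-masked ones) instead of getUndefRegClearance hard-coding operand 1. The clearance machinery exists to avoid false dependencies on instructions that write only part of a register; the cure applied by breakPartialRegDependency, also retyped above, is a self-XOR along these lines (sketch with placeholder TII and Reg):

    // Before e.g. "cvtsi2ss %eax, %xmm0", which writes only xmm0[31:0]:
    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII.get(X86::VXORPSrr), Reg)
        .addReg(Reg, RegState::Undef)
        .addReg(Reg, RegState::Undef);
    // The xor fully redefines Reg, so the partial write no longer has to wait
    // for the stale full-width value.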
if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TRI.isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; auto *NewRC = MRI.constrainRegClass( @@ -4698,7 +4800,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom( static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF, MachineInstr &MI) { - if (!hasUndefRegUpdate(MI.getOpcode(), /*ForLoadFold*/true) || + unsigned Ignored; + if (!hasUndefRegUpdate(MI.getOpcode(), Ignored, /*ForLoadFold*/true) || !MI.getOperand(1).isReg()) return false; @@ -4788,6 +4891,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( if (I != nullptr) { unsigned Opcode = I->DstOp; unsigned MinAlign = (I->Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT; + MinAlign = MinAlign ? 1 << (MinAlign - 1) : 0; if (Align < MinAlign) return nullptr; bool NarrowToMOV32rm = false; @@ -4821,8 +4925,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( // If this is the special case where we use a MOV32rm to load a 32-bit // value and zero-extend the top bits. Change the destination register // to a 32-bit one. - unsigned DstReg = NewMI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + Register DstReg = NewMI->getOperand(0).getReg(); + if (Register::isPhysicalRegister(DstReg)) NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, X86::sub_32bit)); else NewMI->getOperand(0).setSubReg(X86::sub_32bit); @@ -5133,6 +5237,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( case X86::V_SET0: case X86::V_SETALLONES: case X86::AVX512_128_SET0: + case X86::FsFLD0F128: + case X86::AVX512_FsFLD0F128: Alignment = 16; break; case X86::MMX_SET0: @@ -5182,7 +5288,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( case X86::FsFLD0SD: case X86::AVX512_FsFLD0SD: case X86::FsFLD0SS: - case X86::AVX512_FsFLD0SS: { + case X86::AVX512_FsFLD0SS: + case X86::FsFLD0F128: + case X86::AVX512_FsFLD0F128: { // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure. // Create a constant-pool entry and operands to load from it. 
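The memory-fold tables now store their alignment requirement as a compact encoded value rather than a byte count, hence the extra decode step before the Align < MinAlign comparison: 0 means no requirement, otherwise the required alignment is 1 << (value - 1) bytes. As a standalone helper:

    // Decode the TB_ALIGN field of a fold-table entry into a byte alignment.
    unsigned decodeFoldAlignment(unsigned Encoded) {
      return Encoded ? 1u << (Encoded - 1) : 0;   // e.g. 5 -> 16, 6 -> 32, 7 -> 64
    }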
@@ -5212,6 +5320,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( Ty = Type::getFloatTy(MF.getFunction().getContext()); else if (Opc == X86::FsFLD0SD || Opc == X86::AVX512_FsFLD0SD) Ty = Type::getDoubleTy(MF.getFunction().getContext()); + else if (Opc == X86::FsFLD0F128 || Opc == X86::AVX512_FsFLD0F128) + Ty = Type::getFP128Ty(MF.getFunction().getContext()); else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES) Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),16); else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 || @@ -5293,6 +5403,51 @@ extractStoreMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) { return StoreMMOs; } +static unsigned getBroadcastOpcode(const X86MemoryFoldTableEntry *I, + const TargetRegisterClass *RC, + const X86Subtarget &STI) { + assert(STI.hasAVX512() && "Expected at least AVX512!"); + unsigned SpillSize = STI.getRegisterInfo()->getSpillSize(*RC); + assert((SpillSize == 64 || STI.hasVLX()) && + "Can't broadcast less than 64 bytes without AVX512VL!"); + + switch (I->Flags & TB_BCAST_MASK) { + default: llvm_unreachable("Unexpected broadcast type!"); + case TB_BCAST_D: + switch (SpillSize) { + default: llvm_unreachable("Unknown spill size"); + case 16: return X86::VPBROADCASTDZ128m; + case 32: return X86::VPBROADCASTDZ256m; + case 64: return X86::VPBROADCASTDZm; + } + break; + case TB_BCAST_Q: + switch (SpillSize) { + default: llvm_unreachable("Unknown spill size"); + case 16: return X86::VPBROADCASTQZ128m; + case 32: return X86::VPBROADCASTQZ256m; + case 64: return X86::VPBROADCASTQZm; + } + break; + case TB_BCAST_SS: + switch (SpillSize) { + default: llvm_unreachable("Unknown spill size"); + case 16: return X86::VBROADCASTSSZ128m; + case 32: return X86::VBROADCASTSSZ256m; + case 64: return X86::VBROADCASTSSZm; + } + break; + case TB_BCAST_SD: + switch (SpillSize) { + default: llvm_unreachable("Unknown spill size"); + case 16: return X86::VMOVDDUPZ128rm; + case 32: return X86::VBROADCASTSDZ256m; + case 64: return X86::VBROADCASTSDZm; + } + break; + } +} + bool X86InstrInfo::unfoldMemoryOperand( MachineFunction &MF, MachineInstr &MI, unsigned Reg, bool UnfoldLoad, bool UnfoldStore, SmallVectorImpl<MachineInstr *> &NewMIs) const { @@ -5303,6 +5458,7 @@ bool X86InstrInfo::unfoldMemoryOperand( unsigned Index = I->Flags & TB_INDEX_MASK; bool FoldedLoad = I->Flags & TB_FOLDED_LOAD; bool FoldedStore = I->Flags & TB_FOLDED_STORE; + bool FoldedBCast = I->Flags & TB_FOLDED_BCAST; if (UnfoldLoad && !FoldedLoad) return false; UnfoldLoad &= FoldedLoad; @@ -5311,7 +5467,9 @@ bool X86InstrInfo::unfoldMemoryOperand( UnfoldStore &= FoldedStore; const MCInstrDesc &MCID = get(Opc); + const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); // TODO: Check if 32-byte or greater accesses are slow too? if (!MI.hasOneMemOperand() && RC == &X86::VR128RegClass && Subtarget.isUnalignedMem16Slow()) @@ -5335,10 +5493,26 @@ bool X86InstrInfo::unfoldMemoryOperand( AfterOps.push_back(Op); } - // Emit the load instruction. + // Emit the load or broadcast instruction. 
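The new getBroadcastOpcode picks the instruction used to re-expand a folded broadcast operand (TB_FOLDED_BCAST), and, as the following hunks show, unfoldMemoryOperand now chooses between it and the ordinary spill-load opcode inline, which is why the single-use loadRegFromAddr/storeRegToAddr helpers could be deleted earlier in this diff. One non-obvious detail: a 16-byte TB_BCAST_SD has no VBROADCASTSD form, so VMOVDDUPZ128rm (duplicate the low 64-bit lane) is used instead. Reduced to that one case (broadcastSDOpcode is illustrative, not in the patch):

    static unsigned broadcastSDOpcode(unsigned SpillSize) {
      switch (SpillSize) {
      case 16: return X86::VMOVDDUPZ128rm;     // no 128-bit VBROADCASTSD exists
      case 32: return X86::VBROADCASTSDZ256m;
      case 64: return X86::VBROADCASTSDZm;
      default: llvm_unreachable("Unknown spill size");
      }
    }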
if (UnfoldLoad) { auto MMOs = extractLoadMMOs(MI.memoperands(), MF); - loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs, NewMIs); + + unsigned Opc; + if (FoldedBCast) { + Opc = getBroadcastOpcode(I, RC, Subtarget); + } else { + unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16); + bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; + Opc = getLoadRegOpcode(Reg, RC, isAligned, Subtarget); + } + + DebugLoc DL; + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), Reg); + for (unsigned i = 0, e = AddrOps.size(); i != e; ++i) + MIB.add(AddrOps[i]); + MIB.setMemRefs(MMOs); + NewMIs.push_back(MIB); + if (UnfoldStore) { // Address operands cannot be marked isKill. for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) { @@ -5404,7 +5578,16 @@ bool X86InstrInfo::unfoldMemoryOperand( if (UnfoldStore) { const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF); auto MMOs = extractStoreMMOs(MI.memoperands(), MF); - storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs, NewMIs); + unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*DstRC), 16); + bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; + unsigned Opc = getStoreRegOpcode(Reg, DstRC, isAligned, Subtarget); + DebugLoc DL; + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); + for (unsigned i = 0, e = AddrOps.size(); i != e; ++i) + MIB.add(AddrOps[i]); + MIB.addReg(Reg, RegState::Kill); + MIB.setMemRefs(MMOs); + NewMIs.push_back(MIB); } return true; @@ -5423,6 +5606,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, unsigned Index = I->Flags & TB_INDEX_MASK; bool FoldedLoad = I->Flags & TB_FOLDED_LOAD; bool FoldedStore = I->Flags & TB_FOLDED_STORE; + bool FoldedBCast = I->Flags & TB_FOLDED_BCAST; const MCInstrDesc &MCID = get(Opc); MachineFunction &MF = DAG.getMachineFunction(); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); @@ -5456,10 +5640,17 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, return false; // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte // memory access is slow above. - unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16); - bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; - Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, Subtarget), dl, - VT, MVT::Other, AddrOps); + + unsigned Opc; + if (FoldedBCast) { + Opc = getBroadcastOpcode(I, RC, Subtarget); + } else { + unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16); + bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; + Opc = getLoadRegOpcode(0, RC, isAligned, Subtarget); + } + + Load = DAG.getMachineNode(Opc, dl, VT, MVT::Other, AddrOps); NewNodes.push_back(Load); // Preserve memory reference information. @@ -7367,6 +7558,96 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { } } +Optional<ParamLoadedValue> +X86InstrInfo::describeLoadedValue(const MachineInstr &MI) const { + const MachineOperand *Op = nullptr; + DIExpression *Expr = nullptr; + + switch (MI.getOpcode()) { + case X86::LEA32r: + case X86::LEA64r: + case X86::LEA64_32r: { + // Operand 4 could be global address. For now we do not support + // such situation. 
+ if (!MI.getOperand(4).isImm() || !MI.getOperand(2).isImm()) + return None; + + const MachineOperand &Op1 = MI.getOperand(1); + const MachineOperand &Op2 = MI.getOperand(3); + const TargetRegisterInfo *TRI = &getRegisterInfo(); + assert(Op2.isReg() && (Op2.getReg() == X86::NoRegister || + Register::isPhysicalRegister(Op2.getReg()))); + + // Omit situations like: + // %rsi = lea %rsi, 4, ... + if ((Op1.isReg() && Op1.getReg() == MI.getOperand(0).getReg()) || + Op2.getReg() == MI.getOperand(0).getReg()) + return None; + else if ((Op1.isReg() && Op1.getReg() != X86::NoRegister && + TRI->regsOverlap(Op1.getReg(), MI.getOperand(0).getReg())) || + (Op2.getReg() != X86::NoRegister && + TRI->regsOverlap(Op2.getReg(), MI.getOperand(0).getReg()))) + return None; + + int64_t Coef = MI.getOperand(2).getImm(); + int64_t Offset = MI.getOperand(4).getImm(); + SmallVector<uint64_t, 8> Ops; + + if ((Op1.isReg() && Op1.getReg() != X86::NoRegister)) { + Op = &Op1; + } else if (Op1.isFI()) + Op = &Op1; + + if (Op && Op->isReg() && Op->getReg() == Op2.getReg() && Coef > 0) { + Ops.push_back(dwarf::DW_OP_constu); + Ops.push_back(Coef + 1); + Ops.push_back(dwarf::DW_OP_mul); + } else { + if (Op && Op2.getReg() != X86::NoRegister) { + int dwarfReg = TRI->getDwarfRegNum(Op2.getReg(), false); + if (dwarfReg < 0) + return None; + else if (dwarfReg < 32) { + Ops.push_back(dwarf::DW_OP_breg0 + dwarfReg); + Ops.push_back(0); + } else { + Ops.push_back(dwarf::DW_OP_bregx); + Ops.push_back(dwarfReg); + Ops.push_back(0); + } + } else if (!Op) { + assert(Op2.getReg() != X86::NoRegister); + Op = &Op2; + } + + if (Coef > 1) { + assert(Op2.getReg() != X86::NoRegister); + Ops.push_back(dwarf::DW_OP_constu); + Ops.push_back(Coef); + Ops.push_back(dwarf::DW_OP_mul); + } + + if (((Op1.isReg() && Op1.getReg() != X86::NoRegister) || Op1.isFI()) && + Op2.getReg() != X86::NoRegister) { + Ops.push_back(dwarf::DW_OP_plus); + } + } + + DIExpression::appendOffset(Ops, Offset); + Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), Ops); + + return ParamLoadedValue(*Op, Expr);; + } + case X86::XOR32rr: { + if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) + return ParamLoadedValue(MachineOperand::CreateImm(0), Expr); + return None; + } + default: + return TargetInstrInfo::describeLoadedValue(MI); + } +} + /// This is an architecture-specific helper function of reassociateOps. /// Set special operand attributes for new instructions after reassociation. void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1, @@ -7500,9 +7781,8 @@ namespace { // movq $_GLOBAL_OFFSET_TABLE_ - .LN$pb, %rcx // addq %rcx, %rax // RAX now holds address of _GLOBAL_OFFSET_TABLE_. - unsigned PBReg = RegInfo.createVirtualRegister(&X86::GR64RegClass); - unsigned GOTReg = - RegInfo.createVirtualRegister(&X86::GR64RegClass); + Register PBReg = RegInfo.createVirtualRegister(&X86::GR64RegClass); + Register GOTReg = RegInfo.createVirtualRegister(&X86::GR64RegClass); BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PBReg) .addReg(X86::RIP) .addImm(0) |
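describeLoadedValue is the hook used by the call-site parameter / entry-value debug info to describe the value an instruction materializes as another operand plus a DIExpression. For a hypothetical lea 8(%rdi,%rdi,2), %rax (base and index both %rdi, scale 2, displacement 8) the base and index coincide, so the value is rdi * (2 + 1) + 8 and the hook pairs the %rdi operand with the expression DW_OP_constu 3, DW_OP_mul, DW_OP_plus_uconst 8; a self-XOR such as xorl %eax, %eax is described as the constant 0. A standalone model of the same-base-and-index expression, using raw DWARF opcode values from the spec:

    #include <cstdint>
    #include <vector>

    // DW_OP_constu = 0x10, DW_OP_mul = 0x1e, DW_OP_plus_uconst = 0x23.
    std::vector<uint64_t> leaSameBaseIndexExpr(uint64_t Scale, uint64_t Disp) {
      std::vector<uint64_t> Ops = {0x10, Scale + 1, 0x1e};  // constu(Scale+1); mul
      if (Disp)
        Ops.insert(Ops.end(), {0x23, Disp});                // plus_uconst Disp
      return Ops;  // lea 8(%rdi,%rdi,2): {constu 3, mul, plus_uconst 8}
    }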