Diffstat (limited to 'lib/Target/X86/X86InstrInfo.cpp')
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 582
 1 file changed, 431 insertions(+), 151 deletions(-)
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index dbe45356c42b..c29029daeec9 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -30,7 +30,7 @@
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -465,7 +465,7 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
/// Return true if register is PIC base, i.e. defined by X86::MOVPC32r.
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
// Don't waste compile time scanning use-def chains of physregs.
- if (!TargetRegisterInfo::isVirtualRegister(BaseReg))
+ if (!Register::isVirtualRegister(BaseReg))
return false;
bool isPICBase = false;
for (MachineRegisterInfo::def_instr_iterator I = MRI.def_instr_begin(BaseReg),
@@ -480,9 +480,50 @@ static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
}
bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
- AliasAnalysis *AA) const {
+ AAResults *AA) const {
switch (MI.getOpcode()) {
- default: break;
+ default:
+ // This function should only be called for opcodes with the ReMaterializable
+ // flag set.
+ llvm_unreachable("Unknown rematerializable operation!");
+ break;
+
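+  // All of these opcodes materialize a constant (or, for LOAD_STACK_GUARD, an
+  // invariant value) and read no registers, so they are always trivially
+  // rematerializable.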
+ case X86::LOAD_STACK_GUARD:
+ case X86::AVX1_SETALLONES:
+ case X86::AVX2_SETALLONES:
+ case X86::AVX512_128_SET0:
+ case X86::AVX512_256_SET0:
+ case X86::AVX512_512_SET0:
+ case X86::AVX512_512_SETALLONES:
+ case X86::AVX512_FsFLD0SD:
+ case X86::AVX512_FsFLD0SS:
+ case X86::AVX512_FsFLD0F128:
+ case X86::AVX_SET0:
+ case X86::FsFLD0SD:
+ case X86::FsFLD0SS:
+ case X86::FsFLD0F128:
+ case X86::KSET0D:
+ case X86::KSET0Q:
+ case X86::KSET0W:
+ case X86::KSET1D:
+ case X86::KSET1Q:
+ case X86::KSET1W:
+ case X86::MMX_SET0:
+ case X86::MOV32ImmSExti8:
+ case X86::MOV32r0:
+ case X86::MOV32r1:
+ case X86::MOV32r_1:
+ case X86::MOV32ri64:
+ case X86::MOV64ImmSExti8:
+ case X86::V_SET0:
+ case X86::V_SETALLONES:
+ case X86::MOV16ri:
+ case X86::MOV32ri:
+ case X86::MOV64ri:
+ case X86::MOV64ri32:
+ case X86::MOV8ri:
+ return true;
+
case X86::MOV8rm:
case X86::MOV8rm_NOREX:
case X86::MOV16rm:
@@ -561,7 +602,7 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
MI.getOperand(1 + X86::AddrIndexReg).isReg() &&
MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
MI.isDereferenceableInvariantLoad(AA)) {
- unsigned BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
+ Register BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
if (BaseReg == 0 || BaseReg == X86::RIP)
return true;
// Allow re-materialization of PIC load.
@@ -583,7 +624,7 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
// lea fi#, lea GV, etc. are all rematerializable.
if (!MI.getOperand(1 + X86::AddrBaseReg).isReg())
return true;
- unsigned BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
+ Register BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
if (BaseReg == 0)
return true;
// Allow re-materialization of lea PICBase + x.
@@ -594,10 +635,6 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
return false;
}
}
-
- // All other instructions marked M_REMATERIALIZABLE are always trivially
- // rematerializable.
- return true;
}
void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
@@ -664,7 +701,7 @@ inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
}
bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
- unsigned Opc, bool AllowSP, unsigned &NewSrc,
+ unsigned Opc, bool AllowSP, Register &NewSrc,
bool &isKill, MachineOperand &ImplicitOp,
LiveVariables *LV) const {
MachineFunction &MF = *MI.getParent()->getParent();
@@ -675,7 +712,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
RC = Opc != X86::LEA32r ?
&X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass;
}
- unsigned SrcReg = Src.getReg();
+ Register SrcReg = Src.getReg();
// For both LEA64 and LEA32 the register already has essentially the right
// type (32-bit or 64-bit); we may just need to forbid SP.
@@ -684,7 +721,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
isKill = Src.isKill();
assert(!Src.isUndef() && "Undef op doesn't need optimization");
- if (TargetRegisterInfo::isVirtualRegister(NewSrc) &&
+ if (Register::isVirtualRegister(NewSrc) &&
!MF.getRegInfo().constrainRegClass(NewSrc, RC))
return false;
@@ -693,7 +730,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
// This is for an LEA64_32r and incoming registers are 32-bit. One way or
// another we need to add 64-bit registers to the final MI.
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+ if (Register::isPhysicalRegister(SrcReg)) {
ImplicitOp = Src;
ImplicitOp.setImplicit();
@@ -740,8 +777,8 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
return nullptr;
unsigned Opcode = X86::LEA64_32r;
- unsigned InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
- unsigned OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ Register InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
+ Register OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass);
// Build and insert into an implicit UNDEF value. This is OK because
// we will be shifting and then extracting the lower 8/16-bits.
@@ -751,8 +788,8 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
// But testing has shown this *does* help performance in 64-bit mode (at
// least on modern x86 machines).
MachineBasicBlock::iterator MBBI = MI.getIterator();
- unsigned Dest = MI.getOperand(0).getReg();
- unsigned Src = MI.getOperand(1).getReg();
+ Register Dest = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
bool IsDead = MI.getOperand(0).isDead();
bool IsKill = MI.getOperand(1).isKill();
unsigned SubReg = Is8BitOp ? X86::sub_8bit : X86::sub_16bit;
@@ -794,7 +831,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
case X86::ADD8rr_DB:
case X86::ADD16rr:
case X86::ADD16rr_DB: {
- unsigned Src2 = MI.getOperand(2).getReg();
+ Register Src2 = MI.getOperand(2).getReg();
bool IsKill2 = MI.getOperand(2).isKill();
assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization");
unsigned InRegLEA2 = 0;
@@ -888,7 +925,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
// LEA can't handle RSP.
- if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
+ if (Register::isVirtualRegister(Src.getReg()) &&
!MF.getRegInfo().constrainRegClass(Src.getReg(),
&X86::GR64_NOSPRegClass))
return nullptr;
@@ -911,7 +948,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// LEA can't handle ESP.
bool isKill;
- unsigned SrcReg;
+ Register SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
SrcReg, isKill, ImplicitOp, LV))
@@ -947,7 +984,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r :
(Is64Bit ? X86::LEA64_32r : X86::LEA32r);
bool isKill;
- unsigned SrcReg;
+ Register SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill,
ImplicitOp, LV))
@@ -970,7 +1007,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
: (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
bool isKill;
- unsigned SrcReg;
+ Register SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill,
ImplicitOp, LV))
@@ -1005,7 +1042,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
bool isKill;
- unsigned SrcReg;
+ Register SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
SrcReg, isKill, ImplicitOp, LV))
@@ -1013,7 +1050,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
const MachineOperand &Src2 = MI.getOperand(2);
bool isKill2;
- unsigned SrcReg2;
+ Register SrcReg2;
MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false,
SrcReg2, isKill2, ImplicitOp2, LV))
@@ -1054,7 +1091,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
bool isKill;
- unsigned SrcReg;
+ Register SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
SrcReg, isKill, ImplicitOp, LV))
@@ -1085,6 +1122,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return nullptr;
case X86::SUB32ri8:
case X86::SUB32ri: {
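+    // The 'immediate' operand may actually be a symbolic value (e.g. a global
+    // address); bail out since it cannot be negated into an LEA displacement.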
+ if (!MI.getOperand(2).isImm())
+ return nullptr;
int64_t Imm = MI.getOperand(2).getImm();
if (!isInt<32>(-Imm))
return nullptr;
@@ -1093,7 +1132,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
bool isKill;
- unsigned SrcReg;
+ Register SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
SrcReg, isKill, ImplicitOp, LV))
@@ -1111,6 +1150,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::SUB64ri8:
case X86::SUB64ri32: {
+ if (!MI.getOperand(2).isImm())
+ return nullptr;
int64_t Imm = MI.getOperand(2).getImm();
if (!isInt<32>(-Imm))
return nullptr;
@@ -1140,40 +1181,62 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::VMOVUPDZrmk: case X86::VMOVAPDZrmk:
case X86::VMOVUPSZ128rmk: case X86::VMOVAPSZ128rmk:
case X86::VMOVUPSZ256rmk: case X86::VMOVAPSZ256rmk:
- case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk: {
+ case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk:
+ case X86::VBROADCASTSDZ256mk:
+ case X86::VBROADCASTSDZmk:
+ case X86::VBROADCASTSSZ128mk:
+ case X86::VBROADCASTSSZ256mk:
+ case X86::VBROADCASTSSZmk:
+ case X86::VPBROADCASTDZ128mk:
+ case X86::VPBROADCASTDZ256mk:
+ case X86::VPBROADCASTDZmk:
+ case X86::VPBROADCASTQZ128mk:
+ case X86::VPBROADCASTQZ256mk:
+ case X86::VPBROADCASTQZmk: {
unsigned Opc;
switch (MIOpc) {
default: llvm_unreachable("Unreachable!");
- case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break;
- case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break;
- case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break;
- case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break;
- case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break;
- case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break;
- case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
- case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
- case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
- case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
- case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
- case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
- case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
- case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
- case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
- case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
- case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
- case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
- case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
- case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
- case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
- case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
- case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
- case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
- case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
- case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
- case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
- case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
- case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
- case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
+ case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break;
+ case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break;
+ case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break;
+ case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break;
+ case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break;
+ case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break;
+ case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
+ case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
+ case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
+ case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
+ case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
+ case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
+ case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
+ case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
+ case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
+ case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
+ case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
+ case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
+ case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
+ case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
+ case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
+ case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
+ case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
+ case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
+ case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
+ case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
+ case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
+ case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
+ case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
+ case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
+ case X86::VBROADCASTSDZ256mk: Opc = X86::VBLENDMPDZ256rmbk; break;
+ case X86::VBROADCASTSDZmk: Opc = X86::VBLENDMPDZrmbk; break;
+ case X86::VBROADCASTSSZ128mk: Opc = X86::VBLENDMPSZ128rmbk; break;
+ case X86::VBROADCASTSSZ256mk: Opc = X86::VBLENDMPSZ256rmbk; break;
+ case X86::VBROADCASTSSZmk: Opc = X86::VBLENDMPSZrmbk; break;
+ case X86::VPBROADCASTDZ128mk: Opc = X86::VPBLENDMDZ128rmbk; break;
+ case X86::VPBROADCASTDZ256mk: Opc = X86::VPBLENDMDZ256rmbk; break;
+ case X86::VPBROADCASTDZmk: Opc = X86::VPBLENDMDZrmbk; break;
+ case X86::VPBROADCASTQZ128mk: Opc = X86::VPBLENDMQZ128rmbk; break;
+ case X86::VPBROADCASTQZ256mk: Opc = X86::VPBLENDMQZ256rmbk; break;
+ case X86::VPBROADCASTQZmk: Opc = X86::VPBLENDMQZrmbk; break;
}
NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
@@ -1187,6 +1250,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
.add(MI.getOperand(7));
break;
}
+
case X86::VMOVDQU8Z128rrk:
case X86::VMOVDQU8Z256rrk:
case X86::VMOVDQU8Zrrk:
@@ -1683,6 +1747,27 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
+ case X86::VCMPSDZrr:
+ case X86::VCMPSSZrr:
+ case X86::VCMPPDZrri:
+ case X86::VCMPPSZrri:
+ case X86::VCMPPDZ128rri:
+ case X86::VCMPPSZ128rri:
+ case X86::VCMPPDZ256rri:
+ case X86::VCMPPSZ256rri:
+ case X86::VCMPPDZrrik:
+ case X86::VCMPPSZrrik:
+ case X86::VCMPPDZ128rrik:
+ case X86::VCMPPSZ128rrik:
+ case X86::VCMPPDZ256rrik:
+ case X86::VCMPPSZ256rrik: {
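+    // The last operand is the 5-bit comparison predicate; rewrite it so that
+    // the comparison still holds with the source operands exchanged.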
+ unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x1f;
+ Imm = X86::getSwappedVCMPImm(Imm);
+ auto &WorkingMI = cloneIfNew(MI);
+ WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm);
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+ OpIdx1, OpIdx2);
+ }
case X86::VPERM2F128rr:
case X86::VPERM2I128rr: {
// Flip permute source immediate.
@@ -1859,7 +1944,7 @@ X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
// CommutableOpIdx2 is well defined now. Let's choose another commutable
// operand and assign its index to CommutableOpIdx1.
- unsigned Op2Reg = MI.getOperand(CommutableOpIdx2).getReg();
+ Register Op2Reg = MI.getOperand(CommutableOpIdx2).getReg();
unsigned CommutableOpIdx1;
for (CommutableOpIdx1 = LastCommutableVecOp;
@@ -1889,7 +1974,8 @@ X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
return true;
}
-bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
+bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
+ unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
const MCInstrDesc &Desc = MI.getDesc();
if (!Desc.isCommutable())
@@ -1926,17 +2012,23 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
// Ordered/Unordered/Equal/NotEqual tests
unsigned Imm = MI.getOperand(3 + OpOffset).getImm() & 0x7;
switch (Imm) {
+ default:
+ // EVEX versions can be commuted.
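+      // (commuteInstructionImpl rewrites the predicate via getSwappedVCMPImm.)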
+ if ((Desc.TSFlags & X86II::EncodingMask) == X86II::EVEX)
+ break;
+ return false;
case 0x00: // EQUAL
case 0x03: // UNORDERED
case 0x04: // NOT EQUAL
case 0x07: // ORDERED
- // The indices of the commutable operands are 1 and 2 (or 2 and 3
- // when masked).
- // Assign them to the returned operand indices here.
- return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1 + OpOffset,
- 2 + OpOffset);
+ break;
}
- return false;
+
+ // The indices of the commutable operands are 1 and 2 (or 2 and 3
+ // when masked).
+ // Assign them to the returned operand indices here.
+ return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1 + OpOffset,
+ 2 + OpOffset);
}
case X86::MOVSSrr:
// X86::MOVSDrr is always commutable. MOVSS is only commutable if we can
@@ -1990,6 +2082,24 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
case X86::VPTERNLOGQZ256rmbikz:
case X86::VPTERNLOGQZrmbikz:
return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
+ case X86::VPDPWSSDZ128r:
+ case X86::VPDPWSSDZ128rk:
+ case X86::VPDPWSSDZ128rkz:
+ case X86::VPDPWSSDZ256r:
+ case X86::VPDPWSSDZ256rk:
+ case X86::VPDPWSSDZ256rkz:
+ case X86::VPDPWSSDZr:
+ case X86::VPDPWSSDZrk:
+ case X86::VPDPWSSDZrkz:
+ case X86::VPDPWSSDSZ128r:
+ case X86::VPDPWSSDSZ128rk:
+ case X86::VPDPWSSDSZ128rkz:
+ case X86::VPDPWSSDSZ256r:
+ case X86::VPDPWSSDSZ256rk:
+ case X86::VPDPWSSDSZ256rkz:
+ case X86::VPDPWSSDSZr:
+ case X86::VPDPWSSDSZrk:
+ case X86::VPDPWSSDSZrkz:
case X86::VPMADD52HUQZ128r:
case X86::VPMADD52HUQZ128rk:
case X86::VPMADD52HUQZ128rkz:
@@ -2215,7 +2325,7 @@ unsigned X86::getVPCMPImmForCond(ISD::CondCode CC) {
}
}
-/// Get the VPCMP immediate if the opcodes are swapped.
+/// Get the VPCMP immediate if the operands are swapped.
unsigned X86::getSwappedVPCMPImm(unsigned Imm) {
switch (Imm) {
default: llvm_unreachable("Unreachable!");
@@ -2233,7 +2343,7 @@ unsigned X86::getSwappedVPCMPImm(unsigned Imm) {
return Imm;
}
-/// Get the VPCOM immediate if the opcodes are swapped.
+/// Get the VPCOM immediate if the operands are swapped.
unsigned X86::getSwappedVPCOMImm(unsigned Imm) {
switch (Imm) {
default: llvm_unreachable("Unreachable!");
@@ -2251,6 +2361,23 @@ unsigned X86::getSwappedVPCOMImm(unsigned Imm) {
return Imm;
}
+/// Get the VCMP immediate if the operands are swapped.
+unsigned X86::getSwappedVCMPImm(unsigned Imm) {
+  // Only need the lower 2 bits to distinguish.
+ switch (Imm & 0x3) {
+ default: llvm_unreachable("Unreachable!");
+ case 0x00: case 0x03:
+ // EQ/NE/TRUE/FALSE/ORD/UNORD don't change immediate when commuted.
+ break;
+ case 0x01: case 0x02:
+ // Need to toggle bits 3:0. Bit 4 stays the same.
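+    // For example, LT_OS (0x01) becomes GT_OS (0x0e) and LE_OQ (0x12) becomes
+    // GE_OQ (0x1d).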
+ Imm ^= 0xf;
+ break;
+ }
+
+ return Imm;
+}
+
bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const {
if (!MI.isTerminator()) return false;
@@ -3131,25 +3258,6 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(isKill));
}
-void X86InstrInfo::storeRegToAddr(
- MachineFunction &MF, unsigned SrcReg, bool isKill,
- SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC,
- ArrayRef<MachineMemOperand *> MMOs,
- SmallVectorImpl<MachineInstr *> &NewMIs) const {
- const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
- unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
- unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
- DebugLoc DL;
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
- for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.add(Addr[i]);
- MIB.addReg(SrcReg, getKillRegState(isKill));
- MIB.setMemRefs(MMOs);
- NewMIs.push_back(MIB);
-}
-
-
void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIdx,
@@ -3164,23 +3272,6 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg), FrameIdx);
}
-void X86InstrInfo::loadRegFromAddr(
- MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC,
- ArrayRef<MachineMemOperand *> MMOs,
- SmallVectorImpl<MachineInstr *> &NewMIs) const {
- const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
- unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
- unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
- DebugLoc DL;
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
- for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.add(Addr[i]);
- MIB.setMemRefs(MMOs);
- NewMIs.push_back(MIB);
-}
-
bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
unsigned &SrcReg2, int &CmpMask,
int &CmpValue) const {
@@ -3599,8 +3690,9 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
if (!IsCmpZero && !Sub)
return false;
- bool IsSwapped = (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
- Sub->getOperand(2).getReg() == SrcReg);
+ bool IsSwapped =
+ (SrcReg2 != 0 && Sub && Sub->getOperand(1).getReg() == SrcReg2 &&
+ Sub->getOperand(2).getReg() == SrcReg);
// Scan forward from the instruction after CmpInstr for uses of EFLAGS.
// It is safe to remove CmpInstr if EFLAGS is redefined or killed.
@@ -3755,7 +3847,7 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI,
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg != FoldAsLoadDefReg)
continue;
// Do not fold if we have a subreg use or a def.
@@ -3785,7 +3877,7 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI,
static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
const MCInstrDesc &Desc) {
assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
- unsigned Reg = MIB->getOperand(0).getReg();
+ Register Reg = MIB->getOperand(0).getReg();
MIB->setDesc(Desc);
// MachineInstr::addOperand() will insert explicit operands before any
@@ -3815,7 +3907,7 @@ static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
bool MinusOne) {
MachineBasicBlock &MBB = *MIB->getParent();
DebugLoc DL = MIB->getDebugLoc();
- unsigned Reg = MIB->getOperand(0).getReg();
+ Register Reg = MIB->getOperand(0).getReg();
// Insert the XOR.
BuildMI(MBB, MIB.getInstr(), DL, TII.get(X86::XOR32rr), Reg)
@@ -3891,7 +3983,7 @@ static void expandLoadStackGuard(MachineInstrBuilder &MIB,
const TargetInstrInfo &TII) {
MachineBasicBlock &MBB = *MIB->getParent();
DebugLoc DL = MIB->getDebugLoc();
- unsigned Reg = MIB->getOperand(0).getReg();
+ Register Reg = MIB->getOperand(0).getReg();
const GlobalValue *GV =
cast<GlobalValue>((*MIB->memoperands_begin())->getValue());
auto Flags = MachineMemOperand::MOLoad |
@@ -3929,7 +4021,7 @@ static bool expandNOVLXLoad(MachineInstrBuilder &MIB,
const MCInstrDesc &LoadDesc,
const MCInstrDesc &BroadcastDesc,
unsigned SubIdx) {
- unsigned DestReg = MIB->getOperand(0).getReg();
+ Register DestReg = MIB->getOperand(0).getReg();
// Check if DestReg is XMM16-31 or YMM16-31.
if (TRI->getEncodingValue(DestReg) < 16) {
// We can use a normal VEX encoded load.
@@ -3952,7 +4044,7 @@ static bool expandNOVLXStore(MachineInstrBuilder &MIB,
const MCInstrDesc &StoreDesc,
const MCInstrDesc &ExtractDesc,
unsigned SubIdx) {
- unsigned SrcReg = MIB->getOperand(X86::AddrNumOperands).getReg();
+ Register SrcReg = MIB->getOperand(X86::AddrNumOperands).getReg();
// Check if DestReg is XMM16-31 or YMM16-31.
if (TRI->getEncodingValue(SrcReg) < 16) {
// We can use a normal VEX encoded store.
@@ -4008,12 +4100,13 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::V_SET0:
case X86::FsFLD0SS:
case X86::FsFLD0SD:
+ case X86::FsFLD0F128:
return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
case X86::AVX_SET0: {
assert(HasAVX && "AVX not supported");
const TargetRegisterInfo *TRI = &getRegisterInfo();
- unsigned SrcReg = MIB->getOperand(0).getReg();
- unsigned XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
+ Register SrcReg = MIB->getOperand(0).getReg();
+ Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
MIB->getOperand(0).setReg(XReg);
Expand2AddrUndef(MIB, get(X86::VXORPSrr));
MIB.addReg(SrcReg, RegState::ImplicitDefine);
@@ -4021,9 +4114,10 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
}
case X86::AVX512_128_SET0:
case X86::AVX512_FsFLD0SS:
- case X86::AVX512_FsFLD0SD: {
+ case X86::AVX512_FsFLD0SD:
+ case X86::AVX512_FsFLD0F128: {
bool HasVLX = Subtarget.hasVLX();
- unsigned SrcReg = MIB->getOperand(0).getReg();
+ Register SrcReg = MIB->getOperand(0).getReg();
const TargetRegisterInfo *TRI = &getRegisterInfo();
if (HasVLX || TRI->getEncodingValue(SrcReg) < 16)
return Expand2AddrUndef(MIB,
@@ -4037,10 +4131,10 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::AVX512_256_SET0:
case X86::AVX512_512_SET0: {
bool HasVLX = Subtarget.hasVLX();
- unsigned SrcReg = MIB->getOperand(0).getReg();
+ Register SrcReg = MIB->getOperand(0).getReg();
const TargetRegisterInfo *TRI = &getRegisterInfo();
if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) {
- unsigned XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
+ Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
MIB->getOperand(0).setReg(XReg);
Expand2AddrUndef(MIB,
get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
@@ -4060,14 +4154,14 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::AVX2_SETALLONES:
return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
case X86::AVX1_SETALLONES: {
- unsigned Reg = MIB->getOperand(0).getReg();
+ Register Reg = MIB->getOperand(0).getReg();
// VCMPPSYrri with an immediate 0xf should produce VCMPTRUEPS.
MIB->setDesc(get(X86::VCMPPSYrri));
MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xf);
return true;
}
case X86::AVX512_512_SETALLONES: {
- unsigned Reg = MIB->getOperand(0).getReg();
+ Register Reg = MIB->getOperand(0).getReg();
MIB->setDesc(get(X86::VPTERNLOGDZrri));
// VPTERNLOGD needs 3 register inputs and an immediate.
// 0xff will return 1s for any input.
@@ -4077,8 +4171,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
}
case X86::AVX512_512_SEXT_MASK_32:
case X86::AVX512_512_SEXT_MASK_64: {
- unsigned Reg = MIB->getOperand(0).getReg();
- unsigned MaskReg = MIB->getOperand(1).getReg();
+ Register Reg = MIB->getOperand(0).getReg();
+ Register MaskReg = MIB->getOperand(1).getReg();
unsigned MaskState = getRegState(MIB->getOperand(1));
unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64) ?
X86::VPTERNLOGQZrrikz : X86::VPTERNLOGDZrrikz;
@@ -4115,8 +4209,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr),
get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
case X86::MOV32ri64: {
- unsigned Reg = MIB->getOperand(0).getReg();
- unsigned Reg32 = RI.getSubReg(Reg, X86::sub_32bit);
+ Register Reg = MIB->getOperand(0).getReg();
+ Register Reg32 = RI.getSubReg(Reg, X86::sub_32bit);
MI.setDesc(get(X86::MOV32ri));
MIB->getOperand(0).setReg(Reg32);
MIB.addReg(Reg, RegState::ImplicitDefine);
@@ -4251,8 +4345,8 @@ unsigned X86InstrInfo::getPartialRegUpdateClearance(
// If MI is marked as reading Reg, the partial register update is wanted.
const MachineOperand &MO = MI.getOperand(0);
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ Register Reg = MO.getReg();
+ if (Register::isVirtualRegister(Reg)) {
if (MO.readsReg() || MI.readsVirtualRegister(Reg))
return 0;
} else {
@@ -4268,7 +4362,10 @@ unsigned X86InstrInfo::getPartialRegUpdateClearance(
// Return true for any instruction that copies the high bits of the first source
// operand into the unused high bits of the destination operand.
-static bool hasUndefRegUpdate(unsigned Opcode, bool ForLoadFold = false) {
+static bool hasUndefRegUpdate(unsigned Opcode, unsigned &OpNum,
+ bool ForLoadFold = false) {
+ // Set the OpNum parameter to the first source operand.
+ OpNum = 1;
switch (Opcode) {
case X86::VCVTSI2SSrr:
case X86::VCVTSI2SSrm:
@@ -4427,6 +4524,14 @@ static bool hasUndefRegUpdate(unsigned Opcode, bool ForLoadFold = false) {
case X86::VSQRTSDZm:
case X86::VSQRTSDZm_Int:
return true;
+ case X86::VMOVSSZrrk:
+ case X86::VMOVSDZrrk:
+ OpNum = 3;
+ return true;
+ case X86::VMOVSSZrrkz:
+ case X86::VMOVSDZrrkz:
+ OpNum = 2;
+ return true;
}
return false;
@@ -4449,14 +4554,11 @@ static bool hasUndefRegUpdate(unsigned Opcode, bool ForLoadFold = false) {
unsigned
X86InstrInfo::getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum,
const TargetRegisterInfo *TRI) const {
- if (!hasUndefRegUpdate(MI.getOpcode()))
+ if (!hasUndefRegUpdate(MI.getOpcode(), OpNum))
return 0;
- // Set the OpNum parameter to the first source operand.
- OpNum = 1;
-
const MachineOperand &MO = MI.getOperand(OpNum);
- if (MO.isUndef() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ if (MO.isUndef() && Register::isPhysicalRegister(MO.getReg())) {
return UndefRegClearance;
}
return 0;
@@ -4464,7 +4566,7 @@ X86InstrInfo::getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum,
void X86InstrInfo::breakPartialRegDependency(
MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
- unsigned Reg = MI.getOperand(OpNum).getReg();
+ Register Reg = MI.getOperand(OpNum).getReg();
// If MI kills this register, the false dependence is already broken.
if (MI.killsRegister(Reg, TRI))
return;
@@ -4480,7 +4582,7 @@ void X86InstrInfo::breakPartialRegDependency(
} else if (X86::VR256RegClass.contains(Reg)) {
// Use vxorps to clear the full ymm register.
// It wants to read and write the xmm sub-register.
- unsigned XReg = TRI->getSubReg(Reg, X86::sub_xmm);
+ Register XReg = TRI->getSubReg(Reg, X86::sub_xmm);
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VXORPSrr), XReg)
.addReg(XReg, RegState::Undef)
.addReg(XReg, RegState::Undef)
@@ -4489,7 +4591,7 @@ void X86InstrInfo::breakPartialRegDependency(
} else if (X86::GR64RegClass.contains(Reg)) {
// Using XOR32rr because it has a shorter encoding and zeros the upper bits
// as well.
- unsigned XReg = TRI->getSubReg(Reg, X86::sub_32bit);
+ Register XReg = TRI->getSubReg(Reg, X86::sub_32bit);
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), XReg)
.addReg(XReg, RegState::Undef)
.addReg(XReg, RegState::Undef)
@@ -4538,8 +4640,8 @@ static void updateOperandRegConstraints(MachineFunction &MF,
// We only need to update constraints on virtual register operands.
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
- if (!TRI.isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
auto *NewRC = MRI.constrainRegClass(
@@ -4698,7 +4800,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF,
MachineInstr &MI) {
- if (!hasUndefRegUpdate(MI.getOpcode(), /*ForLoadFold*/true) ||
+ unsigned Ignored;
+ if (!hasUndefRegUpdate(MI.getOpcode(), Ignored, /*ForLoadFold*/true) ||
!MI.getOperand(1).isReg())
return false;
@@ -4788,6 +4891,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
if (I != nullptr) {
unsigned Opcode = I->DstOp;
unsigned MinAlign = (I->Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT;
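+    // The table stores the alignment log2-encoded: a value of N means
+    // (1 << (N - 1)) bytes and 0 means no alignment restriction.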
+ MinAlign = MinAlign ? 1 << (MinAlign - 1) : 0;
if (Align < MinAlign)
return nullptr;
bool NarrowToMOV32rm = false;
@@ -4821,8 +4925,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
// If this is the special case where we use a MOV32rm to load a 32-bit
// value and zero-extend the top bits. Change the destination register
// to a 32-bit one.
- unsigned DstReg = NewMI->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ Register DstReg = NewMI->getOperand(0).getReg();
+ if (Register::isPhysicalRegister(DstReg))
NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, X86::sub_32bit));
else
NewMI->getOperand(0).setSubReg(X86::sub_32bit);
@@ -5133,6 +5237,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
case X86::V_SET0:
case X86::V_SETALLONES:
case X86::AVX512_128_SET0:
+ case X86::FsFLD0F128:
+ case X86::AVX512_FsFLD0F128:
Alignment = 16;
break;
case X86::MMX_SET0:
@@ -5182,7 +5288,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
case X86::FsFLD0SD:
case X86::AVX512_FsFLD0SD:
case X86::FsFLD0SS:
- case X86::AVX512_FsFLD0SS: {
+ case X86::AVX512_FsFLD0SS:
+ case X86::FsFLD0F128:
+ case X86::AVX512_FsFLD0F128: {
// Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
// Create a constant-pool entry and operands to load from it.
@@ -5212,6 +5320,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
Ty = Type::getFloatTy(MF.getFunction().getContext());
else if (Opc == X86::FsFLD0SD || Opc == X86::AVX512_FsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction().getContext());
+ else if (Opc == X86::FsFLD0F128 || Opc == X86::AVX512_FsFLD0F128)
+ Ty = Type::getFP128Ty(MF.getFunction().getContext());
else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),16);
else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 ||
@@ -5293,6 +5403,51 @@ extractStoreMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
return StoreMMOs;
}
+static unsigned getBroadcastOpcode(const X86MemoryFoldTableEntry *I,
+ const TargetRegisterClass *RC,
+ const X86Subtarget &STI) {
+ assert(STI.hasAVX512() && "Expected at least AVX512!");
+ unsigned SpillSize = STI.getRegisterInfo()->getSpillSize(*RC);
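+  // The spill size is in bytes: 16, 32 or 64 for an XMM, YMM or ZMM register.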
+ assert((SpillSize == 64 || STI.hasVLX()) &&
+ "Can't broadcast less than 64 bytes without AVX512VL!");
+
+ switch (I->Flags & TB_BCAST_MASK) {
+ default: llvm_unreachable("Unexpected broadcast type!");
+ case TB_BCAST_D:
+ switch (SpillSize) {
+ default: llvm_unreachable("Unknown spill size");
+ case 16: return X86::VPBROADCASTDZ128m;
+ case 32: return X86::VPBROADCASTDZ256m;
+ case 64: return X86::VPBROADCASTDZm;
+ }
+ break;
+ case TB_BCAST_Q:
+ switch (SpillSize) {
+ default: llvm_unreachable("Unknown spill size");
+ case 16: return X86::VPBROADCASTQZ128m;
+ case 32: return X86::VPBROADCASTQZ256m;
+ case 64: return X86::VPBROADCASTQZm;
+ }
+ break;
+ case TB_BCAST_SS:
+ switch (SpillSize) {
+ default: llvm_unreachable("Unknown spill size");
+ case 16: return X86::VBROADCASTSSZ128m;
+ case 32: return X86::VBROADCASTSSZ256m;
+ case 64: return X86::VBROADCASTSSZm;
+ }
+ break;
+ case TB_BCAST_SD:
+ switch (SpillSize) {
+ default: llvm_unreachable("Unknown spill size");
+ case 16: return X86::VMOVDDUPZ128rm;
+ case 32: return X86::VBROADCASTSDZ256m;
+ case 64: return X86::VBROADCASTSDZm;
+ }
+ break;
+ }
+}
+
bool X86InstrInfo::unfoldMemoryOperand(
MachineFunction &MF, MachineInstr &MI, unsigned Reg, bool UnfoldLoad,
bool UnfoldStore, SmallVectorImpl<MachineInstr *> &NewMIs) const {
@@ -5303,6 +5458,7 @@ bool X86InstrInfo::unfoldMemoryOperand(
unsigned Index = I->Flags & TB_INDEX_MASK;
bool FoldedLoad = I->Flags & TB_FOLDED_LOAD;
bool FoldedStore = I->Flags & TB_FOLDED_STORE;
+ bool FoldedBCast = I->Flags & TB_FOLDED_BCAST;
if (UnfoldLoad && !FoldedLoad)
return false;
UnfoldLoad &= FoldedLoad;
@@ -5311,7 +5467,9 @@ bool X86InstrInfo::unfoldMemoryOperand(
UnfoldStore &= FoldedStore;
const MCInstrDesc &MCID = get(Opc);
+
const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
// TODO: Check if 32-byte or greater accesses are slow too?
if (!MI.hasOneMemOperand() && RC == &X86::VR128RegClass &&
Subtarget.isUnalignedMem16Slow())
@@ -5335,10 +5493,26 @@ bool X86InstrInfo::unfoldMemoryOperand(
AfterOps.push_back(Op);
}
- // Emit the load instruction.
+ // Emit the load or broadcast instruction.
if (UnfoldLoad) {
auto MMOs = extractLoadMMOs(MI.memoperands(), MF);
- loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs, NewMIs);
+
+ unsigned Opc;
+ if (FoldedBCast) {
+ Opc = getBroadcastOpcode(I, RC, Subtarget);
+ } else {
+ unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
+ Opc = getLoadRegOpcode(Reg, RC, isAligned, Subtarget);
+ }
+
+ DebugLoc DL;
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), Reg);
+ for (unsigned i = 0, e = AddrOps.size(); i != e; ++i)
+ MIB.add(AddrOps[i]);
+ MIB.setMemRefs(MMOs);
+ NewMIs.push_back(MIB);
+
if (UnfoldStore) {
// Address operands cannot be marked isKill.
for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
@@ -5404,7 +5578,16 @@ bool X86InstrInfo::unfoldMemoryOperand(
if (UnfoldStore) {
const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF);
auto MMOs = extractStoreMMOs(MI.memoperands(), MF);
- storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs, NewMIs);
+ unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*DstRC), 16);
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
+ unsigned Opc = getStoreRegOpcode(Reg, DstRC, isAligned, Subtarget);
+ DebugLoc DL;
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
+ for (unsigned i = 0, e = AddrOps.size(); i != e; ++i)
+ MIB.add(AddrOps[i]);
+ MIB.addReg(Reg, RegState::Kill);
+ MIB.setMemRefs(MMOs);
+ NewMIs.push_back(MIB);
}
return true;
@@ -5423,6 +5606,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
unsigned Index = I->Flags & TB_INDEX_MASK;
bool FoldedLoad = I->Flags & TB_FOLDED_LOAD;
bool FoldedStore = I->Flags & TB_FOLDED_STORE;
+ bool FoldedBCast = I->Flags & TB_FOLDED_BCAST;
const MCInstrDesc &MCID = get(Opc);
MachineFunction &MF = DAG.getMachineFunction();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
@@ -5456,10 +5640,17 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
return false;
// FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
// memory access is slow above.
- unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
- Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, Subtarget), dl,
- VT, MVT::Other, AddrOps);
+
+ unsigned Opc;
+ if (FoldedBCast) {
+ Opc = getBroadcastOpcode(I, RC, Subtarget);
+ } else {
+ unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
+ Opc = getLoadRegOpcode(0, RC, isAligned, Subtarget);
+ }
+
+ Load = DAG.getMachineNode(Opc, dl, VT, MVT::Other, AddrOps);
NewNodes.push_back(Load);
// Preserve memory reference information.
@@ -7367,6 +7558,96 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
}
}
+Optional<ParamLoadedValue>
+X86InstrInfo::describeLoadedValue(const MachineInstr &MI) const {
+ const MachineOperand *Op = nullptr;
+ DIExpression *Expr = nullptr;
+
+ switch (MI.getOpcode()) {
+ case X86::LEA32r:
+ case X86::LEA64r:
+ case X86::LEA64_32r: {
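+    // LEA computes Base + Index * Scale + Disp without touching memory, so the
+    // value it defines can be described as a DWARF expression over its source
+    // operands.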
+    // Operand 4 could be a global address. For now we do not support
+    // such a situation.
+ if (!MI.getOperand(4).isImm() || !MI.getOperand(2).isImm())
+ return None;
+
+ const MachineOperand &Op1 = MI.getOperand(1);
+ const MachineOperand &Op2 = MI.getOperand(3);
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ assert(Op2.isReg() && (Op2.getReg() == X86::NoRegister ||
+ Register::isPhysicalRegister(Op2.getReg())));
+
+ // Omit situations like:
+ // %rsi = lea %rsi, 4, ...
+ if ((Op1.isReg() && Op1.getReg() == MI.getOperand(0).getReg()) ||
+ Op2.getReg() == MI.getOperand(0).getReg())
+ return None;
+ else if ((Op1.isReg() && Op1.getReg() != X86::NoRegister &&
+ TRI->regsOverlap(Op1.getReg(), MI.getOperand(0).getReg())) ||
+ (Op2.getReg() != X86::NoRegister &&
+ TRI->regsOverlap(Op2.getReg(), MI.getOperand(0).getReg())))
+ return None;
+
+ int64_t Coef = MI.getOperand(2).getImm();
+ int64_t Offset = MI.getOperand(4).getImm();
+ SmallVector<uint64_t, 8> Ops;
+
+ if ((Op1.isReg() && Op1.getReg() != X86::NoRegister)) {
+ Op = &Op1;
+ } else if (Op1.isFI())
+ Op = &Op1;
+
+ if (Op && Op->isReg() && Op->getReg() == Op2.getReg() && Coef > 0) {
+ Ops.push_back(dwarf::DW_OP_constu);
+ Ops.push_back(Coef + 1);
+ Ops.push_back(dwarf::DW_OP_mul);
+ } else {
+ if (Op && Op2.getReg() != X86::NoRegister) {
+ int dwarfReg = TRI->getDwarfRegNum(Op2.getReg(), false);
+ if (dwarfReg < 0)
+ return None;
+ else if (dwarfReg < 32) {
+ Ops.push_back(dwarf::DW_OP_breg0 + dwarfReg);
+ Ops.push_back(0);
+ } else {
+ Ops.push_back(dwarf::DW_OP_bregx);
+ Ops.push_back(dwarfReg);
+ Ops.push_back(0);
+ }
+ } else if (!Op) {
+ assert(Op2.getReg() != X86::NoRegister);
+ Op = &Op2;
+ }
+
+ if (Coef > 1) {
+ assert(Op2.getReg() != X86::NoRegister);
+ Ops.push_back(dwarf::DW_OP_constu);
+ Ops.push_back(Coef);
+ Ops.push_back(dwarf::DW_OP_mul);
+ }
+
+ if (((Op1.isReg() && Op1.getReg() != X86::NoRegister) || Op1.isFI()) &&
+ Op2.getReg() != X86::NoRegister) {
+ Ops.push_back(dwarf::DW_OP_plus);
+ }
+ }
+
+ DIExpression::appendOffset(Ops, Offset);
+ Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), Ops);
+
+    return ParamLoadedValue(*Op, Expr);
+ }
+ case X86::XOR32rr: {
+ if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
+ return ParamLoadedValue(MachineOperand::CreateImm(0), Expr);
+ return None;
+ }
+ default:
+ return TargetInstrInfo::describeLoadedValue(MI);
+ }
+}
+
/// This is an architecture-specific helper function of reassociateOps.
/// Set special operand attributes for new instructions after reassociation.
void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
@@ -7500,9 +7781,8 @@ namespace {
// movq $_GLOBAL_OFFSET_TABLE_ - .LN$pb, %rcx
// addq %rcx, %rax
// RAX now holds address of _GLOBAL_OFFSET_TABLE_.
- unsigned PBReg = RegInfo.createVirtualRegister(&X86::GR64RegClass);
- unsigned GOTReg =
- RegInfo.createVirtualRegister(&X86::GR64RegClass);
+ Register PBReg = RegInfo.createVirtualRegister(&X86::GR64RegClass);
+ Register GOTReg = RegInfo.createVirtualRegister(&X86::GR64RegClass);
BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PBReg)
.addReg(X86::RIP)
.addImm(0)