Diffstat (limited to 'llvm/lib/Target/X86/X86InstrInfo.cpp')
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.cpp  851
1 file changed, 533 insertions, 318 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 4dcd886fa3b2..ec32ac2acad1 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -25,13 +25,16 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -137,298 +140,70 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
}
bool X86InstrInfo::isDataInvariant(MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- // By default, assume that the instruction is not data invariant.
+ if (MI.mayLoad() || MI.mayStore())
return false;
- // Some target-independent operations that trivially lower to data-invariant
- // instructions.
- case TargetOpcode::COPY:
- case TargetOpcode::INSERT_SUBREG:
- case TargetOpcode::SUBREG_TO_REG:
+ // Some target-independent operations that trivially lower to data-invariant
+ // instructions.
+ if (MI.isCopyLike() || MI.isInsertSubreg())
return true;
+ unsigned Opcode = MI.getOpcode();
+ using namespace X86;
// On x86 it is believed that imul is constant time w.r.t. the loaded data.
// However, they set flags and are perhaps the most surprisingly constant
// time operations so we call them out here separately.
- case X86::IMUL16rr:
- case X86::IMUL16rri8:
- case X86::IMUL16rri:
- case X86::IMUL32rr:
- case X86::IMUL32rri8:
- case X86::IMUL32rri:
- case X86::IMUL64rr:
- case X86::IMUL64rri32:
- case X86::IMUL64rri8:
-
+ if (isIMUL(Opcode))
+ return true;
// Bit scanning and counting instructions that are somewhat surprisingly
// constant time as they scan across bits and do other fairly complex
// operations like popcnt, but are believed to be constant time on x86.
// However, these set flags.
- case X86::BSF16rr:
- case X86::BSF32rr:
- case X86::BSF64rr:
- case X86::BSR16rr:
- case X86::BSR32rr:
- case X86::BSR64rr:
- case X86::LZCNT16rr:
- case X86::LZCNT32rr:
- case X86::LZCNT64rr:
- case X86::POPCNT16rr:
- case X86::POPCNT32rr:
- case X86::POPCNT64rr:
- case X86::TZCNT16rr:
- case X86::TZCNT32rr:
- case X86::TZCNT64rr:
-
+ if (isBSF(Opcode) || isBSR(Opcode) || isLZCNT(Opcode) || isPOPCNT(Opcode) ||
+ isTZCNT(Opcode))
+ return true;
// Bit manipulation instructions are effectively combinations of basic
// arithmetic ops, and should still execute in constant time. These also
// set flags.
- case X86::BLCFILL32rr:
- case X86::BLCFILL64rr:
- case X86::BLCI32rr:
- case X86::BLCI64rr:
- case X86::BLCIC32rr:
- case X86::BLCIC64rr:
- case X86::BLCMSK32rr:
- case X86::BLCMSK64rr:
- case X86::BLCS32rr:
- case X86::BLCS64rr:
- case X86::BLSFILL32rr:
- case X86::BLSFILL64rr:
- case X86::BLSI32rr:
- case X86::BLSI64rr:
- case X86::BLSIC32rr:
- case X86::BLSIC64rr:
- case X86::BLSMSK32rr:
- case X86::BLSMSK64rr:
- case X86::BLSR32rr:
- case X86::BLSR64rr:
- case X86::TZMSK32rr:
- case X86::TZMSK64rr:
-
+ if (isBLCFILL(Opcode) || isBLCI(Opcode) || isBLCIC(Opcode) ||
+ isBLCMSK(Opcode) || isBLCS(Opcode) || isBLSFILL(Opcode) ||
+ isBLSI(Opcode) || isBLSIC(Opcode) || isBLSMSK(Opcode) || isBLSR(Opcode) ||
+ isTZMSK(Opcode))
+ return true;
// Bit extracting and clearing instructions should execute in constant time,
// and set flags.
- case X86::BEXTR32rr:
- case X86::BEXTR64rr:
- case X86::BEXTRI32ri:
- case X86::BEXTRI64ri:
- case X86::BZHI32rr:
- case X86::BZHI64rr:
-
+ if (isBEXTR(Opcode) || isBZHI(Opcode))
+ return true;
// Shift and rotate.
- case X86::ROL8r1:
- case X86::ROL16r1:
- case X86::ROL32r1:
- case X86::ROL64r1:
- case X86::ROL8rCL:
- case X86::ROL16rCL:
- case X86::ROL32rCL:
- case X86::ROL64rCL:
- case X86::ROL8ri:
- case X86::ROL16ri:
- case X86::ROL32ri:
- case X86::ROL64ri:
- case X86::ROR8r1:
- case X86::ROR16r1:
- case X86::ROR32r1:
- case X86::ROR64r1:
- case X86::ROR8rCL:
- case X86::ROR16rCL:
- case X86::ROR32rCL:
- case X86::ROR64rCL:
- case X86::ROR8ri:
- case X86::ROR16ri:
- case X86::ROR32ri:
- case X86::ROR64ri:
- case X86::SAR8r1:
- case X86::SAR16r1:
- case X86::SAR32r1:
- case X86::SAR64r1:
- case X86::SAR8rCL:
- case X86::SAR16rCL:
- case X86::SAR32rCL:
- case X86::SAR64rCL:
- case X86::SAR8ri:
- case X86::SAR16ri:
- case X86::SAR32ri:
- case X86::SAR64ri:
- case X86::SHL8r1:
- case X86::SHL16r1:
- case X86::SHL32r1:
- case X86::SHL64r1:
- case X86::SHL8rCL:
- case X86::SHL16rCL:
- case X86::SHL32rCL:
- case X86::SHL64rCL:
- case X86::SHL8ri:
- case X86::SHL16ri:
- case X86::SHL32ri:
- case X86::SHL64ri:
- case X86::SHR8r1:
- case X86::SHR16r1:
- case X86::SHR32r1:
- case X86::SHR64r1:
- case X86::SHR8rCL:
- case X86::SHR16rCL:
- case X86::SHR32rCL:
- case X86::SHR64rCL:
- case X86::SHR8ri:
- case X86::SHR16ri:
- case X86::SHR32ri:
- case X86::SHR64ri:
- case X86::SHLD16rrCL:
- case X86::SHLD32rrCL:
- case X86::SHLD64rrCL:
- case X86::SHLD16rri8:
- case X86::SHLD32rri8:
- case X86::SHLD64rri8:
- case X86::SHRD16rrCL:
- case X86::SHRD32rrCL:
- case X86::SHRD64rrCL:
- case X86::SHRD16rri8:
- case X86::SHRD32rri8:
- case X86::SHRD64rri8:
-
+ if (isROL(Opcode) || isROR(Opcode) || isSAR(Opcode) || isSHL(Opcode) ||
+ isSHR(Opcode) || isSHLD(Opcode) || isSHRD(Opcode))
+ return true;
// Basic arithmetic is constant time on the input but does set flags.
- case X86::ADC8rr:
- case X86::ADC8ri:
- case X86::ADC16rr:
- case X86::ADC16ri:
- case X86::ADC16ri8:
- case X86::ADC32rr:
- case X86::ADC32ri:
- case X86::ADC32ri8:
- case X86::ADC64rr:
- case X86::ADC64ri8:
- case X86::ADC64ri32:
- case X86::ADD8rr:
- case X86::ADD8ri:
- case X86::ADD16rr:
- case X86::ADD16ri:
- case X86::ADD16ri8:
- case X86::ADD32rr:
- case X86::ADD32ri:
- case X86::ADD32ri8:
- case X86::ADD64rr:
- case X86::ADD64ri8:
- case X86::ADD64ri32:
- case X86::AND8rr:
- case X86::AND8ri:
- case X86::AND16rr:
- case X86::AND16ri:
- case X86::AND16ri8:
- case X86::AND32rr:
- case X86::AND32ri:
- case X86::AND32ri8:
- case X86::AND64rr:
- case X86::AND64ri8:
- case X86::AND64ri32:
- case X86::OR8rr:
- case X86::OR8ri:
- case X86::OR16rr:
- case X86::OR16ri:
- case X86::OR16ri8:
- case X86::OR32rr:
- case X86::OR32ri:
- case X86::OR32ri8:
- case X86::OR64rr:
- case X86::OR64ri8:
- case X86::OR64ri32:
- case X86::SBB8rr:
- case X86::SBB8ri:
- case X86::SBB16rr:
- case X86::SBB16ri:
- case X86::SBB16ri8:
- case X86::SBB32rr:
- case X86::SBB32ri:
- case X86::SBB32ri8:
- case X86::SBB64rr:
- case X86::SBB64ri8:
- case X86::SBB64ri32:
- case X86::SUB8rr:
- case X86::SUB8ri:
- case X86::SUB16rr:
- case X86::SUB16ri:
- case X86::SUB16ri8:
- case X86::SUB32rr:
- case X86::SUB32ri:
- case X86::SUB32ri8:
- case X86::SUB64rr:
- case X86::SUB64ri8:
- case X86::SUB64ri32:
- case X86::XOR8rr:
- case X86::XOR8ri:
- case X86::XOR16rr:
- case X86::XOR16ri:
- case X86::XOR16ri8:
- case X86::XOR32rr:
- case X86::XOR32ri:
- case X86::XOR32ri8:
- case X86::XOR64rr:
- case X86::XOR64ri8:
- case X86::XOR64ri32:
+ if (isADC(Opcode) || isADD(Opcode) || isAND(Opcode) || isOR(Opcode) ||
+ isSBB(Opcode) || isSUB(Opcode) || isXOR(Opcode))
+ return true;
// Arithmetic with just 32-bit and 64-bit variants and no immediates.
- case X86::ADCX32rr:
- case X86::ADCX64rr:
- case X86::ADOX32rr:
- case X86::ADOX64rr:
- case X86::ANDN32rr:
- case X86::ANDN64rr:
+ if (isADCX(Opcode) || isADOX(Opcode) || isANDN(Opcode))
+ return true;
// Unary arithmetic operations.
- case X86::DEC8r:
- case X86::DEC16r:
- case X86::DEC32r:
- case X86::DEC64r:
- case X86::INC8r:
- case X86::INC16r:
- case X86::INC32r:
- case X86::INC64r:
- case X86::NEG8r:
- case X86::NEG16r:
- case X86::NEG32r:
- case X86::NEG64r:
-
+ if (isDEC(Opcode) || isINC(Opcode) || isNEG(Opcode))
+ return true;
// Unlike other arithmetic, NOT doesn't set EFLAGS.
- case X86::NOT8r:
- case X86::NOT16r:
- case X86::NOT32r:
- case X86::NOT64r:
-
+ if (isNOT(Opcode))
+ return true;
// Various move instructions used to zero or sign extend things. Note that we
// intentionally don't support the _NOREX variants as we can't handle that
// register constraint anyways.
- case X86::MOVSX16rr8:
- case X86::MOVSX32rr8:
- case X86::MOVSX32rr16:
- case X86::MOVSX64rr8:
- case X86::MOVSX64rr16:
- case X86::MOVSX64rr32:
- case X86::MOVZX16rr8:
- case X86::MOVZX32rr8:
- case X86::MOVZX32rr16:
- case X86::MOVZX64rr8:
- case X86::MOVZX64rr16:
- case X86::MOV32rr:
-
+ if (isMOVSX(Opcode) || isMOVZX(Opcode) || isMOVSXD(Opcode) || isMOV(Opcode))
+ return true;
// Arithmetic instructions that are both constant time and don't set flags.
- case X86::RORX32ri:
- case X86::RORX64ri:
- case X86::SARX32rr:
- case X86::SARX64rr:
- case X86::SHLX32rr:
- case X86::SHLX64rr:
- case X86::SHRX32rr:
- case X86::SHRX64rr:
-
+ if (isRORX(Opcode) || isSARX(Opcode) || isSHLX(Opcode) || isSHRX(Opcode))
+ return true;
// LEA doesn't actually access memory, and its arithmetic is constant time.
- case X86::LEA16r:
- case X86::LEA32r:
- case X86::LEA64_32r:
- case X86::LEA64r:
+ if (isLEA(Opcode))
return true;
- }
+ // By default, assume that the instruction is not data invariant.
+ return false;
}
bool X86InstrInfo::isDataInvariantLoad(MachineInstr &MI) {
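Annotation (not part of the patch): the per-opcode switch is replaced by mnemonic-level predicates such as X86::isIMUL(Opcode). Because the function now bails out up front on MI.mayLoad()/MI.mayStore(), these predicates can safely match whole mnemonic families, including memory forms the old switch never listed. A minimal sketch of what one such predicate is assumed to look like (the real ones are generated from the .td instruction names):

    static bool isIMUL(unsigned Opcode) {
      switch (Opcode) {
      case X86::IMUL16rr:
      case X86::IMUL32rr:
      case X86::IMUL64rr:
        // ... all remaining IMUL register/immediate variants ...
        return true;
      default:
        return false;
      }
    }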
@@ -990,6 +765,7 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case X86::AVX_SET0:
case X86::FsFLD0SD:
case X86::FsFLD0SS:
+ case X86::FsFLD0SH:
case X86::FsFLD0F128:
case X86::KSET0D:
case X86::KSET0Q:
@@ -1192,6 +968,102 @@ inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
return ShAmt < 4 && ShAmt > 0;
}
+static bool findRedundantFlagInstr(MachineInstr &CmpInstr,
+ MachineInstr &CmpValDefInstr,
+ const MachineRegisterInfo *MRI,
+ MachineInstr **AndInstr,
+ const TargetRegisterInfo *TRI,
+ bool &NoSignFlag, bool &ClearsOverflowFlag) {
+ if (CmpValDefInstr.getOpcode() != X86::SUBREG_TO_REG)
+ return false;
+
+ if (CmpInstr.getOpcode() != X86::TEST64rr)
+ return false;
+
+  // CmpInstr is a TEST64rr instruction, and `X86InstrInfo::analyzeCompare`
+  // treats it as analyzable only when its two register operands are
+  // identical.
+  assert(
+      (CmpInstr.getOperand(0).getReg() == CmpInstr.getOperand(1).getReg()) &&
+      "CmpInstr is an analyzable TEST64rr, and `X86InstrInfo::analyzeCompare` "
+      "requires that its two register operands be the same.");
+
+ // Caller (`X86InstrInfo::optimizeCompareInstr`) guarantees that
+ // `CmpValDefInstr` defines the value that's used by `CmpInstr`; in this case
+ // if `CmpValDefInstr` sets the EFLAGS, it is likely that `CmpInstr` is
+ // redundant.
+ assert(
+ (MRI->getVRegDef(CmpInstr.getOperand(0).getReg()) == &CmpValDefInstr) &&
+ "Caller guarantees that TEST64rr is a user of SUBREG_TO_REG.");
+
+ // As seen in X86 td files, CmpValDefInstr.getOperand(1).getImm() is typically
+ // 0.
+ if (CmpValDefInstr.getOperand(1).getImm() != 0)
+ return false;
+
+ // As seen in X86 td files, CmpValDefInstr.getOperand(3) is typically
+ // sub_32bit or sub_xmm.
+ if (CmpValDefInstr.getOperand(3).getImm() != X86::sub_32bit)
+ return false;
+
+ MachineInstr *VregDefInstr =
+ MRI->getVRegDef(CmpValDefInstr.getOperand(2).getReg());
+
+ assert(VregDefInstr && "Must have a definition (SSA)");
+
+  // Require `CmpValDefInstr` and `VregDefInstr` to be in the same MBB
+  // to simplify the subsequent analysis.
+ //
+ // FIXME: If `VregDefInstr->getParent()` is the only predecessor of
+ // `CmpValDefInstr.getParent()`, this could be handled.
+ if (VregDefInstr->getParent() != CmpValDefInstr.getParent())
+ return false;
+
+ if (X86::isAND(VregDefInstr->getOpcode())) {
+ // Get a sequence of instructions like
+ // %reg = and* ... // Set EFLAGS
+ // ... // EFLAGS not changed
+ // %extended_reg = subreg_to_reg 0, %reg, %subreg.sub_32bit
+ // test64rr %extended_reg, %extended_reg, implicit-def $eflags
+ //
+ // If subsequent readers use a subset of bits that don't change
+ // after `and*` instructions, it's likely that the test64rr could
+ // be optimized away.
+ for (const MachineInstr &Instr :
+ make_range(std::next(MachineBasicBlock::iterator(VregDefInstr)),
+ MachineBasicBlock::iterator(CmpValDefInstr))) {
+      // Bail out if any instruction between 'VregDefInstr' and
+      // 'CmpValDefInstr' modifies EFLAGS.
+ if (Instr.modifiesRegister(X86::EFLAGS, TRI))
+ return false;
+ }
+
+ *AndInstr = VregDefInstr;
+
+  // The AND instruction will essentially update SF and clear OF, so
+  // NoSignFlag should be false in the sense that SF is modified by `AND`.
+  //
+  // However, the implementation artificially sets `NoSignFlag` to true
+  // to poison the SF bit; that is to say, if SF is looked at later, the
+  // optimization (to erase TEST64rr) will be disabled.
+  //
+  // The reason to poison the SF bit is that its value could differ between
+  // the `AND` and `TEST` operations; the sign bit is not known for `AND`,
+  // and is known to be 0 as a result of `TEST64rr`.
+  //
+  // FIXME: As opposed to poisoning the SF bit directly, consider peeking into
+  // the AND instruction and using the static information to guide peephole
+  // optimization if possible. For example, it's possible to fold a
+  // conditional move into a copy if the relevant EFLAGS bits could be deduced
+  // from an immediate operand of the AND operation.
+ //
+ NoSignFlag = true;
+  // ClearsOverflowFlag is true for the AND operation (no surprise).
+ ClearsOverflowFlag = true;
+ return true;
+ }
+ return false;
+}
+
bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
unsigned Opc, bool AllowSP, Register &NewSrc,
bool &isKill, MachineOperand &ImplicitOp,
@@ -1314,8 +1186,11 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
case X86::SHL8ri:
case X86::SHL16ri: {
unsigned ShAmt = MI.getOperand(2).getImm();
- MIB.addReg(0).addImm(1ULL << ShAmt)
- .addReg(InRegLEA, RegState::Kill).addImm(0).addReg(0);
+ MIB.addReg(0)
+ .addImm(1LL << ShAmt)
+ .addReg(InRegLEA, RegState::Kill)
+ .addImm(0)
+ .addReg(0);
break;
}
case X86::INC8r:
@@ -1478,7 +1353,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r))
.add(Dest)
.addReg(0)
- .addImm(1ULL << ShAmt)
+ .addImm(1LL << ShAmt)
.add(Src)
.addImm(0)
.addReg(0);
@@ -1502,7 +1377,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
BuildMI(MF, MI.getDebugLoc(), get(Opc))
.add(Dest)
.addReg(0)
- .addImm(1ULL << ShAmt)
+ .addImm(1LL << ShAmt)
.addReg(SrcReg, getKillRegState(isKill))
.addImm(0)
.addReg(0);
@@ -1957,14 +1832,13 @@ unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(
FMAForms[0] = FMA3Group.get132Opcode();
FMAForms[1] = FMA3Group.get213Opcode();
FMAForms[2] = FMA3Group.get231Opcode();
- unsigned FormIndex;
- for (FormIndex = 0; FormIndex < 3; FormIndex++)
- if (Opc == FMAForms[FormIndex])
- break;
// Everything is ready, just adjust the FMA opcode and return it.
- FormIndex = FormMapping[Case][FormIndex];
- return FMAForms[FormIndex];
+ for (unsigned FormIndex = 0; FormIndex < 3; FormIndex++)
+ if (Opc == FMAForms[FormIndex])
+ return FMAForms[FormMapping[Case][FormIndex]];
+
+ llvm_unreachable("Illegal FMA3 format");
}
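For context on the form indices above: the three FMA3 forms differ only in which source operand supplies the addend, so commuting a pair of sources maps one form onto another, and FormMapping[Case][FormIndex] encodes exactly that mapping. An illustration in comments (scalar SS variants shown; not part of this patch):

    // vfmadd132ss: dst = dst  * src3 + src2
    // vfmadd213ss: dst = src2 * dst  + src3
    // vfmadd231ss: dst = src2 * src3 + dst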
static void commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1,
@@ -2141,7 +2015,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
if ((MI.getOperand(3).getImm() ^ Mask) == 1) {
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.setDesc(get(Opc));
- WorkingMI.RemoveOperand(3);
+ WorkingMI.removeOperand(3);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI,
/*NewMI=*/false,
OpIdx1, OpIdx2);
@@ -2238,7 +2112,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
assert(MI.getOperand(3).getImm() == 0x02 && "Unexpected immediate!");
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.setDesc(get(X86::MOVSDrr));
- WorkingMI.RemoveOperand(3);
+ WorkingMI.removeOperand(3);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
@@ -2813,34 +2687,37 @@ bool X86InstrInfo::hasCommutePreference(MachineInstr &MI, bool &Commute) const {
return false;
}
+int X86::getCondSrcNoFromDesc(const MCInstrDesc &MCID) {
+ unsigned Opcode = MCID.getOpcode();
+ if (!(X86::isJCC(Opcode) || X86::isSETCC(Opcode) || X86::isCMOVCC(Opcode)))
+ return -1;
+ // Assume that condition code is always the last use operand.
+ unsigned NumUses = MCID.getNumOperands() - MCID.getNumDefs();
+ return NumUses - 1;
+}
+
+X86::CondCode X86::getCondFromMI(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ int CondNo = getCondSrcNoFromDesc(MCID);
+ if (CondNo < 0)
+ return X86::COND_INVALID;
+ CondNo += MCID.getNumDefs();
+ return static_cast<X86::CondCode>(MI.getOperand(CondNo).getImm());
+}
+
X86::CondCode X86::getCondFromBranch(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default: return X86::COND_INVALID;
- case X86::JCC_1:
- return static_cast<X86::CondCode>(
- MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm());
- }
+ return X86::isJCC(MI.getOpcode()) ? X86::getCondFromMI(MI)
+ : X86::COND_INVALID;
}
-/// Return condition code of a SETCC opcode.
X86::CondCode X86::getCondFromSETCC(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default: return X86::COND_INVALID;
- case X86::SETCCr: case X86::SETCCm:
- return static_cast<X86::CondCode>(
- MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm());
- }
+ return X86::isSETCC(MI.getOpcode()) ? X86::getCondFromMI(MI)
+ : X86::COND_INVALID;
}
-/// Return condition code of a CMov opcode.
X86::CondCode X86::getCondFromCMov(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default: return X86::COND_INVALID;
- case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr:
- case X86::CMOV16rm: case X86::CMOV32rm: case X86::CMOV64rm:
- return static_cast<X86::CondCode>(
- MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm());
- }
+ return X86::isCMOVCC(MI.getOpcode()) ? X86::getCondFromMI(MI)
+ : X86::COND_INVALID;
}
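The three wrappers now share one helper that locates the condition-code operand from the MCInstrDesc alone. A hedged usage sketch (caller code illustrative, not from this patch):

    // Works uniformly for JCC, SETCC and CMOVcc instructions:
    X86::CondCode CC = X86::getCondFromMI(MI);
    if (CC != X86::COND_INVALID)
      CC = X86::GetOppositeBranchCondition(CC); // e.g. to invert a branch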
/// Return the inverse of the specified condition,
@@ -3166,8 +3043,7 @@ bool X86InstrInfo::AnalyzeBranchImpl(
}
// If the block has any instructions after a JMP, delete them.
- while (std::next(I) != MBB.end())
- std::next(I)->eraseFromParent();
+ MBB.erase(std::next(I), MBB.end());
Cond.clear();
FBB = nullptr;
@@ -3464,7 +3340,7 @@ bool X86InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
Register FalseReg, int &CondCycles,
int &TrueCycles, int &FalseCycles) const {
// Not all subtargets have cmov instructions.
- if (!Subtarget.hasCMov())
+ if (!Subtarget.canUseCMOV())
return false;
if (Cond.size() != 1)
return false;
@@ -3708,10 +3584,6 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
case 2:
if (X86::VK16RegClass.hasSubClassEq(RC))
return load ? X86::KMOVWkm : X86::KMOVWmk;
- if (X86::FR16XRegClass.hasSubClassEq(RC)) {
- assert(STI.hasFP16());
- return load ? X86::VMOVSHZrm_alt : X86::VMOVSHZmr;
- }
assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
return load ? X86::MOV16rm : X86::MOV16mr;
case 4:
@@ -3739,6 +3611,10 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
X86::VK8PAIRRegClass.hasSubClassEq(RC) ||
X86::VK16PAIRRegClass.hasSubClassEq(RC))
return load ? X86::MASKPAIR16LOAD : X86::MASKPAIR16STORE;
+ if ((X86::FR16RegClass.hasSubClassEq(RC) ||
+ X86::FR16XRegClass.hasSubClassEq(RC)) &&
+ STI.hasFP16())
+ return load ? X86::VMOVSHZrm_alt : X86::VMOVSHZmr;
llvm_unreachable("Unknown 4-byte regclass");
case 8:
if (X86::GR64RegClass.hasSubClassEq(RC))
@@ -3845,6 +3721,35 @@ X86InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
return AM;
}
+bool X86InstrInfo::verifyInstruction(const MachineInstr &MI,
+ StringRef &ErrInfo) const {
+ Optional<ExtAddrMode> AMOrNone = getAddrModeFromMemoryOp(MI, nullptr);
+ if (!AMOrNone)
+ return true;
+
+ ExtAddrMode AM = *AMOrNone;
+
+ if (AM.ScaledReg != X86::NoRegister) {
+ switch (AM.Scale) {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ break;
+ default:
+ ErrInfo = "Scale factor in address must be 1, 2, 4 or 8";
+ return false;
+ }
+ }
+ if (!isInt<32>(AM.Displacement)) {
+ ErrInfo = "Displacement in address must fit into 32-bit signed "
+ "integer";
+ return false;
+ }
+
+ return true;
+}
+
bool X86InstrInfo::getConstValDefinedInReg(const MachineInstr &MI,
const Register Reg,
int64_t &ImmVal) const {
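The new verifyInstruction override lets the machine verifier flag addressing modes that cannot be encoded in an x86 SIB byte. An illustration in comments (examples assumed, not from this patch):

    // OK: scale in {1, 2, 4, 8} and displacement fits in a signed 32-bit
    // field:
    //   movl 16(%rdi,%rcx,4), %eax
    // Rejected: a scale of 3 ("Scale factor in address must be 1, 2, 4 or
    // 8"), or a displacement such as 1LL << 40 ("Displacement in address
    // must fit into 32-bit signed integer").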
@@ -3949,12 +3854,12 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
const MachineFrameInfo &MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
"Stack slot too small for store");
if (RC->getID() == X86::TILERegClassID) {
unsigned Opc = X86::TILESTORED;
// tilestored %tmm, (%sp, %idx)
- MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
BuildMI(MBB, MI, DebugLoc(), get(X86::MOV64ri), VirtReg).addImm(64);
MachineInstr *NewMI =
@@ -3963,6 +3868,14 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineOperand &MO = NewMI->getOperand(2);
MO.setReg(VirtReg);
MO.setIsKill(true);
+ } else if ((RC->getID() == X86::FR16RegClassID ||
+ RC->getID() == X86::FR16XRegClassID) &&
+ !Subtarget.hasFP16()) {
+ unsigned Opc = Subtarget.hasAVX512() ? X86::VMOVSSZmr
+ : Subtarget.hasAVX() ? X86::VMOVSSmr
+ : X86::MOVSSmr;
+ addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
+ .addReg(SrcReg, getKillRegState(isKill));
} else {
unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
bool isAligned =
@@ -3991,6 +3904,14 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineOperand &MO = NewMI->getOperand(3);
MO.setReg(VirtReg);
MO.setIsKill(true);
+ } else if ((RC->getID() == X86::FR16RegClassID ||
+ RC->getID() == X86::FR16XRegClassID) &&
+ !Subtarget.hasFP16()) {
+ unsigned Opc = Subtarget.hasAVX512() ? X86::VMOVSSZrm
+ : Subtarget.hasAVX() ? X86::VMOVSSrm
+ : X86::MOVSSrm;
+ addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg),
+ FrameIdx);
} else {
const MachineFunction &MF = *MBB.getParent();
const MachineFrameInfo &MFI = MF.getFrameInfo();
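This pairs with the getLoadStoreRegOpcode change above: without AVX512-FP16 there is no 16-bit scalar FP move, so f16 spills and reloads are routed through 32-bit scalar moves, relying on the f16 spill slot being treated as 4 bytes (note the FR16 classes moved under the 4-byte case earlier in this diff). A hedged example of the resulting code on an AVX512 target without FP16 (offsets illustrative):

    //   vmovss %xmm0, -8(%rsp)   # spill  via VMOVSSZmr
    //   vmovss -8(%rsp), %xmm0   # reload via VMOVSSZrm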
@@ -4375,7 +4296,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
case X86::SUB8ri: NewOpcode = X86::CMP8ri; break;
}
CmpInstr.setDesc(get(NewOpcode));
- CmpInstr.RemoveOperand(0);
+ CmpInstr.removeOperand(0);
// Mutating this instruction invalidates any debug data associated with it.
CmpInstr.dropDebugNumber();
// Fall through to optimize Cmp if Cmp is CMPrr or CMPri.
@@ -4423,6 +4344,23 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
MI = &Inst;
break;
}
+
+ // Look back for the following pattern, in which case the test64rr
+ // instruction could be erased.
+ //
+ // Example:
+ // %reg = and32ri %in_reg, 5
+ // ... // EFLAGS not changed.
+ // %src_reg = subreg_to_reg 0, %reg, %subreg.sub_index
+ // test64rr %src_reg, %src_reg, implicit-def $eflags
+ MachineInstr *AndInstr = nullptr;
+ if (IsCmpZero &&
+ findRedundantFlagInstr(CmpInstr, Inst, MRI, &AndInstr, TRI,
+ NoSignFlag, ClearsOverflowFlag)) {
+ assert(AndInstr != nullptr && X86::isAND(AndInstr->getOpcode()));
+ MI = AndInstr;
+ break;
+ }
// Cannot find other candidates before definition of SrcReg.
return false;
}
@@ -4524,6 +4462,11 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
return false;
case X86::COND_G: case X86::COND_GE:
case X86::COND_L: case X86::COND_LE:
+ // If SF is used, but the instruction doesn't update the SF, then we
+ // can't do the optimization.
+ if (NoSignFlag)
+ return false;
+ LLVM_FALLTHROUGH;
case X86::COND_O: case X86::COND_NO:
// If OF is used, the instruction needs to clear it like CmpZero does.
if (!ClearsOverflowFlag)
@@ -4811,7 +4754,7 @@ static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
BuildMI(MBB, I, DL, TII.get(X86::PUSH32i8)).addImm(Imm);
MIB->setDesc(TII.get(X86::POP32r));
}
- MIB->RemoveOperand(1);
+ MIB->removeOperand(1);
MIB->addImplicitDefUseOperands(*MBB.getParent());
// Build CFI if necessary.
@@ -4918,7 +4861,7 @@ static bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) {
MIB->setDesc(Desc);
int64_t ShiftAmt = MIB->getOperand(2).getImm();
// Temporarily remove the immediate so we can add another source register.
- MIB->RemoveOperand(2);
+ MIB->removeOperand(2);
// Add the register. Don't copy the kill flag if there is one.
MIB.addReg(MIB.getReg(1),
getUndefRegState(MIB->getOperand(1).isUndef()));
@@ -4949,6 +4892,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::V_SET0:
case X86::FsFLD0SS:
case X86::FsFLD0SD:
+ case X86::FsFLD0SH:
case X86::FsFLD0F128:
return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
case X86::AVX_SET0: {
@@ -5026,7 +4970,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
unsigned MaskState = getRegState(MIB->getOperand(1));
unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64) ?
X86::VPTERNLOGQZrrikz : X86::VPTERNLOGDZrrikz;
- MI.RemoveOperand(1);
+ MI.removeOperand(1);
MIB->setDesc(get(Opc));
// VPTERNLOG needs 3 register inputs and an immediate.
// 0xff will return 1s for any input.
@@ -5165,6 +5109,255 @@ static bool hasPartialRegUpdate(unsigned Opcode,
case X86::SQRTSDr_Int:
case X86::SQRTSDm_Int:
return true;
+ case X86::VFCMULCPHZ128rm:
+ case X86::VFCMULCPHZ128rmb:
+ case X86::VFCMULCPHZ128rmbkz:
+ case X86::VFCMULCPHZ128rmkz:
+ case X86::VFCMULCPHZ128rr:
+ case X86::VFCMULCPHZ128rrkz:
+ case X86::VFCMULCPHZ256rm:
+ case X86::VFCMULCPHZ256rmb:
+ case X86::VFCMULCPHZ256rmbkz:
+ case X86::VFCMULCPHZ256rmkz:
+ case X86::VFCMULCPHZ256rr:
+ case X86::VFCMULCPHZ256rrkz:
+ case X86::VFCMULCPHZrm:
+ case X86::VFCMULCPHZrmb:
+ case X86::VFCMULCPHZrmbkz:
+ case X86::VFCMULCPHZrmkz:
+ case X86::VFCMULCPHZrr:
+ case X86::VFCMULCPHZrrb:
+ case X86::VFCMULCPHZrrbkz:
+ case X86::VFCMULCPHZrrkz:
+ case X86::VFMULCPHZ128rm:
+ case X86::VFMULCPHZ128rmb:
+ case X86::VFMULCPHZ128rmbkz:
+ case X86::VFMULCPHZ128rmkz:
+ case X86::VFMULCPHZ128rr:
+ case X86::VFMULCPHZ128rrkz:
+ case X86::VFMULCPHZ256rm:
+ case X86::VFMULCPHZ256rmb:
+ case X86::VFMULCPHZ256rmbkz:
+ case X86::VFMULCPHZ256rmkz:
+ case X86::VFMULCPHZ256rr:
+ case X86::VFMULCPHZ256rrkz:
+ case X86::VFMULCPHZrm:
+ case X86::VFMULCPHZrmb:
+ case X86::VFMULCPHZrmbkz:
+ case X86::VFMULCPHZrmkz:
+ case X86::VFMULCPHZrr:
+ case X86::VFMULCPHZrrb:
+ case X86::VFMULCPHZrrbkz:
+ case X86::VFMULCPHZrrkz:
+ case X86::VFCMULCSHZrm:
+ case X86::VFCMULCSHZrmkz:
+ case X86::VFCMULCSHZrr:
+ case X86::VFCMULCSHZrrb:
+ case X86::VFCMULCSHZrrbkz:
+ case X86::VFCMULCSHZrrkz:
+ case X86::VFMULCSHZrm:
+ case X86::VFMULCSHZrmkz:
+ case X86::VFMULCSHZrr:
+ case X86::VFMULCSHZrrb:
+ case X86::VFMULCSHZrrbkz:
+ case X86::VFMULCSHZrrkz:
+ return Subtarget.hasMULCFalseDeps();
+ case X86::VPERMDYrm:
+ case X86::VPERMDYrr:
+ case X86::VPERMQYmi:
+ case X86::VPERMQYri:
+ case X86::VPERMPSYrm:
+ case X86::VPERMPSYrr:
+ case X86::VPERMPDYmi:
+ case X86::VPERMPDYri:
+ case X86::VPERMDZ256rm:
+ case X86::VPERMDZ256rmb:
+ case X86::VPERMDZ256rmbkz:
+ case X86::VPERMDZ256rmkz:
+ case X86::VPERMDZ256rr:
+ case X86::VPERMDZ256rrkz:
+ case X86::VPERMDZrm:
+ case X86::VPERMDZrmb:
+ case X86::VPERMDZrmbkz:
+ case X86::VPERMDZrmkz:
+ case X86::VPERMDZrr:
+ case X86::VPERMDZrrkz:
+ case X86::VPERMQZ256mbi:
+ case X86::VPERMQZ256mbikz:
+ case X86::VPERMQZ256mi:
+ case X86::VPERMQZ256mikz:
+ case X86::VPERMQZ256ri:
+ case X86::VPERMQZ256rikz:
+ case X86::VPERMQZ256rm:
+ case X86::VPERMQZ256rmb:
+ case X86::VPERMQZ256rmbkz:
+ case X86::VPERMQZ256rmkz:
+ case X86::VPERMQZ256rr:
+ case X86::VPERMQZ256rrkz:
+ case X86::VPERMQZmbi:
+ case X86::VPERMQZmbikz:
+ case X86::VPERMQZmi:
+ case X86::VPERMQZmikz:
+ case X86::VPERMQZri:
+ case X86::VPERMQZrikz:
+ case X86::VPERMQZrm:
+ case X86::VPERMQZrmb:
+ case X86::VPERMQZrmbkz:
+ case X86::VPERMQZrmkz:
+ case X86::VPERMQZrr:
+ case X86::VPERMQZrrkz:
+ case X86::VPERMPSZ256rm:
+ case X86::VPERMPSZ256rmb:
+ case X86::VPERMPSZ256rmbkz:
+ case X86::VPERMPSZ256rmkz:
+ case X86::VPERMPSZ256rr:
+ case X86::VPERMPSZ256rrkz:
+ case X86::VPERMPSZrm:
+ case X86::VPERMPSZrmb:
+ case X86::VPERMPSZrmbkz:
+ case X86::VPERMPSZrmkz:
+ case X86::VPERMPSZrr:
+ case X86::VPERMPSZrrkz:
+ case X86::VPERMPDZ256mbi:
+ case X86::VPERMPDZ256mbikz:
+ case X86::VPERMPDZ256mi:
+ case X86::VPERMPDZ256mikz:
+ case X86::VPERMPDZ256ri:
+ case X86::VPERMPDZ256rikz:
+ case X86::VPERMPDZ256rm:
+ case X86::VPERMPDZ256rmb:
+ case X86::VPERMPDZ256rmbkz:
+ case X86::VPERMPDZ256rmkz:
+ case X86::VPERMPDZ256rr:
+ case X86::VPERMPDZ256rrkz:
+ case X86::VPERMPDZmbi:
+ case X86::VPERMPDZmbikz:
+ case X86::VPERMPDZmi:
+ case X86::VPERMPDZmikz:
+ case X86::VPERMPDZri:
+ case X86::VPERMPDZrikz:
+ case X86::VPERMPDZrm:
+ case X86::VPERMPDZrmb:
+ case X86::VPERMPDZrmbkz:
+ case X86::VPERMPDZrmkz:
+ case X86::VPERMPDZrr:
+ case X86::VPERMPDZrrkz:
+ return Subtarget.hasPERMFalseDeps();
+ case X86::VRANGEPDZ128rmbi:
+ case X86::VRANGEPDZ128rmbikz:
+ case X86::VRANGEPDZ128rmi:
+ case X86::VRANGEPDZ128rmikz:
+ case X86::VRANGEPDZ128rri:
+ case X86::VRANGEPDZ128rrikz:
+ case X86::VRANGEPDZ256rmbi:
+ case X86::VRANGEPDZ256rmbikz:
+ case X86::VRANGEPDZ256rmi:
+ case X86::VRANGEPDZ256rmikz:
+ case X86::VRANGEPDZ256rri:
+ case X86::VRANGEPDZ256rrikz:
+ case X86::VRANGEPDZrmbi:
+ case X86::VRANGEPDZrmbikz:
+ case X86::VRANGEPDZrmi:
+ case X86::VRANGEPDZrmikz:
+ case X86::VRANGEPDZrri:
+ case X86::VRANGEPDZrrib:
+ case X86::VRANGEPDZrribkz:
+ case X86::VRANGEPDZrrikz:
+ case X86::VRANGEPSZ128rmbi:
+ case X86::VRANGEPSZ128rmbikz:
+ case X86::VRANGEPSZ128rmi:
+ case X86::VRANGEPSZ128rmikz:
+ case X86::VRANGEPSZ128rri:
+ case X86::VRANGEPSZ128rrikz:
+ case X86::VRANGEPSZ256rmbi:
+ case X86::VRANGEPSZ256rmbikz:
+ case X86::VRANGEPSZ256rmi:
+ case X86::VRANGEPSZ256rmikz:
+ case X86::VRANGEPSZ256rri:
+ case X86::VRANGEPSZ256rrikz:
+ case X86::VRANGEPSZrmbi:
+ case X86::VRANGEPSZrmbikz:
+ case X86::VRANGEPSZrmi:
+ case X86::VRANGEPSZrmikz:
+ case X86::VRANGEPSZrri:
+ case X86::VRANGEPSZrrib:
+ case X86::VRANGEPSZrribkz:
+ case X86::VRANGEPSZrrikz:
+ case X86::VRANGESDZrmi:
+ case X86::VRANGESDZrmikz:
+ case X86::VRANGESDZrri:
+ case X86::VRANGESDZrrib:
+ case X86::VRANGESDZrribkz:
+ case X86::VRANGESDZrrikz:
+ case X86::VRANGESSZrmi:
+ case X86::VRANGESSZrmikz:
+ case X86::VRANGESSZrri:
+ case X86::VRANGESSZrrib:
+ case X86::VRANGESSZrribkz:
+ case X86::VRANGESSZrrikz:
+ return Subtarget.hasRANGEFalseDeps();
+ case X86::VGETMANTSSZrmi:
+ case X86::VGETMANTSSZrmikz:
+ case X86::VGETMANTSSZrri:
+ case X86::VGETMANTSSZrrib:
+ case X86::VGETMANTSSZrribkz:
+ case X86::VGETMANTSSZrrikz:
+ case X86::VGETMANTSDZrmi:
+ case X86::VGETMANTSDZrmikz:
+ case X86::VGETMANTSDZrri:
+ case X86::VGETMANTSDZrrib:
+ case X86::VGETMANTSDZrribkz:
+ case X86::VGETMANTSDZrrikz:
+ case X86::VGETMANTSHZrmi:
+ case X86::VGETMANTSHZrmikz:
+ case X86::VGETMANTSHZrri:
+ case X86::VGETMANTSHZrrib:
+ case X86::VGETMANTSHZrribkz:
+ case X86::VGETMANTSHZrrikz:
+ case X86::VGETMANTPSZ128rmbi:
+ case X86::VGETMANTPSZ128rmbikz:
+ case X86::VGETMANTPSZ128rmi:
+ case X86::VGETMANTPSZ128rmikz:
+ case X86::VGETMANTPSZ256rmbi:
+ case X86::VGETMANTPSZ256rmbikz:
+ case X86::VGETMANTPSZ256rmi:
+ case X86::VGETMANTPSZ256rmikz:
+ case X86::VGETMANTPSZrmbi:
+ case X86::VGETMANTPSZrmbikz:
+ case X86::VGETMANTPSZrmi:
+ case X86::VGETMANTPSZrmikz:
+ case X86::VGETMANTPDZ128rmbi:
+ case X86::VGETMANTPDZ128rmbikz:
+ case X86::VGETMANTPDZ128rmi:
+ case X86::VGETMANTPDZ128rmikz:
+ case X86::VGETMANTPDZ256rmbi:
+ case X86::VGETMANTPDZ256rmbikz:
+ case X86::VGETMANTPDZ256rmi:
+ case X86::VGETMANTPDZ256rmikz:
+ case X86::VGETMANTPDZrmbi:
+ case X86::VGETMANTPDZrmbikz:
+ case X86::VGETMANTPDZrmi:
+ case X86::VGETMANTPDZrmikz:
+ return Subtarget.hasGETMANTFalseDeps();
+ case X86::VPMULLQZ128rm:
+ case X86::VPMULLQZ128rmb:
+ case X86::VPMULLQZ128rmbkz:
+ case X86::VPMULLQZ128rmkz:
+ case X86::VPMULLQZ128rr:
+ case X86::VPMULLQZ128rrkz:
+ case X86::VPMULLQZ256rm:
+ case X86::VPMULLQZ256rmb:
+ case X86::VPMULLQZ256rmbkz:
+ case X86::VPMULLQZ256rmkz:
+ case X86::VPMULLQZ256rr:
+ case X86::VPMULLQZ256rrkz:
+ case X86::VPMULLQZrm:
+ case X86::VPMULLQZrmb:
+ case X86::VPMULLQZrmbkz:
+ case X86::VPMULLQZrmkz:
+ case X86::VPMULLQZrr:
+ case X86::VPMULLQZrrkz:
+ return Subtarget.hasMULLQFalseDeps();
// GPR
case X86::POPCNT32rm:
case X86::POPCNT32rr:
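The new opcode groups above gate the false-dependency workaround on per-CPU tuning queries instead of returning a blanket true. A sketch of the X86Subtarget side these calls assume (accessor names taken from the calls; the backing flags are presumably driven by tuning features in X86.td):

    class X86Subtarget {
      bool HasMULCFalseDeps = false;
    public:
      bool hasMULCFalseDeps() const { return HasMULCFalseDeps; }
      // hasPERMFalseDeps(), hasRANGEFalseDeps(), hasGETMANTFalseDeps() and
      // hasMULLQFalseDeps() follow the same pattern.
    };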
@@ -5591,6 +5784,28 @@ void X86InstrInfo::breakPartialRegDependency(
.addReg(XReg, RegState::Undef)
.addReg(Reg, RegState::ImplicitDefine);
MI.addRegisterKilled(Reg, TRI, true);
+ } else if (X86::VR128XRegClass.contains(Reg)) {
+ // Only handle VLX targets.
+ if (!Subtarget.hasVLX())
+ return;
+ // Since vxorps requires AVX512DQ, vpxord should be the best choice.
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VPXORDZ128rr), Reg)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+ MI.addRegisterKilled(Reg, TRI, true);
+ } else if (X86::VR256XRegClass.contains(Reg) ||
+ X86::VR512RegClass.contains(Reg)) {
+ // Only handle VLX targets.
+ if (!Subtarget.hasVLX())
+ return;
+ // Use vpxord to clear the full ymm/zmm register.
+ // It wants to read and write the xmm sub-register.
+ Register XReg = TRI->getSubReg(Reg, X86::sub_xmm);
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VPXORDZ128rr), XReg)
+ .addReg(XReg, RegState::Undef)
+ .addReg(XReg, RegState::Undef)
+ .addReg(Reg, RegState::ImplicitDefine);
+ MI.addRegisterKilled(Reg, TRI, true);
} else if (X86::GR64RegClass.contains(Reg)) {
// Using XOR32rr because it has shorter encoding and zeros up the upper bits
// as well.
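For EVEX-only registers (xmm16 and up) the VEX-encoded XOR idioms used for the lower banks are unencodable, hence VPXORDZ128rr, which needs only AVX512VL (vxorps on these registers would additionally require AVX512DQ, per the comment above). A sketch of what gets emitted for a zmm register (register choice illustrative):

    //   $xmm16 = VPXORDZ128rr undef $xmm16, undef $xmm16,
    //            implicit-def $zmm16
    // Writing the xmm sub-register while implicitly defining the full
    // register breaks the false dependency on the previous value.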
@@ -6413,6 +6628,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
case X86::AVX512_FsFLD0SS:
Alignment = Align(4);
break;
+ case X86::FsFLD0SH:
case X86::AVX512_FsFLD0SH:
Alignment = Align(2);
break;
@@ -6451,6 +6667,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
case X86::AVX512_256_SET0:
case X86::AVX512_512_SET0:
case X86::AVX512_512_SETALLONES:
+ case X86::FsFLD0SH:
case X86::AVX512_FsFLD0SH:
case X86::FsFLD0SD:
case X86::AVX512_FsFLD0SD:
@@ -6490,7 +6707,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
Ty = Type::getDoubleTy(MF.getFunction().getContext());
else if (Opc == X86::FsFLD0F128 || Opc == X86::AVX512_FsFLD0F128)
Ty = Type::getFP128Ty(MF.getFunction().getContext());
- else if (Opc == X86::AVX512_FsFLD0SH)
+ else if (Opc == X86::FsFLD0SH || Opc == X86::AVX512_FsFLD0SH)
Ty = Type::getHalfTy(MF.getFunction().getContext());
else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
@@ -7170,7 +7387,7 @@ bool X86InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
// ENDBR instructions should not be scheduled around.
unsigned Opcode = MI.getOpcode();
if (Opcode == X86::ENDBR64 || Opcode == X86::ENDBR32 ||
- Opcode == X86::LDTILECFG)
+ Opcode == X86::PLDTILECFGV)
return true;
return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
@@ -9298,12 +9515,10 @@ outliner::OutlinedFunction X86InstrInfo::getOutliningCandidateInfo(
// We check to see if CFI Instructions are present, and if they are
// we find the number of CFI Instructions in the candidates.
unsigned CFICount = 0;
- MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front();
- for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx();
- Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) {
- if (MBBI->isCFIInstruction())
+ for (auto &I : make_range(RepeatedSequenceLocs[0].front(),
+ std::next(RepeatedSequenceLocs[0].back()))) {
+ if (I.isCFIInstruction())
CFICount++;
- MBBI++;
}
// We compare the number of found CFI Instructions to the number of CFI
@@ -9440,7 +9655,7 @@ MachineBasicBlock::iterator
X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
MachineBasicBlock::iterator &It,
MachineFunction &MF,
- const outliner::Candidate &C) const {
+ outliner::Candidate &C) const {
// Is it a tail call?
if (C.CallConstructionID == MachineOutlinerTailCall) {
// Yes, just insert a JMP.