Diffstat (limited to 'lib/Target/X86/X86InstrInfo.cpp')
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp  338
1 file changed, 325 insertions, 13 deletions
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 6b7a9299dcfb..4aa0ae6f1959 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -3456,11 +3456,11 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
return !isPredicated(MI);
}
-bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
+bool X86InstrInfo::AnalyzeBranchImpl(
+ MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallVectorImpl<MachineInstr *> &CondBranches, bool AllowModify) const {
+
// Start from the bottom of the block and work up, examining the
// terminator instructions.
MachineBasicBlock::iterator I = MBB.end();
@@ -3558,6 +3558,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
FBB = TBB;
TBB = I->getOperand(0).getMBB();
Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ CondBranches.push_back(I);
continue;
}
@@ -3595,11 +3596,90 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// Update the MachineOperand.
Cond[0].setImm(BranchCode);
+ CondBranches.push_back(I);
}
return false;
}
+bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ SmallVector<MachineInstr *, 4> CondBranches;
+ return AnalyzeBranchImpl(MBB, TBB, FBB, Cond, CondBranches, AllowModify);
+}
+
+bool X86InstrInfo::AnalyzeBranchPredicate(MachineBasicBlock &MBB,
+ MachineBranchPredicate &MBP,
+ bool AllowModify) const {
+ using namespace std::placeholders;
+
+ SmallVector<MachineOperand, 4> Cond;
+ SmallVector<MachineInstr *, 4> CondBranches;
+ if (AnalyzeBranchImpl(MBB, MBP.TrueDest, MBP.FalseDest, Cond, CondBranches,
+ AllowModify))
+ return true;
+
+ if (Cond.size() != 1)
+ return true;
+
+ assert(MBP.TrueDest && "expected!");
+
+ if (!MBP.FalseDest)
+ MBP.FalseDest = MBB.getNextNode();
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+
+ MachineInstr *ConditionDef = nullptr;
+ bool SingleUseCondition = true;
+
+ for (auto I = std::next(MBB.rbegin()), E = MBB.rend(); I != E; ++I) {
+ if (I->modifiesRegister(X86::EFLAGS, TRI)) {
+ ConditionDef = &*I;
+ break;
+ }
+
+ if (I->readsRegister(X86::EFLAGS, TRI))
+ SingleUseCondition = false;
+ }
+
+ if (!ConditionDef)
+ return true;
+
+ if (SingleUseCondition) {
+ for (auto *Succ : MBB.successors())
+ if (Succ->isLiveIn(X86::EFLAGS))
+ SingleUseCondition = false;
+ }
+
+ MBP.ConditionDef = ConditionDef;
+ MBP.SingleUseCondition = SingleUseCondition;
+
+ // Currently we only recognize the simple pattern:
+ //
+ // test %reg, %reg
+ // je %label
+ //
+ const unsigned TestOpcode =
+ Subtarget.is64Bit() ? X86::TEST64rr : X86::TEST32rr;
+
+ if (ConditionDef->getOpcode() == TestOpcode &&
+ ConditionDef->getNumOperands() == 3 &&
+ ConditionDef->getOperand(0).isIdenticalTo(ConditionDef->getOperand(1)) &&
+ (Cond[0].getImm() == X86::COND_NE || Cond[0].getImm() == X86::COND_E)) {
+ MBP.LHS = ConditionDef->getOperand(0);
+ MBP.RHS = MachineOperand::CreateImm(0);
+ MBP.Predicate = Cond[0].getImm() == X86::COND_NE
+ ? MachineBranchPredicate::PRED_NE
+ : MachineBranchPredicate::PRED_EQ;
+ return false;
+ }
+
+ return true;
+}
+
unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator I = MBB.end();
unsigned Count = 0;
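The AnalyzeBranchPredicate hook added above packages the "test %reg, %reg; je/jne %label" idiom into a MachineBranchPredicate. A minimal sketch of a caller follows; it assumes the MachineBranchPredicate struct and the AnalyzeBranchPredicate virtual that this patch series declares on TargetInstrInfo, and the helper name is illustrative rather than part of the patch.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;

// Sketch: returns true if MBB ends in the compare-register-with-zero branch
// idiom that X86InstrInfo::AnalyzeBranchPredicate recognizes.
static bool endsInCompareWithZero(MachineBasicBlock &MBB,
                                  const TargetInstrInfo &TII) {
  TargetInstrInfo::MachineBranchPredicate MBP;
  // Like AnalyzeBranch, the hook returns true when it cannot analyze the
  // block, and false on success.
  if (TII.AnalyzeBranchPredicate(MBB, MBP, /*AllowModify=*/false))
    return false;
  // On success, MBP.LHS is the tested register, MBP.RHS is the immediate 0,
  // and MBP.Predicate is PRED_EQ for je or PRED_NE for jne.
  // MBP.SingleUseCondition tells the caller whether EFLAGS is dead after the
  // branch, so MBP.ConditionDef (the TEST) could be rewritten or sunk.
  return true;
}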
@@ -3622,8 +3702,7 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
unsigned
X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
@@ -3671,7 +3750,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
bool X86InstrInfo::
canInsertSelect(const MachineBasicBlock &MBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg,
int &CondCycles, int &TrueCycles, int &FalseCycles) const {
// Not all subtargets have cmov instructions.
@@ -3708,8 +3787,7 @@ canInsertSelect(const MachineBasicBlock &MBB,
void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DstReg,
- const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned DstReg, ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
assert(Cond.size() == 1 && "Invalid Cond array");
@@ -3967,6 +4045,36 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
}
}
+bool X86InstrInfo::getMemOpBaseRegImmOfs(MachineInstr *MemOp, unsigned &BaseReg,
+ unsigned &Offset,
+ const TargetRegisterInfo *TRI) const {
+ const MCInstrDesc &Desc = MemOp->getDesc();
+ int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags, MemOp->getOpcode());
+ if (MemRefBegin < 0)
+ return false;
+
+ MemRefBegin += X86II::getOperandBias(Desc);
+
+ BaseReg = MemOp->getOperand(MemRefBegin + X86::AddrBaseReg).getReg();
+ if (MemOp->getOperand(MemRefBegin + X86::AddrScaleAmt).getImm() != 1)
+ return false;
+
+ if (MemOp->getOperand(MemRefBegin + X86::AddrIndexReg).getReg() !=
+ X86::NoRegister)
+ return false;
+
+ const MachineOperand &DispMO = MemOp->getOperand(MemRefBegin + X86::AddrDisp);
+
+ // Displacement can be symbolic
+ if (!DispMO.isImm())
+ return false;
+
+ Offset = DispMO.getImm();
+
+ return (MemOp->getOperand(MemRefBegin + X86::AddrIndexReg).getReg() ==
+ X86::NoRegister);
+}
+
static unsigned getStoreRegOpcode(unsigned SrcReg,
const TargetRegisterClass *RC,
bool isStackAligned,
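The getMemOpBaseRegImmOfs implementation above reports a (base register, immediate offset) pair only for the simplest X86 addressing form: scale 1, no index register, and a non-symbolic displacement. A small sketch of a caller, assuming the matching TargetInstrInfo virtual; the helper name is illustrative and not part of the patch.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Sketch: succeeds for a plain [Base + Disp] access such as
// movl 16(%rdi), %eax (BaseReg = RDI, Offset = 16); forms such as
// 16(%rdi,%rcx,4) or foo(%rip) make the hook return false because of the
// index register or the symbolic displacement.
static bool hasSimpleBasePlusOffset(MachineInstr *MemOp,
                                    const TargetInstrInfo &TII,
                                    const TargetRegisterInfo *TRI,
                                    unsigned &BaseReg, unsigned &Offset) {
  return TII.getMemOpBaseRegImmOfs(MemOp, BaseReg, Offset, TRI);
}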
@@ -6219,13 +6327,217 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
}
bool X86InstrInfo::
-hasHighOperandLatency(const InstrItineraryData *ItinData,
+hasHighOperandLatency(const TargetSchedModel &SchedModel,
const MachineRegisterInfo *MRI,
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *UseMI, unsigned UseIdx) const {
return isHighLatencyDef(DefMI->getOpcode());
}
+/// If the input instruction is part of a chain of dependent ops that are
+/// suitable for reassociation, return the earlier instruction in the sequence
+/// that defines its first operand, otherwise return a nullptr.
+/// If the instruction's operands must be commuted to be considered a
+/// reassociation candidate, Commuted will be set to true.
+static MachineInstr *isReassocCandidate(const MachineInstr &Inst,
+ unsigned AssocOpcode,
+ bool checkPrevOneUse,
+ bool &Commuted) {
+ if (Inst.getOpcode() != AssocOpcode)
+ return nullptr;
+
+ MachineOperand Op1 = Inst.getOperand(1);
+ MachineOperand Op2 = Inst.getOperand(2);
+
+ const MachineBasicBlock *MBB = Inst.getParent();
+ const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ // We need virtual register definitions.
+ MachineInstr *MI1 = nullptr;
+ MachineInstr *MI2 = nullptr;
+ if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg()))
+ MI1 = MRI.getUniqueVRegDef(Op1.getReg());
+ if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg()))
+ MI2 = MRI.getUniqueVRegDef(Op2.getReg());
+
+ // And they need to be in the trace (otherwise, they won't have a depth).
+ if (!MI1 || !MI2 || MI1->getParent() != MBB || MI2->getParent() != MBB)
+ return nullptr;
+
+ Commuted = false;
+ if (MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode) {
+ std::swap(MI1, MI2);
+ Commuted = true;
+ }
+
+ // Avoid reassociating operands when it won't provide any benefit. If both
+ // operands are produced by instructions of this type, we may already
+ // have the optimal sequence.
+ if (MI2->getOpcode() == AssocOpcode)
+ return nullptr;
+
+ // The instruction must only be used by the other instruction that we
+ // reassociate with.
+ if (checkPrevOneUse && !MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()))
+ return nullptr;
+
+ // We must match a simple chain of dependent ops.
+ // TODO: This check is not necessary for the earliest instruction in the
+ // sequence. Instead of a sequence of 3 dependent instructions with the same
+ // opcode, we only need to find a sequence of 2 dependent instructions with
+ // the same opcode plus 1 other instruction that adds to the height of the
+ // trace.
+ if (MI1->getOpcode() != AssocOpcode)
+ return nullptr;
+
+ return MI1;
+}
+
+/// Select a pattern based on how the operands of each associative operation
+/// need to be commuted.
+static MachineCombinerPattern::MC_PATTERN getPattern(bool CommutePrev,
+ bool CommuteRoot) {
+ if (CommutePrev) {
+ if (CommuteRoot)
+ return MachineCombinerPattern::MC_REASSOC_XA_YB;
+ return MachineCombinerPattern::MC_REASSOC_XA_BY;
+ } else {
+ if (CommuteRoot)
+ return MachineCombinerPattern::MC_REASSOC_AX_YB;
+ return MachineCombinerPattern::MC_REASSOC_AX_BY;
+ }
+}
+
+bool X86InstrInfo::getMachineCombinerPatterns(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Patterns) const {
+ if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath)
+ return false;
+
+ // TODO: There are many more associative instruction types to match:
+ // 1. Other forms of scalar FP add (non-AVX)
+ // 2. Other data types (double, integer, vectors)
+ // 3. Other math / logic operations (mul, and, or)
+ unsigned AssocOpcode = X86::VADDSSrr;
+
+ // TODO: There is nothing x86-specific here except the instruction type.
+ // This logic could be hoisted into the machine combiner pass itself.
+ bool CommuteRoot;
+ if (MachineInstr *Prev = isReassocCandidate(Root, AssocOpcode, true,
+ CommuteRoot)) {
+ bool CommutePrev;
+ if (isReassocCandidate(*Prev, AssocOpcode, false, CommutePrev)) {
+ // We found a sequence of instructions that may be suitable for a
+ // reassociation of operands to increase ILP.
+ Patterns.push_back(getPattern(CommutePrev, CommuteRoot));
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// Attempt the following reassociation to reduce critical path length:
+/// B = A op X (Prev)
+/// C = B op Y (Root)
+/// ===>
+/// B = X op Y
+/// C = A op B
+static void reassociateOps(MachineInstr &Root, MachineInstr &Prev,
+ MachineCombinerPattern::MC_PATTERN Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
+ MachineFunction *MF = Root.getParent()->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI);
+
+ // This array encodes the operand index for each parameter because the
+ // operands may be commuted. Each row corresponds to a pattern value,
+ // and each column specifies the index of A, B, X, Y.
+ unsigned OpIdx[4][4] = {
+ { 1, 1, 2, 2 },
+ { 1, 2, 2, 1 },
+ { 2, 1, 1, 2 },
+ { 2, 2, 1, 1 }
+ };
+
+ MachineOperand &OpA = Prev.getOperand(OpIdx[Pattern][0]);
+ MachineOperand &OpB = Root.getOperand(OpIdx[Pattern][1]);
+ MachineOperand &OpX = Prev.getOperand(OpIdx[Pattern][2]);
+ MachineOperand &OpY = Root.getOperand(OpIdx[Pattern][3]);
+ MachineOperand &OpC = Root.getOperand(0);
+
+ unsigned RegA = OpA.getReg();
+ unsigned RegB = OpB.getReg();
+ unsigned RegX = OpX.getReg();
+ unsigned RegY = OpY.getReg();
+ unsigned RegC = OpC.getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(RegA))
+ MRI.constrainRegClass(RegA, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegB))
+ MRI.constrainRegClass(RegB, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegX))
+ MRI.constrainRegClass(RegX, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegY))
+ MRI.constrainRegClass(RegY, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegC))
+ MRI.constrainRegClass(RegC, RC);
+
+ // Create a new virtual register for the result of (X op Y) instead of
+ // recycling RegB because the MachineCombiner's computation of the critical
+ // path requires a new register definition rather than an existing one.
+ unsigned NewVR = MRI.createVirtualRegister(RC);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+
+ unsigned Opcode = Root.getOpcode();
+ bool KillA = OpA.isKill();
+ bool KillX = OpX.isKill();
+ bool KillY = OpY.isKill();
+
+ // Create new instructions for insertion.
+ MachineInstrBuilder MIB1 =
+ BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR)
+ .addReg(RegX, getKillRegState(KillX))
+ .addReg(RegY, getKillRegState(KillY));
+ InsInstrs.push_back(MIB1);
+
+ MachineInstrBuilder MIB2 =
+ BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC)
+ .addReg(RegA, getKillRegState(KillA))
+ .addReg(NewVR, getKillRegState(true));
+ InsInstrs.push_back(MIB2);
+
+ // Record old instructions for deletion.
+ DelInstrs.push_back(&Prev);
+ DelInstrs.push_back(&Root);
+}
+
+void X86InstrInfo::genAlternativeCodeSequence(
+ MachineInstr &Root,
+ MachineCombinerPattern::MC_PATTERN Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const {
+ MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo();
+
+ // Select the previous instruction in the sequence based on the input pattern.
+ MachineInstr *Prev = nullptr;
+ if (Pattern == MachineCombinerPattern::MC_REASSOC_AX_BY ||
+ Pattern == MachineCombinerPattern::MC_REASSOC_XA_BY)
+ Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
+ else if (Pattern == MachineCombinerPattern::MC_REASSOC_AX_YB ||
+ Pattern == MachineCombinerPattern::MC_REASSOC_XA_YB)
+ Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
+ else
+ llvm_unreachable("Unknown pattern for machine combiner");
+
+ reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
+ return;
+}
+
namespace {
/// Create Global Base Reg pass. This initializes the PIC
/// global base register for x86-32.
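The reassociation that getMachineCombinerPatterns, reassociateOps, and genAlternativeCodeSequence implement above is easiest to see in its source-level analogue. The sketch below is illustrative only; the combiner currently matches just VADDSSrr, and only under UnsafeFPMath, because floating-point addition is not associative.

// Before: the two adds are serialized through b (Prev, then Root).
float before(float a, float x, float y) {
  float b = a + x;   // Prev:  B = A op X
  return b + y;      // Root:  C = B op Y
}

// After: x + y no longer depends on a, so it can issue while a is still
// being computed; this is the MC_REASSOC_AX_BY shape, with t playing the
// role of the new virtual register NewVR.
float after(float a, float x, float y) {
  float t = x + y;   //        B' = X op Y
  return a + t;      //        C  = A op B'
}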
@@ -6292,7 +6604,7 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-}
+} // namespace
char CGBR::ID = 0;
FunctionPass*
@@ -6404,7 +6716,7 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-}
+} // namespace
char LDTLSCleanup::ID = 0;
FunctionPass*