summaryrefslogtreecommitdiff
path: root/lib/CodeGen/MachineCombiner.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen/MachineCombiner.cpp')
-rw-r--r--lib/CodeGen/MachineCombiner.cpp134
1 files changed, 101 insertions, 33 deletions
diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp
index e6f80dbb8630..702d21228477 100644
--- a/lib/CodeGen/MachineCombiner.cpp
+++ b/lib/CodeGen/MachineCombiner.cpp
@@ -16,17 +16,17 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -34,6 +34,11 @@ using namespace llvm;
STATISTIC(NumInstCombined, "Number of machineinst combined");
+static cl::opt<unsigned>
+inc_threshold("machine-combiner-inc-threshold", cl::Hidden,
+ cl::desc("Incremental depth computation will be used for basic "
+ "blocks with more instructions."), cl::init(500));
+
namespace {
class MachineCombiner : public MachineFunctionPass {
const TargetInstrInfo *TII;
@@ -73,7 +78,7 @@ private:
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
- MachineCombinerPattern Pattern);
+ MachineCombinerPattern Pattern, bool SlackIsAccurate);
bool preservesResourceLen(MachineBasicBlock *MBB,
MachineTraceMetrics::Trace BlockTrace,
SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -155,9 +160,10 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
assert(DefInstr &&
"There must be a definition for a new virtual register");
DepthOp = InstrDepth[II->second];
- LatencyOp = TSchedModel.computeOperandLatency(
- DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
- InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
+ int DefIdx = DefInstr->findRegisterDefOperandIdx(MO.getReg());
+ int UseIdx = InstrPtr->findRegisterUseOperandIdx(MO.getReg());
+ LatencyOp = TSchedModel.computeOperandLatency(DefInstr, DefIdx,
+ InstrPtr, UseIdx);
} else {
MachineInstr *DefInstr = getOperandDef(MO);
if (DefInstr) {
@@ -247,7 +253,8 @@ bool MachineCombiner::improvesCriticalPathLen(
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
- MachineCombinerPattern Pattern) {
+ MachineCombinerPattern Pattern,
+ bool SlackIsAccurate) {
assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
"Missing machine model\n");
// NewRoot is the last instruction in the \p InsInstrs vector.
@@ -258,7 +265,7 @@ bool MachineCombiner::improvesCriticalPathLen(
unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;
- DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
+ DEBUG(dbgs() << "DEPENDENCE DATA FOR " << *Root << "\n";
dbgs() << " NewRootDepth: " << NewRootDepth << "\n";
dbgs() << " RootDepth: " << RootDepth << "\n");
@@ -274,24 +281,32 @@ bool MachineCombiner::improvesCriticalPathLen(
// of the original code sequence. This may allow the transform to proceed
// even if the instruction depths (data dependency cycles) become worse.
- unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
- unsigned RootLatency = 0;
+ // Account for the latency of the inserted and deleted instructions by
+ // adding up their latencies. This assumes that the inserted and deleted
+ // instructions are dependent instruction chains, which might not hold
+ // in all cases.
+ unsigned NewRootLatency = 0;
+ for (unsigned i = 0; i < InsInstrs.size() - 1; i++)
+ NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]);
+ NewRootLatency += getLatency(Root, NewRoot, BlockTrace);
+ unsigned RootLatency = 0;
for (auto I : DelInstrs)
RootLatency += TSchedModel.computeInstrLatency(I);
unsigned RootSlack = BlockTrace.getInstrSlack(*Root);
-
+ unsigned NewCycleCount = NewRootDepth + NewRootLatency;
+ unsigned OldCycleCount = RootDepth + RootLatency +
+ (SlackIsAccurate ? RootSlack : 0);
DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n";
dbgs() << " RootLatency: " << RootLatency << "\n";
- dbgs() << " RootSlack: " << RootSlack << "\n";
+ dbgs() << " RootSlack: " << RootSlack << " SlackIsAccurate="
+ << SlackIsAccurate << "\n";
dbgs() << " NewRootDepth + NewRootLatency = "
- << NewRootDepth + NewRootLatency << "\n";
+ << NewCycleCount << "\n";
dbgs() << " RootDepth + RootLatency + RootSlack = "
- << RootDepth + RootLatency + RootSlack << "\n";);
-
- unsigned NewCycleCount = NewRootDepth + NewRootLatency;
- unsigned OldCycleCount = RootDepth + RootLatency + RootSlack;
+ << OldCycleCount << "\n";
+ );
return NewCycleCount <= OldCycleCount;
}
@@ -354,17 +369,44 @@ bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
return false;
}
+/// Inserts InsInstrs and deletes DelInstrs. Incrementally updates instruction
+/// depths if requested.
+///
+/// \param MBB basic block to insert instructions in
+/// \param MI current machine instruction
+/// \param InsInstrs new instructions to insert in \p MBB
+/// \param DelInstrs instruction to delete from \p MBB
+/// \param MinInstr is a pointer to the machine trace information
+/// \param RegUnits set of live registers, needed to compute instruction depths
+/// \param IncrementalUpdate if true, compute instruction depths incrementally,
+/// otherwise invalidate the trace
static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
SmallVector<MachineInstr *, 16> InsInstrs,
SmallVector<MachineInstr *, 16> DelInstrs,
- MachineTraceMetrics *Traces) {
+ MachineTraceMetrics::Ensemble *MinInstr,
+ SparseSet<LiveRegUnit> &RegUnits,
+ bool IncrementalUpdate) {
for (auto *InstrPtr : InsInstrs)
MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr);
- for (auto *InstrPtr : DelInstrs)
+
+ for (auto *InstrPtr : DelInstrs) {
InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
- ++NumInstCombined;
- Traces->invalidate(MBB);
- Traces->verifyAnalysis();
+ // Erase all LiveRegs defined by the removed instruction
+ for (auto I = RegUnits.begin(); I != RegUnits.end(); ) {
+ if (I->MI == InstrPtr)
+ I = RegUnits.erase(I);
+ else
+ I++;
+ }
+ }
+
+ if (IncrementalUpdate)
+ for (auto *InstrPtr : InsInstrs)
+ MinInstr->updateDepth(MBB, *InstrPtr, RegUnits);
+ else
+ MinInstr->invalidate(MBB);
+
+ NumInstCombined++;
}
/// Substitute a slow code sequence with a faster one by
@@ -378,9 +420,16 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
bool Changed = false;
DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
+ bool IncrementalUpdate = false;
auto BlockIter = MBB->begin();
+ decltype(BlockIter) LastUpdate;
// Check if the block is in a loop.
const MachineLoop *ML = MLI->getLoopFor(MBB);
+ if (!MinInstr)
+ MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+
+ SparseSet<LiveRegUnit> RegUnits;
+ RegUnits.setUniverse(TRI->getNumRegUnits());
while (BlockIter != MBB->end()) {
auto &MI = *BlockIter++;
@@ -419,9 +468,6 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
SmallVector<MachineInstr *, 16> InsInstrs;
SmallVector<MachineInstr *, 16> DelInstrs;
DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
- if (!MinInstr)
- MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
- Traces->verifyAnalysis();
TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
InstrIdxForVirtReg);
unsigned NewInstCount = InsInstrs.size();
@@ -436,23 +482,43 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
if (ML && TII->isThroughputPattern(P))
SubstituteAlways = true;
+ if (IncrementalUpdate) {
+ // Update depths since the last incremental update.
+ MinInstr->updateDepths(LastUpdate, BlockIter, RegUnits);
+ LastUpdate = BlockIter;
+ }
+
// Substitute when we optimize for codesize and the new sequence has
// fewer instructions OR
// the new sequence neither lengthens the critical path nor increases
// resource pressure.
if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount)) {
- insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, Traces);
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
+ RegUnits, IncrementalUpdate);
// Eagerly stop after the first pattern fires.
Changed = true;
break;
} else {
- // Calculating the trace metrics may be expensive,
- // so only do this when necessary.
+ // For big basic blocks, we only compute the full trace the first time
+ // we hit this. We do not invalidate the trace, but instead update the
+ // instruction depths incrementally.
+ // NOTE: Only the instruction depths up to MI are accurate. All other
+ // trace information is not updated.
MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
+ Traces->verifyAnalysis();
if (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, DelInstrs,
- InstrIdxForVirtReg, P) &&
+ InstrIdxForVirtReg, P,
+ !IncrementalUpdate) &&
preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs)) {
- insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, Traces);
+ if (MBB->size() > inc_threshold) {
+ // Use incremental depth updates for basic blocks above treshold
+ IncrementalUpdate = true;
+ LastUpdate = BlockIter;
+ }
+
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
+ RegUnits, IncrementalUpdate);
+
// Eagerly stop after the first pattern fires.
Changed = true;
break;
@@ -467,6 +533,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
}
}
+ if (Changed && IncrementalUpdate)
+ Traces->invalidate(MBB);
return Changed;
}
@@ -480,7 +548,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
MLI = &getAnalysis<MachineLoopInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
- OptSize = MF.getFunction()->optForSize();
+ OptSize = MF.getFunction().optForSize();
DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
if (!TII->useMachineCombiner()) {