diff options
Diffstat (limited to 'llvm/lib/CodeGen/MachinePipeliner.cpp')
-rw-r--r-- | llvm/lib/CodeGen/MachinePipeliner.cpp | 150 |
1 files changed, 121 insertions, 29 deletions
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index ef22caa877c95..ef4b02ca9e3ef 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -217,6 +217,7 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) { MF = &mf; MLI = &getAnalysis<MachineLoopInfo>(); MDT = &getAnalysis<MachineDominatorTree>(); + ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); TII = MF->getSubtarget().getInstrInfo(); RegClassInfo.runOnMachineFunction(*MF); @@ -248,6 +249,12 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) { setPragmaPipelineOptions(L); if (!canPipelineLoop(L)) { LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n"); + ORE->emit([&]() { + return MachineOptimizationRemarkMissed(DEBUG_TYPE, "canPipelineLoop", + L.getStartLoc(), L.getHeader()) + << "Failed to pipeline loop"; + }); + return Changed; } @@ -259,6 +266,9 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) { } void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) { + // Reset the pragma for the next loop in iteration. + disabledByPragma = false; + MachineBasicBlock *LBLK = L.getTopBlock(); if (LBLK == nullptr) @@ -306,11 +316,24 @@ void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) { /// restricted to loops with a single basic block. Make sure that the /// branch in the loop can be analyzed. bool MachinePipeliner::canPipelineLoop(MachineLoop &L) { - if (L.getNumBlocks() != 1) + if (L.getNumBlocks() != 1) { + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop", + L.getStartLoc(), L.getHeader()) + << "Not a single basic block: " + << ore::NV("NumBlocks", L.getNumBlocks()); + }); return false; + } - if (disabledByPragma) + if (disabledByPragma) { + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop", + L.getStartLoc(), L.getHeader()) + << "Disabled by Pragma."; + }); return false; + } // Check if the branch can't be understood because we can't do pipelining // if that's the case. @@ -318,25 +341,37 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) { LI.FBB = nullptr; LI.BrCond.clear(); if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) { - LLVM_DEBUG( - dbgs() << "Unable to analyzeBranch, can NOT pipeline current Loop\n"); + LLVM_DEBUG(dbgs() << "Unable to analyzeBranch, can NOT pipeline Loop\n"); NumFailBranch++; + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop", + L.getStartLoc(), L.getHeader()) + << "The branch can't be understood"; + }); return false; } LI.LoopInductionVar = nullptr; LI.LoopCompare = nullptr; if (!TII->analyzeLoopForPipelining(L.getTopBlock())) { - LLVM_DEBUG( - dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n"); + LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n"); NumFailLoop++; + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop", + L.getStartLoc(), L.getHeader()) + << "The loop structure is not supported"; + }); return false; } if (!L.getLoopPreheader()) { - LLVM_DEBUG( - dbgs() << "Preheader not found, can NOT pipeline current Loop\n"); + LLVM_DEBUG(dbgs() << "Preheader not found, can NOT pipeline Loop\n"); NumFailPreheader++; + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop", + L.getStartLoc(), L.getHeader()) + << "No loop preheader found"; + }); return false; } @@ -454,10 +489,13 @@ void SwingSchedulerDAG::schedule() { // Can't schedule a loop without a valid MII. if (MII == 0) { - LLVM_DEBUG( - dbgs() - << "0 is not a valid Minimal Initiation Interval, can NOT schedule\n"); + LLVM_DEBUG(dbgs() << "Invalid Minimal Initiation Interval: 0\n"); NumFailZeroMII++; + Pass.ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis( + DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader()) + << "Invalid Minimal Initiation Interval: 0"; + }); return; } @@ -466,6 +504,14 @@ void SwingSchedulerDAG::schedule() { LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii << ", we don't pipleline large loops\n"); NumFailLargeMaxMII++; + Pass.ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis( + DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader()) + << "Minimal Initiation Interval too large: " + << ore::NV("MII", (int)MII) << " > " + << ore::NV("SwpMaxMii", SwpMaxMii) << "." + << "Refer to -pipeliner-max-mii."; + }); return; } @@ -508,15 +554,24 @@ void SwingSchedulerDAG::schedule() { if (!Scheduled){ LLVM_DEBUG(dbgs() << "No schedule found, return\n"); NumFailNoSchedule++; + Pass.ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis( + DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader()) + << "Unable to find schedule"; + }); return; } unsigned numStages = Schedule.getMaxStageCount(); // No need to generate pipeline if there are no overlapped iterations. if (numStages == 0) { - LLVM_DEBUG( - dbgs() << "No overlapped iterations, no need to generate pipeline\n"); + LLVM_DEBUG(dbgs() << "No overlapped iterations, skip.\n"); NumFailZeroStage++; + Pass.ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis( + DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader()) + << "No need to pipeline - no overlapped iterations in schedule."; + }); return; } // Check that the maximum stage count is less than user-defined limit. @@ -524,9 +579,23 @@ void SwingSchedulerDAG::schedule() { LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages << " : too many stages, abort\n"); NumFailLargeMaxStage++; + Pass.ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis( + DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader()) + << "Too many stages in schedule: " + << ore::NV("numStages", (int)numStages) << " > " + << ore::NV("SwpMaxStages", SwpMaxStages) + << ". Refer to -pipeliner-max-stages."; + }); return; } + Pass.ORE->emit([&]() { + return MachineOptimizationRemark(DEBUG_TYPE, "schedule", Loop.getStartLoc(), + Loop.getHeader()) + << "Pipelined succesfully!"; + }); + // Generate the schedule as a ModuloSchedule. DenseMap<MachineInstr *, int> Cycles, Stages; std::vector<MachineInstr *> OrderedInsts; @@ -693,9 +762,13 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { // offset, then mark the dependence as loop carried potentially. const MachineOperand *BaseOp1, *BaseOp2; int64_t Offset1, Offset2; - if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, TRI) && - TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) { + bool Offset1IsScalable, Offset2IsScalable; + if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, + Offset1IsScalable, TRI) && + TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, + Offset2IsScalable, TRI)) { if (BaseOp1->isIdenticalTo(*BaseOp2) && + Offset1IsScalable == Offset2IsScalable && (int)Offset1 < (int)Offset2) { assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI) && "What happened to the chain edge?"); @@ -802,7 +875,7 @@ void SwingSchedulerDAG::updatePhiDependences() { if (!MI->isPHI()) { SDep Dep(SU, SDep::Data, Reg); Dep.setLatency(0); - ST.adjustSchedDependency(SU, &I, Dep); + ST.adjustSchedDependency(SU, 0, &I, MI->getOperandNo(MOI), Dep); I.addPred(Dep); } else { HasPhiUse = Reg; @@ -905,7 +978,7 @@ namespace { struct FuncUnitSorter { const InstrItineraryData *InstrItins; const MCSubtargetInfo *STI; - DenseMap<unsigned, unsigned> Resources; + DenseMap<InstrStage::FuncUnits, unsigned> Resources; FuncUnitSorter(const TargetSubtargetInfo &TSI) : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {} @@ -913,14 +986,15 @@ struct FuncUnitSorter { // Compute the number of functional unit alternatives needed // at each stage, and take the minimum value. We prioritize the // instructions by the least number of choices first. - unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const { + unsigned minFuncUnits(const MachineInstr *Inst, + InstrStage::FuncUnits &F) const { unsigned SchedClass = Inst->getDesc().getSchedClass(); unsigned min = UINT_MAX; if (InstrItins && !InstrItins->isEmpty()) { for (const InstrStage &IS : make_range(InstrItins->beginStage(SchedClass), InstrItins->endStage(SchedClass))) { - unsigned funcUnits = IS.getUnits(); + InstrStage::FuncUnits funcUnits = IS.getUnits(); unsigned numAlternatives = countPopulation(funcUnits); if (numAlternatives < min) { min = numAlternatives; @@ -966,7 +1040,7 @@ struct FuncUnitSorter { for (const InstrStage &IS : make_range(InstrItins->beginStage(SchedClass), InstrItins->endStage(SchedClass))) { - unsigned FuncUnits = IS.getUnits(); + InstrStage::FuncUnits FuncUnits = IS.getUnits(); if (countPopulation(FuncUnits) == 1) Resources[FuncUnits]++; } @@ -994,7 +1068,7 @@ struct FuncUnitSorter { /// Return true if IS1 has less priority than IS2. bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const { - unsigned F1 = 0, F2 = 0; + InstrStage::FuncUnits F1 = 0, F2 = 0; unsigned MFUs1 = minFuncUnits(IS1, F1); unsigned MFUs2 = minFuncUnits(IS2, F2); if (MFUs1 == MFUs2) @@ -1072,7 +1146,7 @@ unsigned SwingSchedulerDAG::calculateResMII() { } } int Resmii = Resources.size(); - LLVM_DEBUG(dbgs() << "Retrun Res MII:" << Resmii << "\n"); + LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n"); // Delete the memory for each of the DFAs that were created earlier. for (ResourceManager *RI : Resources) { ResourceManager *D = RI; @@ -2044,9 +2118,16 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << " (II=" << II << ")\n"); - if (scheduleFound) + if (scheduleFound) { Schedule.finalizeSchedule(this); - else + Pass.ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis( + DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader()) + << "Schedule found with Initiation Interval: " << ore::NV("II", II) + << ", MaxStageCount: " + << ore::NV("MaxStageCount", Schedule.getMaxStageCount()); + }); + } else Schedule.reset(); return scheduleFound && Schedule.getMaxStageCount() > 0; @@ -2058,7 +2139,12 @@ bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const MachineOperand *BaseOp; int64_t Offset; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) + bool OffsetIsScalable; + if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI)) + return false; + + // FIXME: This algorithm assumes instructions have fixed-size offsets. + if (OffsetIsScalable) return false; if (!BaseOp->isReg()) @@ -2236,11 +2322,17 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep, const MachineOperand *BaseOpS, *BaseOpD; int64_t OffsetS, OffsetD; + bool OffsetSIsScalable, OffsetDIsScalable; const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, TRI) || - !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, TRI)) + if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, OffsetSIsScalable, + TRI) || + !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, OffsetDIsScalable, + TRI)) return true; + assert(!OffsetSIsScalable && !OffsetDIsScalable && + "Expected offsets to be byte offsets"); + if (!BaseOpS->isIdenticalTo(*BaseOpD)) return true; @@ -2352,7 +2444,7 @@ int SMSchedule::earliestCycleInChain(const SDep &Dep) { continue; EarlyCycle = std::min(EarlyCycle, it->second); for (const auto &PI : PrevSU->Preds) - if (PI.getKind() == SDep::Order || Dep.getKind() == SDep::Output) + if (PI.getKind() == SDep::Order || PI.getKind() == SDep::Output) Worklist.push_back(PI); Visited.insert(PrevSU); } @@ -2375,7 +2467,7 @@ int SMSchedule::latestCycleInChain(const SDep &Dep) { continue; LateCycle = std::max(LateCycle, it->second); for (const auto &SI : SuccSU->Succs) - if (SI.getKind() == SDep::Order || Dep.getKind() == SDep::Output) + if (SI.getKind() == SDep::Order || SI.getKind() == SDep::Output) Worklist.push_back(SI); Visited.insert(SuccSU); } |