diff options
Diffstat (limited to 'lib/Target/Hexagon/HexagonSubtarget.cpp')
-rw-r--r-- | lib/Target/Hexagon/HexagonSubtarget.cpp | 244 |
1 files changed, 244 insertions, 0 deletions
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index aa0efd4f65e07..fb315a730f397 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -14,6 +14,8 @@ #include "HexagonSubtarget.h" #include "Hexagon.h" #include "HexagonRegisterInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include <map> @@ -49,10 +51,24 @@ static cl::opt<bool> EnableHexagonHVX("enable-hexagon-hvx", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Enable Hexagon Vector eXtensions")); +static cl::opt<bool> EnableTCLatencySched("enable-tc-latency-sched", + cl::Hidden, cl::ZeroOrMore, cl::init(false)); + +static cl::opt<bool> EnableDotCurSched("enable-cur-sched", + cl::Hidden, cl::ZeroOrMore, cl::init(true), + cl::desc("Enable the scheduler to generate .cur")); + +static cl::opt<bool> EnableVecFrwdSched("enable-evec-frwd-sched", + cl::Hidden, cl::ZeroOrMore, cl::init(true)); + static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Hexagon MI Scheduling")); +static cl::opt<bool> EnableSubregLiveness("hexagon-subreg-liveness", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Enable subregister liveness tracking for Hexagon")); + void HexagonSubtarget::initializeEnvironment() { UseMemOps = false; ModeIEEERndNear = false; @@ -115,6 +131,57 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, UseBSBScheduling = hasV60TOps() && EnableBSBSched; } + +void HexagonSubtarget::HexagonDAGMutation::apply(ScheduleDAGInstrs *DAG) { + for (auto &SU : DAG->SUnits) { + if (!SU.isInstr()) + continue; + SmallVector<SDep, 4> Erase; + for (auto &D : SU.Preds) + if (D.getKind() == SDep::Output && D.getReg() == Hexagon::USR_OVF) + Erase.push_back(D); + for (auto &E : Erase) + SU.removePred(E); + } + + 
for (auto &SU : DAG->SUnits) { + // Update the latency of chain edges between v60 vector load or store + // instructions to be 1. These instructions cannot be scheduled in the + // same packet. + MachineInstr *MI1 = SU.getInstr(); + auto *QII = static_cast<const HexagonInstrInfo*>(DAG->TII); + bool IsStoreMI1 = MI1->mayStore(); + bool IsLoadMI1 = MI1->mayLoad(); + if (!QII->isV60VectorInstruction(MI1) || !(IsStoreMI1 || IsLoadMI1)) + continue; + for (auto &SI : SU.Succs) { + if (SI.getKind() != SDep::Order || SI.getLatency() != 0) + continue; + MachineInstr *MI2 = SI.getSUnit()->getInstr(); + if (!QII->isV60VectorInstruction(MI2)) + continue; + if ((IsStoreMI1 && MI2->mayStore()) || (IsLoadMI1 && MI2->mayLoad())) { + SI.setLatency(1); + SU.setHeightDirty(); + // Change the dependence in the opposite direction too. + for (auto &PI : SI.getSUnit()->Preds) { + if (PI.getSUnit() != &SU || PI.getKind() != SDep::Order) + continue; + PI.setLatency(1); + SI.getSUnit()->setDepthDirty(); + } + } + } + } +} + + +void HexagonSubtarget::getPostRAMutations( + std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const { + Mutations.push_back(make_unique<HexagonSubtarget::HexagonDAGMutation>()); +} + + // Pin the vtable to this file. void HexagonSubtarget::anchor() {} @@ -123,3 +190,180 @@ bool HexagonSubtarget::enableMachineScheduler() const { return !DisableHexagonMISched; return true; } + +bool HexagonSubtarget::enableSubRegLiveness() const { + return EnableSubregLiveness; +} + +// This helper function is responsible for increasing the latency only. +void HexagonSubtarget::updateLatency(MachineInstr *SrcInst, + MachineInstr *DstInst, SDep &Dep) const { + if (!hasV60TOps()) + return; + + auto &QII = static_cast<const HexagonInstrInfo&>(*getInstrInfo()); + + if (EnableVecFrwdSched && QII.addLatencyToSchedule(SrcInst, DstInst)) { + // Vec frwd scheduling. 
+ Dep.setLatency(Dep.getLatency() + 1); + } else if (useBSBScheduling() && + QII.isLateInstrFeedsEarlyInstr(SrcInst, DstInst)) { + // BSB scheduling. + Dep.setLatency(Dep.getLatency() + 1); + } else if (EnableTCLatencySched) { + // TClass latency scheduling. + // Check if SrcInst produces in 2C an operand of DstInst taken in stage 2B. + if (QII.isTC1(SrcInst) || QII.isTC2(SrcInst)) + if (!QII.isTC1(DstInst) && !QII.isTC2(DstInst)) + Dep.setLatency(Dep.getLatency() + 1); + } +} + +/// If the SUnit has a zero latency edge, return the other SUnit. +static SUnit *getZeroLatency(SUnit *N, SmallVector<SDep, 4> &Deps) { + for (auto &I : Deps) + if (I.isAssignedRegDep() && I.getLatency() == 0 && + !I.getSUnit()->getInstr()->isPseudo()) + return I.getSUnit(); + return nullptr; +} + +/// Change the latency between the two SUnits. +void HexagonSubtarget::changeLatency(SUnit *Src, SmallVector<SDep, 4> &Deps, + SUnit *Dst, unsigned Lat) const { + MachineInstr *SrcI = Src->getInstr(); + for (auto &I : Deps) { + if (I.getSUnit() != Dst) + continue; + I.setLatency(Lat); + SUnit *UpdateDst = I.getSUnit(); + updateLatency(SrcI, UpdateDst->getInstr(), I); + // Update the latency of the opposite edge too. + for (auto &PI : UpdateDst->Preds) { + if (PI.getSUnit() != Src || !PI.isAssignedRegDep()) + continue; + PI.setLatency(Lat); + updateLatency(SrcI, UpdateDst->getInstr(), PI); + } + } +} + +// Return true if these are the best two instructions to schedule +// together with a zero latency. Only one dependence should have a zero +// latency. If there are multiple choices, choose the best, and change +// the others, if needed. +bool HexagonSubtarget::isBestZeroLatency(SUnit *Src, SUnit *Dst, + const HexagonInstrInfo *TII) const { + MachineInstr *SrcInst = Src->getInstr(); + MachineInstr *DstInst = Dst->getInstr(); + + if (SrcInst->isPHI() || DstInst->isPHI()) + return false; + + // Check if the Dst instruction is the best candidate first. 
+ SUnit *Best = nullptr; + SUnit *DstBest = nullptr; + SUnit *SrcBest = getZeroLatency(Dst, Dst->Preds); + if (SrcBest == nullptr || Src->NodeNum >= SrcBest->NodeNum) { + // Check that Src doesn't have a better candidate. + DstBest = getZeroLatency(Src, Src->Succs); + if (DstBest == nullptr || Dst->NodeNum <= DstBest->NodeNum) + Best = Dst; + } + if (Best != Dst) + return false; + + // The caller frequently adds the same dependence twice. If so, then + // return true for this case too. + if (Src == SrcBest && Dst == DstBest) + return true; + + // Reassign the latency for the previous bests, which requires setting + // the dependence edge in both directions. + if (SrcBest != nullptr) + changeLatency(SrcBest, SrcBest->Succs, Dst, 1); + if (DstBest != nullptr) + changeLatency(Src, Src->Succs, DstBest, 1); + // If there is an edge from SrcBest to DstBest, then try to change that + // to 0 now. + if (SrcBest && DstBest) + changeLatency(SrcBest, SrcBest->Succs, DstBest, 0); + + return true; +} + +// Update the latency of a Phi when the Phi bridges two instructions that +// require a multi-cycle latency. +void HexagonSubtarget::changePhiLatency(MachineInstr *SrcInst, SUnit *Dst, + SDep &Dep) const { + if (!SrcInst->isPHI() || Dst->NumPreds == 0 || Dep.getLatency() != 0) + return; + + for (const SDep &PI : Dst->Preds) { + if (PI.getLatency() != 0) + continue; + Dep.setLatency(2); + break; + } +} + +/// \brief Perform target specific adjustments to the latency of a schedule +/// dependency. +void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, + SDep &Dep) const { + MachineInstr *SrcInst = Src->getInstr(); + MachineInstr *DstInst = Dst->getInstr(); + if (!Src->isInstr() || !Dst->isInstr()) + return; + + const HexagonInstrInfo *QII = static_cast<const HexagonInstrInfo *>(getInstrInfo()); + + // Instructions with .new operands have zero latency. 
+ if (QII->canExecuteInBundle(SrcInst, DstInst) && + isBestZeroLatency(Src, Dst, QII)) { + Dep.setLatency(0); + return; + } + + if (!hasV60TOps()) + return; + + // Don't adjust the latency of the post-increment part of the instruction. + if (QII->isPostIncrement(SrcInst) && Dep.isAssignedRegDep()) { + if (SrcInst->mayStore()) + return; + if (Dep.getReg() != SrcInst->getOperand(0).getReg()) + return; + } else if (QII->isPostIncrement(DstInst) && Dep.getKind() == SDep::Anti) { + if (DstInst->mayStore()) + return; + if (Dep.getReg() != DstInst->getOperand(0).getReg()) + return; + } else if (QII->isPostIncrement(DstInst) && DstInst->mayStore() && + Dep.isAssignedRegDep()) { + MachineOperand &Op = DstInst->getOperand(DstInst->getNumOperands() - 1); + if (Op.isReg() && Dep.getReg() != Op.getReg()) + return; + } + + // Check if we need to change any of the latency values when Phis are added. + if (useBSBScheduling() && SrcInst->isPHI()) { + changePhiLatency(SrcInst, Dst, Dep); + return; + } + + // If it's a REG_SEQUENCE, use its destination instruction to determine + // the correct latency. + if (DstInst->isRegSequence() && Dst->NumSuccs == 1) + DstInst = Dst->Succs[0].getSUnit()->getInstr(); + + // Try to schedule uses near definitions to generate .cur. + if (EnableDotCurSched && QII->isToBeScheduledASAP(SrcInst, DstInst) && + isBestZeroLatency(Src, Dst, QII)) { + Dep.setLatency(0); + return; + } + + updateLatency(SrcInst, DstInst, Dep); +} + |