diff options
Diffstat (limited to 'llvm/lib/CodeGen/MachineScheduler.cpp')
| -rw-r--r-- | llvm/lib/CodeGen/MachineScheduler.cpp | 167 | 
1 files changed, 106 insertions, 61 deletions
| diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index e42701b9c6ca..cf75d531deb2 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -1471,41 +1471,48 @@ namespace {  class BaseMemOpClusterMutation : public ScheduleDAGMutation {    struct MemOpInfo {      SUnit *SU; -    const MachineOperand *BaseOp; +    SmallVector<const MachineOperand *, 4> BaseOps;      int64_t Offset; - -    MemOpInfo(SUnit *su, const MachineOperand *Op, int64_t ofs) -        : SU(su), BaseOp(Op), Offset(ofs) {} - -    bool operator<(const MemOpInfo &RHS) const { -      if (BaseOp->getType() != RHS.BaseOp->getType()) -        return BaseOp->getType() < RHS.BaseOp->getType(); - -      if (BaseOp->isReg()) -        return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) < -               std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset, -                               RHS.SU->NodeNum); -      if (BaseOp->isFI()) { -        const MachineFunction &MF = -            *BaseOp->getParent()->getParent()->getParent(); +    unsigned Width; + +    MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps, +              int64_t Offset, unsigned Width) +        : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset), +          Width(Width) {} + +    static bool Compare(const MachineOperand *const &A, +                        const MachineOperand *const &B) { +      if (A->getType() != B->getType()) +        return A->getType() < B->getType(); +      if (A->isReg()) +        return A->getReg() < B->getReg(); +      if (A->isFI()) { +        const MachineFunction &MF = *A->getParent()->getParent()->getParent();          const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();          bool StackGrowsDown = TFI.getStackGrowthDirection() ==                                TargetFrameLowering::StackGrowsDown; -        // Can't use tuple comparison here since we might need to use a -        // different order when the stack grows down. -        if (BaseOp->getIndex() != RHS.BaseOp->getIndex()) -          return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex() -                                : BaseOp->getIndex() < RHS.BaseOp->getIndex(); - -        if (Offset != RHS.Offset) -          return Offset < RHS.Offset; - -        return SU->NodeNum < RHS.SU->NodeNum; +        return StackGrowsDown ? A->getIndex() > B->getIndex() +                              : A->getIndex() < B->getIndex();        }        llvm_unreachable("MemOpClusterMutation only supports register or frame "                         "index bases.");      } + +    bool operator<(const MemOpInfo &RHS) const { +      // FIXME: Don't compare everything twice. Maybe use C++20 three way +      // comparison instead when it's available. +      if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(), +                                       RHS.BaseOps.begin(), RHS.BaseOps.end(), +                                       Compare)) +        return true; +      if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(), +                                       BaseOps.begin(), BaseOps.end(), Compare)) +        return false; +      if (Offset != RHS.Offset) +        return Offset < RHS.Offset; +      return SU->NodeNum < RHS.SU->NodeNum; +    }    };    const TargetInstrInfo *TII; @@ -1560,41 +1567,78 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(      ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) {    SmallVector<MemOpInfo, 32> MemOpRecords;    for (SUnit *SU : MemOps) { -    const MachineOperand *BaseOp; +    const MachineInstr &MI = *SU->getInstr(); +    SmallVector<const MachineOperand *, 4> BaseOps;      int64_t Offset; -    if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI)) -      MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset)); +    bool OffsetIsScalable; +    unsigned Width; +    if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, +                                           OffsetIsScalable, Width, TRI)) { +      MemOpRecords.push_back(MemOpInfo(SU, BaseOps, Offset, Width)); + +      LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: " +                        << Offset << ", OffsetIsScalable: " << OffsetIsScalable +                        << ", Width: " << Width << "\n"); +    } +#ifndef NDEBUG +    for (auto *Op : BaseOps) +      assert(Op); +#endif    }    if (MemOpRecords.size() < 2)      return;    llvm::sort(MemOpRecords); + +  // At this point, `MemOpRecords` array must hold atleast two mem ops. Try to +  // cluster mem ops collected within `MemOpRecords` array.    unsigned ClusterLength = 1; +  unsigned CurrentClusterBytes = MemOpRecords[0].Width;    for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) { -    SUnit *SUa = MemOpRecords[Idx].SU; -    SUnit *SUb = MemOpRecords[Idx+1].SU; +    // Decision to cluster mem ops is taken based on target dependent logic +    auto MemOpa = MemOpRecords[Idx]; +    auto MemOpb = MemOpRecords[Idx + 1]; +    ++ClusterLength; +    CurrentClusterBytes += MemOpb.Width; +    if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength, +                                  CurrentClusterBytes)) { +      // Current mem ops pair could not be clustered, reset cluster length, and +      // go to next pair +      ClusterLength = 1; +      CurrentClusterBytes = MemOpb.Width; +      continue; +    } + +    SUnit *SUa = MemOpa.SU; +    SUnit *SUb = MemOpb.SU;      if (SUa->NodeNum > SUb->NodeNum)        std::swap(SUa, SUb); -    if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp, -                                 *MemOpRecords[Idx + 1].BaseOp, -                                 ClusterLength) && -        DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) { -      LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU(" -                        << SUb->NodeNum << ")\n"); -      // Copy successor edges from SUa to SUb. Interleaving computation -      // dependent on SUa can prevent load combining due to register reuse. -      // Predecessor edges do not need to be copied from SUb to SUa since nearby -      // loads should have effectively the same inputs. -      for (const SDep &Succ : SUa->Succs) { -        if (Succ.getSUnit() == SUb) -          continue; -        LLVM_DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum -                          << ")\n"); -        DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial)); -      } -      ++ClusterLength; -    } else + +    // FIXME: Is this check really required? +    if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {        ClusterLength = 1; +      CurrentClusterBytes = MemOpb.Width; +      continue; +    } + +    LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU(" +                      << SUb->NodeNum << ")\n"); + +    // Copy successor edges from SUa to SUb. Interleaving computation +    // dependent on SUa can prevent load combining due to register reuse. +    // Predecessor edges do not need to be copied from SUb to SUa since +    // nearby loads should have effectively the same inputs. +    for (const SDep &Succ : SUa->Succs) { +      if (Succ.getSUnit() == SUb) +        continue; +      LLVM_DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum +                        << ")\n"); +      DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial)); +    } + +    LLVM_DEBUG(dbgs() << "  Curr cluster length: " << ClusterLength +                      << ", Curr cluster bytes: " << CurrentClusterBytes +                      << "\n");    }  } @@ -1609,7 +1653,7 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {      unsigned ChainPredID = DAG->SUnits.size();      for (const SDep &Pred : SU.Preds) { -      if (Pred.isCtrl()) { +      if (Pred.isCtrl() && !Pred.isArtificial()) {          ChainPredID = Pred.getSUnit()->NodeNum;          break;        } @@ -2389,16 +2433,14 @@ SUnit *SchedBoundary::pickOnlyChoice() {    if (CheckPending)      releasePending(); -  if (CurrMOps > 0) { -    // Defer any ready instrs that now have a hazard. -    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { -      if (checkHazard(*I)) { -        Pending.push(*I); -        I = Available.remove(I); -        continue; -      } -      ++I; +  // Defer any ready instrs that now have a hazard. +  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { +    if (checkHazard(*I)) { +      Pending.push(*I); +      I = Available.remove(I); +      continue;      } +    ++I;    }    for (unsigned i = 0; Available.empty(); ++i) {  //  FIXME: Re-enable assert once PR20057 is resolved. @@ -2720,6 +2762,9 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) {    SchedModel = DAG->getSchedModel();    TRI = DAG->TRI; +  if (RegionPolicy.ComputeDFSResult) +    DAG->computeDFSResult(); +    Rem.init(DAG, SchedModel);    Top.init(DAG, SchedModel, &Rem);    Bot.init(DAG, SchedModel, &Rem); @@ -3684,7 +3729,7 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {    DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}    static std::string getGraphName(const ScheduleDAG *G) { -    return G->MF.getName(); +    return std::string(G->MF.getName());    }    static bool renderGraphFromBottomUp() { | 
