diff options
Diffstat (limited to 'lib/Target/AMDGPU/GCNSchedStrategy.cpp')
| -rw-r--r-- | lib/Target/AMDGPU/GCNSchedStrategy.cpp | 282 | 
1 files changed, 134 insertions, 148 deletions
diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 630442625aa3e..8ec46665daf56 100644 --- a/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -316,46 +316,57 @@ GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,    MFI(*MF.getInfo<SIMachineFunctionInfo>()),    StartingOccupancy(ST.getOccupancyWithLocalMemSize(MFI.getLDSSize(),                                                      *MF.getFunction())), -  MinOccupancy(StartingOccupancy), Stage(0) { +  MinOccupancy(StartingOccupancy), Stage(0), RegionIdx(0) {    DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");  }  void GCNScheduleDAGMILive::schedule() { +  if (Stage == 0) { +    // Just record regions at the first pass. +    Regions.push_back(std::make_pair(RegionBegin, RegionEnd)); +    return; +  } +    std::vector<MachineInstr*> Unsched;    Unsched.reserve(NumRegionInstrs);    for (auto &I : *this)      Unsched.push_back(&I); -  std::pair<unsigned, unsigned> PressureBefore; +  GCNRegPressure PressureBefore;    if (LIS) { -    DEBUG(dbgs() << "Pressure before scheduling:\n"); -    discoverLiveIns(); -    PressureBefore = getRealRegPressure(); +    PressureBefore = Pressure[RegionIdx]; + +    DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:"; +          GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI); +          dbgs() << "Region live-in pressure:  "; +          llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs()); +          dbgs() << "Region register pressure: "; +          PressureBefore.print(dbgs()));    }    ScheduleDAGMILive::schedule(); -  if (Stage == 0) -    Regions.push_back(std::make_pair(RegionBegin, RegionEnd)); +  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);    if (!LIS)      return;    // Check the results of scheduling.    GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl; -  DEBUG(dbgs() << "Pressure after scheduling:\n");    auto PressureAfter = getRealRegPressure(); -  LiveIns.clear(); -  if (PressureAfter.first <= S.SGPRCriticalLimit && -      PressureAfter.second <= S.VGPRCriticalLimit) { +  DEBUG(dbgs() << "Pressure after scheduling: "; PressureAfter.print(dbgs())); + +  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit && +      PressureAfter.getVGPRNum() <= S.VGPRCriticalLimit) { +    Pressure[RegionIdx] = PressureAfter;      DEBUG(dbgs() << "Pressure in desired limits, done.\n");      return;    } -  unsigned WavesAfter = getMaxWaves(PressureAfter.first, -                                    PressureAfter.second, MF); -  unsigned WavesBefore = getMaxWaves(PressureBefore.first, -                                      PressureBefore.second, MF); +  unsigned WavesAfter = getMaxWaves(PressureAfter.getSGPRNum(), +                                    PressureAfter.getVGPRNum(), MF); +  unsigned WavesBefore = getMaxWaves(PressureBefore.getSGPRNum(), +                                     PressureBefore.getVGPRNum(), MF);    DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore <<                    ", after " << WavesAfter << ".\n"); @@ -368,8 +379,10 @@ void GCNScheduleDAGMILive::schedule() {                   << MinOccupancy << ".\n");    } -  if (WavesAfter >= WavesBefore) +  if (WavesAfter >= WavesBefore) { +    Pressure[RegionIdx] = PressureAfter;      return; +  }    DEBUG(dbgs() << "Attempting to revert scheduling.\n");    RegionEnd = RegionBegin; @@ -398,166 +411,139 @@ void GCNScheduleDAGMILive::schedule() {      DEBUG(dbgs() << "Scheduling " << *MI);    }    RegionBegin = Unsched.front()->getIterator(); -  if (Stage == 0) -    Regions.back() = std::make_pair(RegionBegin, RegionEnd); +  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);    placeDebugValues();  } -static inline void setMask(const MachineRegisterInfo &MRI, -                           const SIRegisterInfo *SRI, unsigned Reg, -                           LaneBitmask &PrevMask, LaneBitmask NewMask, -                           unsigned &SGPRs, unsigned &VGPRs) { -  int NewRegs = countPopulation(NewMask.getAsInteger()) - -                countPopulation(PrevMask.getAsInteger()); -  if (SRI->isSGPRReg(MRI, Reg)) -    SGPRs += NewRegs; -  if (SRI->isVGPR(MRI, Reg)) -    VGPRs += NewRegs; -  assert ((int)SGPRs >= 0 && (int)VGPRs >= 0); -  PrevMask = NewMask; +GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const { +  GCNDownwardRPTracker RPTracker(*LIS); +  RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]); +  return RPTracker.moveMaxPressure();  } -void GCNScheduleDAGMILive::discoverLiveIns() { -  unsigned SGPRs = 0; -  unsigned VGPRs = 0; +void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) { +  GCNDownwardRPTracker RPTracker(*LIS); + +  // If the block has the only successor then live-ins of that successor are +  // live-outs of the current block. We can reuse calculated live set if the +  // successor will be sent to scheduling past current block. +  const MachineBasicBlock *OnlySucc = nullptr; +  if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) { +    SlotIndexes *Ind = LIS->getSlotIndexes(); +    if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin())) +      OnlySucc = *MBB->succ_begin(); +  } -  auto &MI = *begin()->getParent()->getFirstNonDebugInstr(); -  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI); -  SlotIndex SI = LIS->getInstructionIndex(MI).getBaseIndex(); -  assert (SI.isValid()); - -  DEBUG(dbgs() << "Region live-ins:"); -  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { -    unsigned Reg = TargetRegisterInfo::index2VirtReg(I); -    if (MRI.reg_nodbg_empty(Reg)) -      continue; -    const LiveInterval &LI = LIS->getInterval(Reg); -    LaneBitmask LaneMask = LaneBitmask::getNone(); -    if (LI.hasSubRanges()) { -      for (const auto &S : LI.subranges()) -        if (S.liveAt(SI)) -          LaneMask |= S.LaneMask; -    } else if (LI.liveAt(SI)) { -      LaneMask = MRI.getMaxLaneMaskForVReg(Reg); -    } +  // Scheduler sends regions from the end of the block upwards. +  size_t CurRegion = RegionIdx; +  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion) +    if (Regions[CurRegion].first->getParent() != MBB) +      break; +  --CurRegion; + +  auto I = MBB->begin(); +  auto LiveInIt = MBBLiveIns.find(MBB); +  if (LiveInIt != MBBLiveIns.end()) { +    auto LiveIn = std::move(LiveInIt->second); +    RPTracker.reset(*MBB->begin(), &LiveIn); +    MBBLiveIns.erase(LiveInIt); +  } else { +    I = Regions[CurRegion].first; +    RPTracker.reset(*I); +  } -    if (LaneMask.any()) { -      setMask(MRI, SRI, Reg, LiveIns[Reg], LaneMask, SGPRs, VGPRs); +  for ( ; ; ) { +    I = RPTracker.getNext(); -      DEBUG(dbgs() << ' ' << PrintVRegOrUnit(Reg, SRI) << ':' -                   << PrintLaneMask(LiveIns[Reg])); +    if (Regions[CurRegion].first == I) { +      LiveIns[CurRegion] = RPTracker.getLiveRegs(); +      RPTracker.clearMaxPressure();      } -  } -  LiveInPressure = std::make_pair(SGPRs, VGPRs); +    if (Regions[CurRegion].second == I) { +      Pressure[CurRegion] = RPTracker.moveMaxPressure(); +      if (CurRegion-- == RegionIdx) +        break; +    } +    RPTracker.advanceToNext(); +    RPTracker.advanceBeforeNext(); +  } -  DEBUG(dbgs() << "\nLive-in pressure:\nSGPR = " << SGPRs -               << "\nVGPR = " << VGPRs << '\n'); +  if (OnlySucc) { +    if (I != MBB->end()) { +      RPTracker.advanceToNext(); +      RPTracker.advance(MBB->end()); +    } +    RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs()); +    RPTracker.advanceBeforeNext(); +    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs(); +  }  } -std::pair<unsigned, unsigned> -GCNScheduleDAGMILive::getRealRegPressure() const { -  unsigned SGPRs, MaxSGPRs, VGPRs, MaxVGPRs; -  SGPRs = MaxSGPRs = LiveInPressure.first; -  VGPRs = MaxVGPRs = LiveInPressure.second; - -  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI); -  DenseMap<unsigned, LaneBitmask> LiveRegs(LiveIns); +void GCNScheduleDAGMILive::finalizeSchedule() { +  GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl; +  DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n"); -  for (const MachineInstr &MI : *this) { -    if (MI.isDebugValue()) -      continue; -    SlotIndex SI = LIS->getInstructionIndex(MI).getBaseIndex(); -    assert (SI.isValid()); +  LiveIns.resize(Regions.size()); +  Pressure.resize(Regions.size()); -    // Remove dead registers or mask bits. -    for (auto &It : LiveRegs) { -      if (It.second.none()) -        continue; -      const LiveInterval &LI = LIS->getInterval(It.first); -      if (LI.hasSubRanges()) { -        for (const auto &S : LI.subranges()) -          if (!S.liveAt(SI)) -            setMask(MRI, SRI, It.first, It.second, It.second & ~S.LaneMask, -                    SGPRs, VGPRs); -      } else if (!LI.liveAt(SI)) { -        setMask(MRI, SRI, It.first, It.second, LaneBitmask::getNone(), -                SGPRs, VGPRs); -      } -    } +  do { +    Stage++; +    RegionIdx = 0; +    MachineBasicBlock *MBB = nullptr; -    // Add new registers or mask bits. -    for (const auto &MO : MI.defs()) { -      if (!MO.isReg()) -        continue; -      unsigned Reg = MO.getReg(); -      if (!TargetRegisterInfo::isVirtualRegister(Reg)) -        continue; -      unsigned SubRegIdx = MO.getSubReg(); -      LaneBitmask LaneMask = SubRegIdx != 0 -                             ? TRI->getSubRegIndexLaneMask(SubRegIdx) -                             : MRI.getMaxLaneMaskForVReg(Reg); -      LaneBitmask &LM = LiveRegs[Reg]; -      setMask(MRI, SRI, Reg, LM, LM | LaneMask, SGPRs, VGPRs); -    } -    MaxSGPRs = std::max(MaxSGPRs, SGPRs); -    MaxVGPRs = std::max(MaxVGPRs, VGPRs); -  } +    if (Stage > 1) { +      // Retry function scheduling if we found resulting occupancy and it is +      // lower than used for first pass scheduling. This will give more freedom +      // to schedule low register pressure blocks. +      // Code is partially copied from MachineSchedulerBase::scheduleRegions(). -  DEBUG(dbgs() << "Real region's register pressure:\nSGPR = " << MaxSGPRs -               << "\nVGPR = " << MaxVGPRs << '\n'); +      if (!LIS || StartingOccupancy <= MinOccupancy) +        break; -  return std::make_pair(MaxSGPRs, MaxVGPRs); -} +      DEBUG(dbgs() +              << "Retrying function scheduling with lowest recorded occupancy " +              << MinOccupancy << ".\n"); -void GCNScheduleDAGMILive::finalizeSchedule() { -  // Retry function scheduling if we found resulting occupancy and it is -  // lower than used for first pass scheduling. This will give more freedom -  // to schedule low register pressure blocks. -  // Code is partially copied from MachineSchedulerBase::scheduleRegions(). +      S.setTargetOccupancy(MinOccupancy); +    } -  if (!LIS || StartingOccupancy <= MinOccupancy) -    return; +    for (auto Region : Regions) { +      RegionBegin = Region.first; +      RegionEnd = Region.second; -  DEBUG(dbgs() << "Retrying function scheduling with lowest recorded occupancy " -               << MinOccupancy << ".\n"); +      if (RegionBegin->getParent() != MBB) { +        if (MBB) finishBlock(); +        MBB = RegionBegin->getParent(); +        startBlock(MBB); +        if (Stage == 1) +          computeBlockPressure(MBB); +      } -  Stage++; -  GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl; -  S.setTargetOccupancy(MinOccupancy); +      unsigned NumRegionInstrs = std::distance(begin(), end()); +      enterRegion(MBB, begin(), end(), NumRegionInstrs); -  MachineBasicBlock *MBB = nullptr; -  for (auto Region : Regions) { -    RegionBegin = Region.first; -    RegionEnd = Region.second; +      // Skip empty scheduling regions (0 or 1 schedulable instructions). +      if (begin() == end() || begin() == std::prev(end())) { +        exitRegion(); +        continue; +      } -    if (RegionBegin->getParent() != MBB) { -      if (MBB) finishBlock(); -      MBB = RegionBegin->getParent(); -      startBlock(MBB); -    } +      DEBUG(dbgs() << "********** MI Scheduling **********\n"); +      DEBUG(dbgs() << MF.getName() +            << ":BB#" << MBB->getNumber() << " " << MBB->getName() +            << "\n  From: " << *begin() << "    To: "; +            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; +            else dbgs() << "End"; +            dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'); -    unsigned NumRegionInstrs = std::distance(begin(), end()); -    enterRegion(MBB, begin(), end(), NumRegionInstrs); +      schedule(); -    // Skip empty scheduling regions (0 or 1 schedulable instructions). -    if (begin() == end() || begin() == std::prev(end())) {        exitRegion(); -      continue; +      ++RegionIdx;      } -    DEBUG(dbgs() << "********** MI Scheduling **********\n"); -    DEBUG(dbgs() << MF.getName() -          << ":BB#" << MBB->getNumber() << " " << MBB->getName() -          << "\n  From: " << *begin() << "    To: "; -          if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; -          else dbgs() << "End"; -          dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'); +    finishBlock(); -    schedule(); - -    exitRegion(); -  } -  finishBlock(); -  LiveIns.shrink_and_clear(); +  } while (Stage < 2);  }  | 
