diff options
Diffstat (limited to 'lib/Target/SystemZ/SystemZHazardRecognizer.cpp')
-rw-r--r-- | lib/Target/SystemZ/SystemZHazardRecognizer.cpp | 45 |
1 files changed, 40 insertions, 5 deletions
diff --git a/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/lib/Target/SystemZ/SystemZHazardRecognizer.cpp index d300d1d88abc..b9e5788cf018 100644 --- a/lib/Target/SystemZ/SystemZHazardRecognizer.cpp +++ b/lib/Target/SystemZ/SystemZHazardRecognizer.cpp @@ -55,7 +55,7 @@ getNumDecoderSlots(SUnit *SU) const { else return 3; // Expanded/group-alone instruction } - + return 1; // Normal instruction } @@ -81,6 +81,7 @@ getHazardType(SUnit *m, int Stalls) { void SystemZHazardRecognizer::Reset() { CurrGroupSize = 0; + CurrGroupHas4RegOps = false; clearProcResCounters(); GrpCount = 0; LastFPdOpCycleIdx = UINT_MAX; @@ -99,6 +100,12 @@ SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { if (SC->BeginGroup) return (CurrGroupSize == 0); + // An instruction with 4 register operands will not fit in last slot. + assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) && + "Current decoder group is already full!"); + if (CurrGroupSize == 2 && has4RegOps(SU->getInstr())) + return false; + // Since a full group is handled immediately in EmitInstruction(), // SU should fit into current group. NumSlots should be 1 or 0, // since it is not a cracked or expanded instruction. @@ -108,6 +115,23 @@ SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { return true; } +bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const { + const MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); + const MCInstrDesc &MID = MI->getDesc(); + unsigned Count = 0; + for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) { + const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF); + if (RC == nullptr) + continue; + if (OpIdx >= MID.getNumDefs() && + MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) + continue; + Count++; + } + return Count >= 4; +} + void SystemZHazardRecognizer::nextGroup() { if (CurrGroupSize == 0) return; @@ -119,6 +143,7 @@ void SystemZHazardRecognizer::nextGroup() { // Reset counter for next group. CurrGroupSize = 0; + CurrGroupHas4RegOps = false; // Decrease counters for execution units by one. for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) @@ -142,7 +167,7 @@ void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const { const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return; - + for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { @@ -172,6 +197,8 @@ void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const { OS << "/EndsGroup"; if (SU->isUnbuffered) OS << "/Unbuffered"; + if (has4RegOps(SU->getInstr())) + OS << "/4RegOps"; } void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const { @@ -184,6 +211,7 @@ void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const { dbgs() << "{ " << CurGroupDbg << " }"; dbgs() << " (" << CurrGroupSize << " decoder slot" << (CurrGroupSize > 1 ? "s":"") + << (CurrGroupHas4RegOps ? ", 4RegOps" : "") << ")\n"; } } @@ -294,11 +322,14 @@ EmitInstruction(SUnit *SU) { // Insert SU into current group by increasing number of slots used // in current group. CurrGroupSize += getNumDecoderSlots(SU); - assert (CurrGroupSize <= 3); + CurrGroupHas4RegOps |= has4RegOps(SU->getInstr()); + unsigned GroupLim = + ((CurrGroupHas4RegOps && getNumDecoderSlots(SU) < 3) ? 2 : 3); + assert (CurrGroupSize <= GroupLim && "SU does not fit into decoder group!"); // Check if current group is now full/ended. If so, move on to next // group to be ready to evaluate more candidates. - if (CurrGroupSize == 3 || SC->EndGroup) + if (CurrGroupSize == GroupLim || SC->EndGroup) nextGroup(); } @@ -306,7 +337,7 @@ int SystemZHazardRecognizer::groupingCost(SUnit *SU) const { const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return 0; - + // If SU begins new group, it can either break a current group early // or fit naturally if current group is empty (negative cost). if (SC->BeginGroup) { @@ -325,6 +356,10 @@ int SystemZHazardRecognizer::groupingCost(SUnit *SU) const { return -1; } + // An instruction with 4 register operands will not fit in last slot. + if (CurrGroupSize == 2 && has4RegOps(SU->getInstr())) + return 1; + // Most instructions can be placed in any decoder slot. return 0; } |