//===- DFAPacketizerEmitter.cpp - Packetization DFA for a VLIW machine ----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This class parses the Schedule.td file and produces an API that can be used // to reason about whether an instruction can be added to a packet on a VLIW // architecture. The class internally generates a deterministic finite // automaton (DFA) that models all possible mappings of machine instructions // to functional units as instructions are added to a packet. // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "dfa-emitter" #include "CodeGenTarget.h" #include "DFAEmitter.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include #include #include #include using namespace llvm; // -------------------------------------------------------------------- // Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp // DFA_MAX_RESTERMS * DFA_MAX_RESOURCES must fit within sizeof DFAInput. // This is verified in DFAPacketizer.cpp:DFAPacketizer::DFAPacketizer. // // e.g. terms x resource bit combinations that fit in uint32_t: // 4 terms x 8 bits = 32 bits // 3 terms x 10 bits = 30 bits // 2 terms x 16 bits = 32 bits // // e.g. terms x resource bit combinations that fit in uint64_t: // 8 terms x 8 bits = 64 bits // 7 terms x 9 bits = 63 bits // 6 terms x 10 bits = 60 bits // 5 terms x 12 bits = 60 bits // 4 terms x 16 bits = 64 bits <--- current // 3 terms x 21 bits = 63 bits // 2 terms x 32 bits = 64 bits // #define DFA_MAX_RESTERMS 4 // The max # of AND'ed resource terms. #define DFA_MAX_RESOURCES 16 // The max # of resource bits in one term. typedef uint64_t DFAInput; typedef int64_t DFAStateInput; #define DFA_TBLTYPE "int64_t" // For generating DFAStateInputTable. namespace { DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) { return (Inp << DFA_MAX_RESOURCES) | FuncUnits; } /// Return the DFAInput for an instruction class input vector. /// This function is used in both DFAPacketizer.cpp and in /// DFAPacketizerEmitter.cpp. DFAInput getDFAInsnInput(const std::vector &InsnClass) { DFAInput InsnInput = 0; assert((InsnClass.size() <= DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA terms"); for (auto U : InsnClass) InsnInput = addDFAFuncUnits(InsnInput, U); return InsnInput; } } // end anonymous namespace // -------------------------------------------------------------------- #ifndef NDEBUG // To enable debugging, run llvm-tblgen with: "-debug-only dfa-emitter". // // dbgsInsnClass - When debugging, print instruction class stages. // void dbgsInsnClass(const std::vector &InsnClass); // // dbgsStateInfo - When debugging, print the set of state info. // void dbgsStateInfo(const std::set &stateInfo); // // dbgsIndent - When debugging, indent by the specified amount. // void dbgsIndent(unsigned indent); #endif // // class DFAPacketizerEmitter: class that generates and prints out the DFA // for resource tracking. // namespace { class DFAPacketizerEmitter { private: std::string TargetName; // // allInsnClasses is the set of all possible resources consumed by an // InstrStage. // std::vector> allInsnClasses; RecordKeeper &Records; public: DFAPacketizerEmitter(RecordKeeper &R); // // collectAllFuncUnits - Construct a map of function unit names to bits. // int collectAllFuncUnits(std::vector &ProcItinList, std::map &FUNameToBitsMap, int &maxResources, raw_ostream &OS); // // collectAllComboFuncs - Construct a map from a combo function unit bit to // the bits of all included functional units. // int collectAllComboFuncs(std::vector &ComboFuncList, std::map &FUNameToBitsMap, std::map &ComboBitToBitsMap, raw_ostream &OS); // // collectOneInsnClass - Populate allInsnClasses with one instruction class. // int collectOneInsnClass(const std::string &ProcName, std::vector &ProcItinList, std::map &FUNameToBitsMap, Record *ItinData, raw_ostream &OS); // // collectAllInsnClasses - Populate allInsnClasses which is a set of units // used in each stage. // int collectAllInsnClasses(const std::string &ProcName, std::vector &ProcItinList, std::map &FUNameToBitsMap, std::vector &ItinDataList, int &maxStages, raw_ostream &OS); // Emit code for a subset of itineraries. void emitForItineraries(raw_ostream &OS, std::vector &ProcItinList, std::string DFAName); void run(raw_ostream &OS); }; } // end anonymous namespace #ifndef NDEBUG // To enable debugging, run llvm-tblgen with: "-debug-only dfa-emitter". // // dbgsInsnClass - When debugging, print instruction class stages. // void dbgsInsnClass(const std::vector &InsnClass) { LLVM_DEBUG(dbgs() << "InsnClass: "); for (unsigned i = 0; i < InsnClass.size(); ++i) { if (i > 0) { LLVM_DEBUG(dbgs() << ", "); } LLVM_DEBUG(dbgs() << "0x" << Twine::utohexstr(InsnClass[i])); } DFAInput InsnInput = getDFAInsnInput(InsnClass); LLVM_DEBUG(dbgs() << " (input: 0x" << Twine::utohexstr(InsnInput) << ")"); } // // dbgsIndent - When debugging, indent by the specified amount. // void dbgsIndent(unsigned indent) { for (unsigned i = 0; i < indent; ++i) { LLVM_DEBUG(dbgs() << " "); } } #endif // NDEBUG DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R): TargetName(CodeGenTarget(R).getName()), Records(R) {} // // collectAllFuncUnits - Construct a map of function unit names to bits. // int DFAPacketizerEmitter::collectAllFuncUnits( std::vector &ProcItinList, std::map &FUNameToBitsMap, int &maxFUs, raw_ostream &OS) { LLVM_DEBUG(dbgs() << "-------------------------------------------------------" "----------------------\n"); LLVM_DEBUG(dbgs() << "collectAllFuncUnits"); LLVM_DEBUG(dbgs() << " (" << ProcItinList.size() << " itineraries)\n"); int totalFUs = 0; // Parse functional units for all the itineraries. for (unsigned i = 0, N = ProcItinList.size(); i < N; ++i) { Record *Proc = ProcItinList[i]; std::vector FUs = Proc->getValueAsListOfDefs("FU"); LLVM_DEBUG(dbgs() << " FU:" << i << " (" << FUs.size() << " FUs) " << Proc->getName()); // Convert macros to bits for each stage. unsigned numFUs = FUs.size(); for (unsigned j = 0; j < numFUs; ++j) { assert ((j < DFA_MAX_RESOURCES) && "Exceeded maximum number of representable resources"); unsigned FuncResources = (unsigned) (1U << j); FUNameToBitsMap[FUs[j]->getName()] = FuncResources; LLVM_DEBUG(dbgs() << " " << FUs[j]->getName() << ":0x" << Twine::utohexstr(FuncResources)); } if (((int) numFUs) > maxFUs) { maxFUs = numFUs; } totalFUs += numFUs; LLVM_DEBUG(dbgs() << "\n"); } return totalFUs; } // // collectAllComboFuncs - Construct a map from a combo function unit bit to // the bits of all included functional units. // int DFAPacketizerEmitter::collectAllComboFuncs( std::vector &ComboFuncList, std::map &FUNameToBitsMap, std::map &ComboBitToBitsMap, raw_ostream &OS) { LLVM_DEBUG(dbgs() << "-------------------------------------------------------" "----------------------\n"); LLVM_DEBUG(dbgs() << "collectAllComboFuncs"); LLVM_DEBUG(dbgs() << " (" << ComboFuncList.size() << " sets)\n"); int numCombos = 0; for (unsigned i = 0, N = ComboFuncList.size(); i < N; ++i) { Record *Func = ComboFuncList[i]; std::vector FUs = Func->getValueAsListOfDefs("CFD"); LLVM_DEBUG(dbgs() << " CFD:" << i << " (" << FUs.size() << " combo FUs) " << Func->getName() << "\n"); // Convert macros to bits for each stage. for (unsigned j = 0, N = FUs.size(); j < N; ++j) { assert ((j < DFA_MAX_RESOURCES) && "Exceeded maximum number of DFA resources"); Record *FuncData = FUs[j]; Record *ComboFunc = FuncData->getValueAsDef("TheComboFunc"); const std::vector &FuncList = FuncData->getValueAsListOfDefs("FuncList"); const std::string &ComboFuncName = ComboFunc->getName(); unsigned ComboBit = FUNameToBitsMap[ComboFuncName]; unsigned ComboResources = ComboBit; LLVM_DEBUG(dbgs() << " combo: " << ComboFuncName << ":0x" << Twine::utohexstr(ComboResources) << "\n"); for (unsigned k = 0, M = FuncList.size(); k < M; ++k) { std::string FuncName = FuncList[k]->getName(); unsigned FuncResources = FUNameToBitsMap[FuncName]; LLVM_DEBUG(dbgs() << " " << FuncName << ":0x" << Twine::utohexstr(FuncResources) << "\n"); ComboResources |= FuncResources; } ComboBitToBitsMap[ComboBit] = ComboResources; numCombos++; LLVM_DEBUG(dbgs() << " => combo bits: " << ComboFuncName << ":0x" << Twine::utohexstr(ComboBit) << " = 0x" << Twine::utohexstr(ComboResources) << "\n"); } } return numCombos; } // // collectOneInsnClass - Populate allInsnClasses with one instruction class // int DFAPacketizerEmitter::collectOneInsnClass(const std::string &ProcName, std::vector &ProcItinList, std::map &FUNameToBitsMap, Record *ItinData, raw_ostream &OS) { const std::vector &StageList = ItinData->getValueAsListOfDefs("Stages"); // The number of stages. unsigned NStages = StageList.size(); LLVM_DEBUG(dbgs() << " " << ItinData->getValueAsDef("TheClass")->getName() << "\n"); std::vector UnitBits; // Compute the bitwise or of each unit used in this stage. for (unsigned i = 0; i < NStages; ++i) { const Record *Stage = StageList[i]; // Get unit list. const std::vector &UnitList = Stage->getValueAsListOfDefs("Units"); LLVM_DEBUG(dbgs() << " stage:" << i << " [" << UnitList.size() << " units]:"); unsigned dbglen = 26; // cursor after stage dbgs // Compute the bitwise or of each unit used in this stage. unsigned UnitBitValue = 0; for (unsigned j = 0, M = UnitList.size(); j < M; ++j) { // Conduct bitwise or. std::string UnitName = UnitList[j]->getName(); LLVM_DEBUG(dbgs() << " " << j << ":" << UnitName); dbglen += 3 + UnitName.length(); assert(FUNameToBitsMap.count(UnitName)); UnitBitValue |= FUNameToBitsMap[UnitName]; } if (UnitBitValue != 0) UnitBits.push_back(UnitBitValue); while (dbglen <= 64) { // line up bits dbgs dbglen += 8; LLVM_DEBUG(dbgs() << "\t"); } LLVM_DEBUG(dbgs() << " (bits: 0x" << Twine::utohexstr(UnitBitValue) << ")\n"); } if (!UnitBits.empty()) allInsnClasses.push_back(UnitBits); LLVM_DEBUG({ dbgs() << " "; dbgsInsnClass(UnitBits); dbgs() << "\n"; }); return NStages; } // // collectAllInsnClasses - Populate allInsnClasses which is a set of units // used in each stage. // int DFAPacketizerEmitter::collectAllInsnClasses(const std::string &ProcName, std::vector &ProcItinList, std::map &FUNameToBitsMap, std::vector &ItinDataList, int &maxStages, raw_ostream &OS) { // Collect all instruction classes. unsigned M = ItinDataList.size(); int numInsnClasses = 0; LLVM_DEBUG(dbgs() << "-------------------------------------------------------" "----------------------\n" << "collectAllInsnClasses " << ProcName << " (" << M << " classes)\n"); // Collect stages for each instruction class for all itinerary data for (unsigned j = 0; j < M; j++) { Record *ItinData = ItinDataList[j]; int NStages = collectOneInsnClass(ProcName, ProcItinList, FUNameToBitsMap, ItinData, OS); if (NStages > maxStages) { maxStages = NStages; } numInsnClasses++; } return numInsnClasses; } // // Run the worklist algorithm to generate the DFA. // void DFAPacketizerEmitter::run(raw_ostream &OS) { OS << "\n" << "#include \"llvm/CodeGen/DFAPacketizer.h\"\n"; OS << "namespace llvm {\n"; OS << "\n// Input format:\n"; OS << "#define DFA_MAX_RESTERMS " << DFA_MAX_RESTERMS << "\t// maximum AND'ed resource terms\n"; OS << "#define DFA_MAX_RESOURCES " << DFA_MAX_RESOURCES << "\t// maximum resource bits in one term\n"; // Collect processor iteraries. std::vector ProcItinList = Records.getAllDerivedDefinitions("ProcessorItineraries"); std::unordered_map> ItinsByNamespace; for (Record *R : ProcItinList) ItinsByNamespace[R->getValueAsString("PacketizerNamespace")].push_back(R); for (auto &KV : ItinsByNamespace) emitForItineraries(OS, KV.second, KV.first); OS << "} // end namespace llvm\n"; } void DFAPacketizerEmitter::emitForItineraries( raw_ostream &OS, std::vector &ProcItinList, std::string DFAName) { // // Collect the Functional units. // std::map FUNameToBitsMap; int maxResources = 0; collectAllFuncUnits(ProcItinList, FUNameToBitsMap, maxResources, OS); // // Collect the Combo Functional units. // std::map ComboBitToBitsMap; std::vector ComboFuncList = Records.getAllDerivedDefinitions("ComboFuncUnits"); collectAllComboFuncs(ComboFuncList, FUNameToBitsMap, ComboBitToBitsMap, OS); // // Collect the itineraries. // int maxStages = 0; int numInsnClasses = 0; for (unsigned i = 0, N = ProcItinList.size(); i < N; i++) { Record *Proc = ProcItinList[i]; // Get processor itinerary name. const std::string &ProcName = Proc->getName(); // Skip default. if (ProcName == "NoItineraries") continue; // Sanity check for at least one instruction itinerary class. unsigned NItinClasses = Records.getAllDerivedDefinitions("InstrItinClass").size(); if (NItinClasses == 0) return; // Get itinerary data list. std::vector ItinDataList = Proc->getValueAsListOfDefs("IID"); // Collect all instruction classes numInsnClasses += collectAllInsnClasses(ProcName, ProcItinList, FUNameToBitsMap, ItinDataList, maxStages, OS); } // The type of a state in the nondeterministic automaton we're defining. using NfaStateTy = unsigned; // Given a resource state, return all resource states by applying // InsnClass. auto applyInsnClass = [&](ArrayRef InsnClass, NfaStateTy State) -> std::deque { std::deque V(1, State); // Apply every stage in the class individually. for (unsigned Stage : InsnClass) { // Apply this stage to every existing member of V in turn. size_t Sz = V.size(); for (unsigned I = 0; I < Sz; ++I) { unsigned S = V.front(); V.pop_front(); // For this stage, state combination, try all possible resources. for (unsigned J = 0; J < DFA_MAX_RESOURCES; ++J) { unsigned ResourceMask = 1U << J; if ((ResourceMask & Stage) == 0) // This resource isn't required by this stage. continue; unsigned Combo = ComboBitToBitsMap[ResourceMask]; if (Combo && ((~S & Combo) != Combo)) // This combo units bits are not available. continue; unsigned ResultingResourceState = S | ResourceMask | Combo; if (ResultingResourceState == S) continue; V.push_back(ResultingResourceState); } } } return V; }; // Given a resource state, return a quick (conservative) guess as to whether // InsnClass can be applied. This is a filter for the more heavyweight // applyInsnClass. auto canApplyInsnClass = [](ArrayRef InsnClass, NfaStateTy State) -> bool { for (unsigned Resources : InsnClass) { if ((State | Resources) == State) return false; } return true; }; DfaEmitter Emitter; std::deque Worklist(1, 0); std::set SeenStates; SeenStates.insert(Worklist.front()); while (!Worklist.empty()) { NfaStateTy State = Worklist.front(); Worklist.pop_front(); for (unsigned i = 0; i < allInsnClasses.size(); i++) { const std::vector &InsnClass = allInsnClasses[i]; if (!canApplyInsnClass(InsnClass, State)) continue; for (unsigned NewState : applyInsnClass(InsnClass, State)) { if (SeenStates.emplace(NewState).second) Worklist.emplace_back(NewState); Emitter.addTransition(State, NewState, getDFAInsnInput(InsnClass)); } } } OS << "} // end namespace llvm\n\n"; OS << "namespace {\n"; std::string TargetAndDFAName = TargetName + DFAName; Emitter.emit(TargetAndDFAName, OS); OS << "} // end anonymous namespace\n\n"; std::string SubTargetClassName = TargetName + "GenSubtargetInfo"; OS << "namespace llvm {\n"; OS << "DFAPacketizer *" << SubTargetClassName << "::" << "create" << DFAName << "DFAPacketizer(const InstrItineraryData *IID) const {\n" << " static Automaton A(ArrayRef<" << TargetAndDFAName << "Transition>(" << TargetAndDFAName << "Transitions), " << TargetAndDFAName << "TransitionInfo);\n" << " return new DFAPacketizer(IID, A);\n" << "\n}\n\n"; } namespace llvm { void EmitDFAPacketizer(RecordKeeper &RK, raw_ostream &OS) { emitSourceFileHeader("Target DFA Packetizer Tables", OS); DFAPacketizerEmitter(RK).run(OS); } } // end namespace llvm