aboutsummaryrefslogtreecommitdiff
path: root/utils/TableGen/DFAEmitter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'utils/TableGen/DFAEmitter.cpp')
-rw-r--r--utils/TableGen/DFAEmitter.cpp394
1 files changed, 394 insertions, 0 deletions
diff --git a/utils/TableGen/DFAEmitter.cpp b/utils/TableGen/DFAEmitter.cpp
new file mode 100644
index 000000000000..dd3db7c150ba
--- /dev/null
+++ b/utils/TableGen/DFAEmitter.cpp
@@ -0,0 +1,394 @@
+//===- DFAEmitter.cpp - Finite state automaton emitter --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class can produce a generic deterministic finite state automaton (DFA),
+// given a set of possible states and transitions.
+//
+// The input transitions can be nondeterministic - this class will produce the
+// deterministic equivalent state machine.
+//
+// The generated code can run the DFA and produce an accepted / not accepted
+// state and also produce, given a sequence of transitions that results in an
+// accepted state, the sequence of intermediate states. This is useful if the
+// initial automaton was nondeterministic - it allows mapping back from the DFA
+// to the NFA.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "dfa-emitter"
+
+#include "DFAEmitter.h"
+#include "CodeGenTarget.h"
+#include "SequenceToOffsetTable.h"
+#include "TableGenBackends.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <cassert>
+#include <cstdint>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// DfaEmitter implementation. This is independent of the GenAutomaton backend.
+//===----------------------------------------------------------------------===//
+
+void DfaEmitter::addTransition(state_type From, state_type To, action_type A) {
+ Actions.insert(A);
+ NfaStates.insert(From);
+ NfaStates.insert(To);
+ NfaTransitions[{From, A}].push_back(To);
+ ++NumNfaTransitions;
+}
+
+void DfaEmitter::visitDfaState(DfaState DS) {
+ // For every possible action...
+ auto FromId = DfaStates.idFor(DS);
+ for (action_type A : Actions) {
+ DfaState NewStates;
+ DfaTransitionInfo TI;
+ // For every represented state, word pair in the original NFA...
+ for (state_type &FromState : DS) {
+ // If this action is possible from this state add the transitioned-to
+ // states to NewStates.
+ auto I = NfaTransitions.find({FromState, A});
+ if (I == NfaTransitions.end())
+ continue;
+ for (state_type &ToState : I->second) {
+ NewStates.push_back(ToState);
+ TI.emplace_back(FromState, ToState);
+ }
+ }
+ if (NewStates.empty())
+ continue;
+ // Sort and unique.
+ sort(NewStates);
+ NewStates.erase(std::unique(NewStates.begin(), NewStates.end()),
+ NewStates.end());
+ sort(TI);
+ TI.erase(std::unique(TI.begin(), TI.end()), TI.end());
+ unsigned ToId = DfaStates.insert(NewStates);
+ DfaTransitions.emplace(std::make_pair(FromId, A), std::make_pair(ToId, TI));
+ }
+}
+
+void DfaEmitter::constructDfa() {
+ DfaState Initial(1, /*NFA initial state=*/0);
+ DfaStates.insert(Initial);
+
+ // Note that UniqueVector starts indices at 1, not zero.
+ unsigned DfaStateId = 1;
+ while (DfaStateId <= DfaStates.size())
+ visitDfaState(DfaStates[DfaStateId++]);
+}
+
+void DfaEmitter::emit(StringRef Name, raw_ostream &OS) {
+ constructDfa();
+
+ OS << "// Input NFA has " << NfaStates.size() << " states with "
+ << NumNfaTransitions << " transitions.\n";
+ OS << "// Generated DFA has " << DfaStates.size() << " states with "
+ << DfaTransitions.size() << " transitions.\n\n";
+
+ // Implementation note: We don't bake a simple std::pair<> here as it requires
+ // significantly more effort to parse. A simple test with a large array of
+ // struct-pairs (N=100000) took clang-10 6s to parse. The same array of
+ // std::pair<uint64_t, uint64_t> took 242s. Instead we allow the user to
+ // define the pair type.
+ //
+ // FIXME: It may make sense to emit these as ULEB sequences instead of
+ // pairs of uint64_t.
+ OS << "// A zero-terminated sequence of NFA state transitions. Every DFA\n";
+ OS << "// transition implies a set of NFA transitions. These are referred\n";
+ OS << "// to by index in " << Name << "Transitions[].\n";
+
+ SequenceToOffsetTable<DfaTransitionInfo> Table;
+ std::map<DfaTransitionInfo, unsigned> EmittedIndices;
+ for (auto &T : DfaTransitions)
+ Table.add(T.second.second);
+ Table.layout();
+ OS << "std::array<NfaStatePair, " << Table.size() << "> " << Name
+ << "TransitionInfo = {{\n";
+ Table.emit(
+ OS,
+ [](raw_ostream &OS, std::pair<uint64_t, uint64_t> P) {
+ OS << "{" << P.first << ", " << P.second << "}";
+ },
+ "{0ULL, 0ULL}");
+
+ OS << "}};\n\n";
+
+ OS << "// A transition in the generated " << Name << " DFA.\n";
+ OS << "struct " << Name << "Transition {\n";
+ OS << " unsigned FromDfaState; // The transitioned-from DFA state.\n";
+ OS << " ";
+ printActionType(OS);
+ OS << " Action; // The input symbol that causes this transition.\n";
+ OS << " unsigned ToDfaState; // The transitioned-to DFA state.\n";
+ OS << " unsigned InfoIdx; // Start index into " << Name
+ << "TransitionInfo.\n";
+ OS << "};\n\n";
+
+ OS << "// A table of DFA transitions, ordered by {FromDfaState, Action}.\n";
+ OS << "// The initial state is 1, not zero.\n";
+ OS << "std::array<" << Name << "Transition, " << DfaTransitions.size() << "> "
+ << Name << "Transitions = {{\n";
+ for (auto &KV : DfaTransitions) {
+ dfa_state_type From = KV.first.first;
+ dfa_state_type To = KV.second.first;
+ action_type A = KV.first.second;
+ unsigned InfoIdx = Table.get(KV.second.second);
+ OS << " {" << From << ", ";
+ printActionValue(A, OS);
+ OS << ", " << To << ", " << InfoIdx << "},\n";
+ }
+ OS << "\n}};\n\n";
+}
+
+void DfaEmitter::printActionType(raw_ostream &OS) { OS << "uint64_t"; }
+
+void DfaEmitter::printActionValue(action_type A, raw_ostream &OS) { OS << A; }
+
+//===----------------------------------------------------------------------===//
+// AutomatonEmitter implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+// FIXME: This entire discriminated union could be removed with c++17:
+// using Action = std::variant<Record *, unsigned, std::string>;
+struct Action {
+ Record *R = nullptr;
+ unsigned I = 0;
+ std::string S = nullptr;
+
+ Action() = default;
+ Action(Record *R, unsigned I, std::string S) : R(R), I(I), S(S) {}
+
+ void print(raw_ostream &OS) const {
+ if (R)
+ OS << R->getName();
+ else if (!S.empty())
+ OS << '"' << S << '"';
+ else
+ OS << I;
+ }
+ bool operator<(const Action &Other) const {
+ return std::make_tuple(R, I, S) <
+ std::make_tuple(Other.R, Other.I, Other.S);
+ }
+};
+
+using ActionTuple = std::vector<Action>;
+class Automaton;
+
+class Transition {
+ uint64_t NewState;
+ // The tuple of actions that causes this transition.
+ ActionTuple Actions;
+ // The types of the actions; this is the same across all transitions.
+ SmallVector<std::string, 4> Types;
+
+public:
+ Transition(Record *R, Automaton *Parent);
+ const ActionTuple &getActions() { return Actions; }
+ SmallVector<std::string, 4> getTypes() { return Types; }
+
+ bool canTransitionFrom(uint64_t State);
+ uint64_t transitionFrom(uint64_t State);
+};
+
+class Automaton {
+ RecordKeeper &Records;
+ Record *R;
+ std::vector<Transition> Transitions;
+ /// All possible action tuples, uniqued.
+ UniqueVector<ActionTuple> Actions;
+ /// The fields within each Transition object to find the action symbols.
+ std::vector<StringRef> ActionSymbolFields;
+
+public:
+ Automaton(RecordKeeper &Records, Record *R);
+ void emit(raw_ostream &OS);
+
+ ArrayRef<StringRef> getActionSymbolFields() { return ActionSymbolFields; }
+ /// If the type of action A has been overridden (there exists a field
+ /// "TypeOf_A") return that, otherwise return the empty string.
+ StringRef getActionSymbolType(StringRef A);
+};
+
+class AutomatonEmitter {
+ RecordKeeper &Records;
+
+public:
+ AutomatonEmitter(RecordKeeper &R) : Records(R) {}
+ void run(raw_ostream &OS);
+};
+
+/// A DfaEmitter implementation that can print our variant action type.
+class CustomDfaEmitter : public DfaEmitter {
+ const UniqueVector<ActionTuple> &Actions;
+ std::string TypeName;
+
+public:
+ CustomDfaEmitter(const UniqueVector<ActionTuple> &Actions, StringRef TypeName)
+ : Actions(Actions), TypeName(TypeName) {}
+
+ void printActionType(raw_ostream &OS) override;
+ void printActionValue(action_type A, raw_ostream &OS) override;
+};
+} // namespace
+
+void AutomatonEmitter::run(raw_ostream &OS) {
+ for (Record *R : Records.getAllDerivedDefinitions("GenericAutomaton")) {
+ Automaton A(Records, R);
+ OS << "#ifdef GET_" << R->getName() << "_DECL\n";
+ A.emit(OS);
+ OS << "#endif // GET_" << R->getName() << "_DECL\n";
+ }
+}
+
+Automaton::Automaton(RecordKeeper &Records, Record *R)
+ : Records(Records), R(R) {
+ LLVM_DEBUG(dbgs() << "Emitting automaton for " << R->getName() << "\n");
+ ActionSymbolFields = R->getValueAsListOfStrings("SymbolFields");
+}
+
+void Automaton::emit(raw_ostream &OS) {
+ StringRef TransitionClass = R->getValueAsString("TransitionClass");
+ for (Record *T : Records.getAllDerivedDefinitions(TransitionClass)) {
+ assert(T->isSubClassOf("Transition"));
+ Transitions.emplace_back(T, this);
+ Actions.insert(Transitions.back().getActions());
+ }
+
+ LLVM_DEBUG(dbgs() << " Action alphabet cardinality: " << Actions.size()
+ << "\n");
+ LLVM_DEBUG(dbgs() << " Each state has " << Transitions.size()
+ << " potential transitions.\n");
+
+ StringRef Name = R->getName();
+
+ CustomDfaEmitter Emitter(Actions, std::string(Name) + "Action");
+ // Starting from the initial state, build up a list of possible states and
+ // transitions.
+ std::deque<uint64_t> Worklist(1, 0);
+ std::set<uint64_t> SeenStates;
+ unsigned NumTransitions = 0;
+ SeenStates.insert(Worklist.front());
+ while (!Worklist.empty()) {
+ uint64_t State = Worklist.front();
+ Worklist.pop_front();
+ for (Transition &T : Transitions) {
+ if (!T.canTransitionFrom(State))
+ continue;
+ uint64_t NewState = T.transitionFrom(State);
+ if (SeenStates.emplace(NewState).second)
+ Worklist.emplace_back(NewState);
+ ++NumTransitions;
+ Emitter.addTransition(State, NewState, Actions.idFor(T.getActions()));
+ }
+ }
+ LLVM_DEBUG(dbgs() << " NFA automaton has " << SeenStates.size()
+ << " states with " << NumTransitions << " transitions.\n");
+
+ const auto &ActionTypes = Transitions.back().getTypes();
+ OS << "// The type of an action in the " << Name << " automaton.\n";
+ if (ActionTypes.size() == 1) {
+ OS << "using " << Name << "Action = " << ActionTypes[0] << ";\n";
+ } else {
+ OS << "using " << Name << "Action = std::tuple<" << join(ActionTypes, ", ")
+ << ">;\n";
+ }
+ OS << "\n";
+
+ Emitter.emit(Name, OS);
+}
+
+StringRef Automaton::getActionSymbolType(StringRef A) {
+ Twine Ty = "TypeOf_" + A;
+ if (!R->getValue(Ty.str()))
+ return "";
+ return R->getValueAsString(Ty.str());
+}
+
+Transition::Transition(Record *R, Automaton *Parent) {
+ BitsInit *NewStateInit = R->getValueAsBitsInit("NewState");
+ NewState = 0;
+ assert(NewStateInit->getNumBits() <= sizeof(uint64_t) * 8 &&
+ "State cannot be represented in 64 bits!");
+ for (unsigned I = 0; I < NewStateInit->getNumBits(); ++I) {
+ if (auto *Bit = dyn_cast<BitInit>(NewStateInit->getBit(I))) {
+ if (Bit->getValue())
+ NewState |= 1ULL << I;
+ }
+ }
+
+ for (StringRef A : Parent->getActionSymbolFields()) {
+ RecordVal *SymbolV = R->getValue(A);
+ if (auto *Ty = dyn_cast<RecordRecTy>(SymbolV->getType())) {
+ Actions.emplace_back(R->getValueAsDef(A), 0, "");
+ Types.emplace_back(Ty->getAsString());
+ } else if (isa<IntRecTy>(SymbolV->getType())) {
+ Actions.emplace_back(nullptr, R->getValueAsInt(A), "");
+ Types.emplace_back("unsigned");
+ } else if (isa<StringRecTy>(SymbolV->getType()) ||
+ isa<CodeRecTy>(SymbolV->getType())) {
+ Actions.emplace_back(nullptr, 0, R->getValueAsString(A));
+ Types.emplace_back("std::string");
+ } else {
+ report_fatal_error("Unhandled symbol type!");
+ }
+
+ StringRef TypeOverride = Parent->getActionSymbolType(A);
+ if (!TypeOverride.empty())
+ Types.back() = TypeOverride;
+ }
+}
+
+bool Transition::canTransitionFrom(uint64_t State) {
+ if ((State & NewState) == 0)
+ // The bits we want to set are not set;
+ return true;
+ return false;
+}
+
+uint64_t Transition::transitionFrom(uint64_t State) {
+ return State | NewState;
+}
+
+void CustomDfaEmitter::printActionType(raw_ostream &OS) { OS << TypeName; }
+
+void CustomDfaEmitter::printActionValue(action_type A, raw_ostream &OS) {
+ const ActionTuple &AT = Actions[A];
+ if (AT.size() > 1)
+ OS << "std::make_tuple(";
+ bool First = true;
+ for (const auto &SingleAction : AT) {
+ if (!First)
+ OS << ", ";
+ First = false;
+ SingleAction.print(OS);
+ }
+ if (AT.size() > 1)
+ OS << ")";
+}
+
+namespace llvm {
+
+void EmitAutomata(RecordKeeper &RK, raw_ostream &OS) {
+ AutomatonEmitter(RK).run(OS);
+}
+
+} // namespace llvm