diff options
Diffstat (limited to 'llvm/utils/TableGen/DFAEmitter.cpp')
| -rw-r--r-- | llvm/utils/TableGen/DFAEmitter.cpp | 394 | 
1 files changed, 394 insertions, 0 deletions
diff --git a/llvm/utils/TableGen/DFAEmitter.cpp b/llvm/utils/TableGen/DFAEmitter.cpp new file mode 100644 index 000000000000..dd3db7c150ba --- /dev/null +++ b/llvm/utils/TableGen/DFAEmitter.cpp @@ -0,0 +1,394 @@ +//===- DFAEmitter.cpp - Finite state automaton emitter --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class can produce a generic deterministic finite state automaton (DFA), +// given a set of possible states and transitions. +// +// The input transitions can be nondeterministic - this class will produce the +// deterministic equivalent state machine. +// +// The generated code can run the DFA and produce an accepted / not accepted +// state and also produce, given a sequence of transitions that results in an +// accepted state, the sequence of intermediate states. This is useful if the +// initial automaton was nondeterministic - it allows mapping back from the DFA +// to the NFA. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "dfa-emitter" + +#include "DFAEmitter.h" +#include "CodeGenTarget.h" +#include "SequenceToOffsetTable.h" +#include "TableGenBackends.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/UniqueVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/Record.h" +#include "llvm/TableGen/TableGenBackend.h" +#include <cassert> +#include <cstdint> +#include <map> +#include <set> +#include <string> +#include <vector> + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// DfaEmitter implementation. This is independent of the GenAutomaton backend. +//===----------------------------------------------------------------------===// + +void DfaEmitter::addTransition(state_type From, state_type To, action_type A) { +  Actions.insert(A); +  NfaStates.insert(From); +  NfaStates.insert(To); +  NfaTransitions[{From, A}].push_back(To); +  ++NumNfaTransitions; +} + +void DfaEmitter::visitDfaState(DfaState DS) { +  // For every possible action... +  auto FromId = DfaStates.idFor(DS); +  for (action_type A : Actions) { +    DfaState NewStates; +    DfaTransitionInfo TI; +    // For every represented state, word pair in the original NFA... +    for (state_type &FromState : DS) { +      // If this action is possible from this state add the transitioned-to +      // states to NewStates. +      auto I = NfaTransitions.find({FromState, A}); +      if (I == NfaTransitions.end()) +        continue; +      for (state_type &ToState : I->second) { +        NewStates.push_back(ToState); +        TI.emplace_back(FromState, ToState); +      } +    } +    if (NewStates.empty()) +      continue; +    // Sort and unique. +    sort(NewStates); +    NewStates.erase(std::unique(NewStates.begin(), NewStates.end()), +                    NewStates.end()); +    sort(TI); +    TI.erase(std::unique(TI.begin(), TI.end()), TI.end()); +    unsigned ToId = DfaStates.insert(NewStates); +    DfaTransitions.emplace(std::make_pair(FromId, A), std::make_pair(ToId, TI)); +  } +} + +void DfaEmitter::constructDfa() { +  DfaState Initial(1, /*NFA initial state=*/0); +  DfaStates.insert(Initial); + +  // Note that UniqueVector starts indices at 1, not zero. +  unsigned DfaStateId = 1; +  while (DfaStateId <= DfaStates.size()) +    visitDfaState(DfaStates[DfaStateId++]); +} + +void DfaEmitter::emit(StringRef Name, raw_ostream &OS) { +  constructDfa(); + +  OS << "// Input NFA has " << NfaStates.size() << " states with " +     << NumNfaTransitions << " transitions.\n"; +  OS << "// Generated DFA has " << DfaStates.size() << " states with " +     << DfaTransitions.size() << " transitions.\n\n"; + +  // Implementation note: We don't bake a simple std::pair<> here as it requires +  // significantly more effort to parse. A simple test with a large array of +  // struct-pairs (N=100000) took clang-10 6s to parse. The same array of +  // std::pair<uint64_t, uint64_t> took 242s. Instead we allow the user to +  // define the pair type. +  // +  // FIXME: It may make sense to emit these as ULEB sequences instead of +  // pairs of uint64_t. +  OS << "// A zero-terminated sequence of NFA state transitions. Every DFA\n"; +  OS << "// transition implies a set of NFA transitions. These are referred\n"; +  OS << "// to by index in " << Name << "Transitions[].\n"; + +  SequenceToOffsetTable<DfaTransitionInfo> Table; +  std::map<DfaTransitionInfo, unsigned> EmittedIndices; +  for (auto &T : DfaTransitions) +    Table.add(T.second.second); +  Table.layout(); +  OS << "std::array<NfaStatePair, " << Table.size() << "> " << Name +     << "TransitionInfo = {{\n"; +  Table.emit( +      OS, +      [](raw_ostream &OS, std::pair<uint64_t, uint64_t> P) { +        OS << "{" << P.first << ", " << P.second << "}"; +      }, +      "{0ULL, 0ULL}"); + +  OS << "}};\n\n"; + +  OS << "// A transition in the generated " << Name << " DFA.\n"; +  OS << "struct " << Name << "Transition {\n"; +  OS << "  unsigned FromDfaState; // The transitioned-from DFA state.\n"; +  OS << "  "; +  printActionType(OS); +  OS << " Action;       // The input symbol that causes this transition.\n"; +  OS << "  unsigned ToDfaState;   // The transitioned-to DFA state.\n"; +  OS << "  unsigned InfoIdx;      // Start index into " << Name +     << "TransitionInfo.\n"; +  OS << "};\n\n"; + +  OS << "// A table of DFA transitions, ordered by {FromDfaState, Action}.\n"; +  OS << "// The initial state is 1, not zero.\n"; +  OS << "std::array<" << Name << "Transition, " << DfaTransitions.size() << "> " +     << Name << "Transitions = {{\n"; +  for (auto &KV : DfaTransitions) { +    dfa_state_type From = KV.first.first; +    dfa_state_type To = KV.second.first; +    action_type A = KV.first.second; +    unsigned InfoIdx = Table.get(KV.second.second); +    OS << "  {" << From << ", "; +    printActionValue(A, OS); +    OS << ", " << To << ", " << InfoIdx << "},\n"; +  } +  OS << "\n}};\n\n"; +} + +void DfaEmitter::printActionType(raw_ostream &OS) { OS << "uint64_t"; } + +void DfaEmitter::printActionValue(action_type A, raw_ostream &OS) { OS << A; } + +//===----------------------------------------------------------------------===// +// AutomatonEmitter implementation +//===----------------------------------------------------------------------===// + +namespace { +// FIXME: This entire discriminated union could be removed with c++17: +//   using Action = std::variant<Record *, unsigned, std::string>; +struct Action { +  Record *R = nullptr; +  unsigned I = 0; +  std::string S = nullptr; + +  Action() = default; +  Action(Record *R, unsigned I, std::string S) : R(R), I(I), S(S) {} + +  void print(raw_ostream &OS) const { +    if (R) +      OS << R->getName(); +    else if (!S.empty()) +      OS << '"' << S << '"'; +    else +      OS << I; +  } +  bool operator<(const Action &Other) const { +    return std::make_tuple(R, I, S) < +           std::make_tuple(Other.R, Other.I, Other.S); +  } +}; + +using ActionTuple = std::vector<Action>; +class Automaton; + +class Transition { +  uint64_t NewState; +  // The tuple of actions that causes this transition. +  ActionTuple Actions; +  // The types of the actions; this is the same across all transitions. +  SmallVector<std::string, 4> Types; + +public: +  Transition(Record *R, Automaton *Parent); +  const ActionTuple &getActions() { return Actions; } +  SmallVector<std::string, 4> getTypes() { return Types; } + +  bool canTransitionFrom(uint64_t State); +  uint64_t transitionFrom(uint64_t State); +}; + +class Automaton { +  RecordKeeper &Records; +  Record *R; +  std::vector<Transition> Transitions; +  /// All possible action tuples, uniqued. +  UniqueVector<ActionTuple> Actions; +  /// The fields within each Transition object to find the action symbols. +  std::vector<StringRef> ActionSymbolFields; + +public: +  Automaton(RecordKeeper &Records, Record *R); +  void emit(raw_ostream &OS); + +  ArrayRef<StringRef> getActionSymbolFields() { return ActionSymbolFields; } +  /// If the type of action A has been overridden (there exists a field +  /// "TypeOf_A") return that, otherwise return the empty string. +  StringRef getActionSymbolType(StringRef A); +}; + +class AutomatonEmitter { +  RecordKeeper &Records; + +public: +  AutomatonEmitter(RecordKeeper &R) : Records(R) {} +  void run(raw_ostream &OS); +}; + +/// A DfaEmitter implementation that can print our variant action type. +class CustomDfaEmitter : public DfaEmitter { +  const UniqueVector<ActionTuple> &Actions; +  std::string TypeName; + +public: +  CustomDfaEmitter(const UniqueVector<ActionTuple> &Actions, StringRef TypeName) +      : Actions(Actions), TypeName(TypeName) {} + +  void printActionType(raw_ostream &OS) override; +  void printActionValue(action_type A, raw_ostream &OS) override; +}; +} // namespace + +void AutomatonEmitter::run(raw_ostream &OS) { +  for (Record *R : Records.getAllDerivedDefinitions("GenericAutomaton")) { +    Automaton A(Records, R); +    OS << "#ifdef GET_" << R->getName() << "_DECL\n"; +    A.emit(OS); +    OS << "#endif  // GET_" << R->getName() << "_DECL\n"; +  } +} + +Automaton::Automaton(RecordKeeper &Records, Record *R) +    : Records(Records), R(R) { +  LLVM_DEBUG(dbgs() << "Emitting automaton for " << R->getName() << "\n"); +  ActionSymbolFields = R->getValueAsListOfStrings("SymbolFields"); +} + +void Automaton::emit(raw_ostream &OS) { +  StringRef TransitionClass = R->getValueAsString("TransitionClass"); +  for (Record *T : Records.getAllDerivedDefinitions(TransitionClass)) { +    assert(T->isSubClassOf("Transition")); +    Transitions.emplace_back(T, this); +    Actions.insert(Transitions.back().getActions()); +  } + +  LLVM_DEBUG(dbgs() << "  Action alphabet cardinality: " << Actions.size() +                    << "\n"); +  LLVM_DEBUG(dbgs() << "  Each state has " << Transitions.size() +                    << " potential transitions.\n"); + +  StringRef Name = R->getName(); + +  CustomDfaEmitter Emitter(Actions, std::string(Name) + "Action"); +  // Starting from the initial state, build up a list of possible states and +  // transitions. +  std::deque<uint64_t> Worklist(1, 0); +  std::set<uint64_t> SeenStates; +  unsigned NumTransitions = 0; +  SeenStates.insert(Worklist.front()); +  while (!Worklist.empty()) { +    uint64_t State = Worklist.front(); +    Worklist.pop_front(); +    for (Transition &T : Transitions) { +      if (!T.canTransitionFrom(State)) +        continue; +      uint64_t NewState = T.transitionFrom(State); +      if (SeenStates.emplace(NewState).second) +        Worklist.emplace_back(NewState); +      ++NumTransitions; +      Emitter.addTransition(State, NewState, Actions.idFor(T.getActions())); +    } +  } +  LLVM_DEBUG(dbgs() << "  NFA automaton has " << SeenStates.size() +                    << " states with " << NumTransitions << " transitions.\n"); + +  const auto &ActionTypes = Transitions.back().getTypes(); +  OS << "// The type of an action in the " << Name << " automaton.\n"; +  if (ActionTypes.size() == 1) { +    OS << "using " << Name << "Action = " << ActionTypes[0] << ";\n"; +  } else { +    OS << "using " << Name << "Action = std::tuple<" << join(ActionTypes, ", ") +       << ">;\n"; +  } +  OS << "\n"; + +  Emitter.emit(Name, OS); +} + +StringRef Automaton::getActionSymbolType(StringRef A) { +  Twine Ty = "TypeOf_" + A; +  if (!R->getValue(Ty.str())) +    return ""; +  return R->getValueAsString(Ty.str()); +} + +Transition::Transition(Record *R, Automaton *Parent) { +  BitsInit *NewStateInit = R->getValueAsBitsInit("NewState"); +  NewState = 0; +  assert(NewStateInit->getNumBits() <= sizeof(uint64_t) * 8 && +         "State cannot be represented in 64 bits!"); +  for (unsigned I = 0; I < NewStateInit->getNumBits(); ++I) { +    if (auto *Bit = dyn_cast<BitInit>(NewStateInit->getBit(I))) { +      if (Bit->getValue()) +        NewState |= 1ULL << I; +    } +  } + +  for (StringRef A : Parent->getActionSymbolFields()) { +    RecordVal *SymbolV = R->getValue(A); +    if (auto *Ty = dyn_cast<RecordRecTy>(SymbolV->getType())) { +      Actions.emplace_back(R->getValueAsDef(A), 0, ""); +      Types.emplace_back(Ty->getAsString()); +    } else if (isa<IntRecTy>(SymbolV->getType())) { +      Actions.emplace_back(nullptr, R->getValueAsInt(A), ""); +      Types.emplace_back("unsigned"); +    } else if (isa<StringRecTy>(SymbolV->getType()) || +               isa<CodeRecTy>(SymbolV->getType())) { +      Actions.emplace_back(nullptr, 0, R->getValueAsString(A)); +      Types.emplace_back("std::string"); +    } else { +      report_fatal_error("Unhandled symbol type!"); +    } + +    StringRef TypeOverride = Parent->getActionSymbolType(A); +    if (!TypeOverride.empty()) +      Types.back() = TypeOverride; +  } +} + +bool Transition::canTransitionFrom(uint64_t State) { +  if ((State & NewState) == 0) +    // The bits we want to set are not set; +    return true; +  return false; +} + +uint64_t Transition::transitionFrom(uint64_t State) { +  return State | NewState; +} + +void CustomDfaEmitter::printActionType(raw_ostream &OS) { OS << TypeName; } + +void CustomDfaEmitter::printActionValue(action_type A, raw_ostream &OS) { +  const ActionTuple &AT = Actions[A]; +  if (AT.size() > 1) +    OS << "std::make_tuple("; +  bool First = true; +  for (const auto &SingleAction : AT) { +    if (!First) +      OS << ", "; +    First = false; +    SingleAction.print(OS); +  } +  if (AT.size() > 1) +    OS << ")"; +} + +namespace llvm { + +void EmitAutomata(RecordKeeper &RK, raw_ostream &OS) { +  AutomatonEmitter(RK).run(OS); +} + +} // namespace llvm  | 
