diff options
Diffstat (limited to 'include/llvm/Support/Automaton.h')
-rw-r--r-- | include/llvm/Support/Automaton.h | 253 |
1 files changed, 253 insertions, 0 deletions
diff --git a/include/llvm/Support/Automaton.h b/include/llvm/Support/Automaton.h new file mode 100644 index 000000000000..7c13a698e492 --- /dev/null +++ b/include/llvm/Support/Automaton.h @@ -0,0 +1,253 @@ +//===-- Automaton.h - Support for driving TableGen-produced DFAs ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements class that drive and introspect deterministic finite- +// state automata (DFAs) as generated by TableGen's -gen-automata backend. +// +// For a description of how to define an automaton, see +// include/llvm/TableGen/Automaton.td. +// +// One important detail is that these deterministic automata are created from +// (potentially) nondeterministic definitions. Therefore a unique sequence of +// input symbols will produce one path through the DFA but multiple paths +// through the original NFA. An automaton by default only returns "accepted" or +// "not accepted", but frequently we want to analyze what NFA path was taken. +// Finding a path through the NFA states that results in a DFA state can help +// answer *what* the solution to a problem was, not just that there exists a +// solution. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_AUTOMATON_H +#define LLVM_SUPPORT_AUTOMATON_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Allocator.h" +#include <deque> +#include <map> +#include <memory> +#include <unordered_map> +#include <vector> + +namespace llvm { + +using NfaPath = SmallVector<uint64_t, 4>; + +/// Forward define the pair type used by the automata transition info tables. +/// +/// Experimental results with large tables have shown a significant (multiple +/// orders of magnitude) parsing speedup by using a custom struct here with a +/// trivial constructor rather than std::pair<uint64_t, uint64_t>. +struct NfaStatePair { + uint64_t FromDfaState, ToDfaState; + + bool operator<(const NfaStatePair &Other) const { + return std::make_tuple(FromDfaState, ToDfaState) < + std::make_tuple(Other.FromDfaState, Other.ToDfaState); + } +}; + +namespace internal { +/// The internal class that maintains all possible paths through an NFA based +/// on a path through the DFA. +class NfaTranscriber { +private: + /// Cached transition table. This is a table of NfaStatePairs that contains + /// zero-terminated sequences pointed to by DFA transitions. + ArrayRef<NfaStatePair> TransitionInfo; + + /// A simple linked-list of traversed states that can have a shared tail. The + /// traversed path is stored in reverse order with the latest state as the + /// head. + struct PathSegment { + uint64_t State; + PathSegment *Tail; + }; + + /// We allocate segment objects frequently. Allocate them upfront and dispose + /// at the end of a traversal rather than hammering the system allocator. + SpecificBumpPtrAllocator<PathSegment> Allocator; + + /// Heads of each tracked path. These are not ordered. + std::deque<PathSegment *> Heads; + + /// The returned paths. This is populated during getPaths. + SmallVector<NfaPath, 4> Paths; + + /// Create a new segment and return it. + PathSegment *makePathSegment(uint64_t State, PathSegment *Tail) { + PathSegment *P = Allocator.Allocate(); + *P = {State, Tail}; + return P; + } + + /// Pairs defines a sequence of possible NFA transitions for a single DFA + /// transition. + void transition(ArrayRef<NfaStatePair> Pairs) { + // Iterate over all existing heads. We will mutate the Heads deque during + // iteration. + unsigned NumHeads = Heads.size(); + for (unsigned I = 0; I < NumHeads; ++I) { + PathSegment *Head = Heads[I]; + // The sequence of pairs is sorted. Select the set of pairs that + // transition from the current head state. + auto PI = lower_bound(Pairs, NfaStatePair{Head->State, 0ULL}); + auto PE = upper_bound(Pairs, NfaStatePair{Head->State, INT64_MAX}); + // For every transition from the current head state, add a new path + // segment. + for (; PI != PE; ++PI) + if (PI->FromDfaState == Head->State) + Heads.push_back(makePathSegment(PI->ToDfaState, Head)); + } + // Now we've iterated over all the initial heads and added new ones, + // dispose of the original heads. + Heads.erase(Heads.begin(), std::next(Heads.begin(), NumHeads)); + } + +public: + NfaTranscriber(ArrayRef<NfaStatePair> TransitionInfo) + : TransitionInfo(TransitionInfo) { + reset(); + } + + void reset() { + Paths.clear(); + Heads.clear(); + Allocator.DestroyAll(); + // The initial NFA state is 0. + Heads.push_back(makePathSegment(0ULL, nullptr)); + } + + void transition(unsigned TransitionInfoIdx) { + unsigned EndIdx = TransitionInfoIdx; + while (TransitionInfo[EndIdx].ToDfaState != 0) + ++EndIdx; + ArrayRef<NfaStatePair> Pairs(&TransitionInfo[TransitionInfoIdx], + EndIdx - TransitionInfoIdx); + transition(Pairs); + } + + ArrayRef<NfaPath> getPaths() { + Paths.clear(); + for (auto *Head : Heads) { + NfaPath P; + while (Head->State != 0) { + P.push_back(Head->State); + Head = Head->Tail; + } + std::reverse(P.begin(), P.end()); + Paths.push_back(std::move(P)); + } + return Paths; + } +}; +} // namespace internal + +/// A deterministic finite-state automaton. The automaton is defined in +/// TableGen; this object drives an automaton defined by tblgen-emitted tables. +/// +/// An automaton accepts a sequence of input tokens ("actions"). This class is +/// templated on the type of these actions. +template <typename ActionT> class Automaton { + /// Map from {State, Action} to {NewState, TransitionInfoIdx}. + /// TransitionInfoIdx is used by the DfaTranscriber to analyze the transition. + /// FIXME: This uses a std::map because ActionT can be a pair type including + /// an enum. In particular DenseMapInfo<ActionT> must be defined to use + /// DenseMap here. + /// This is a shared_ptr to allow very quick copy-construction of Automata; this + /// state is immutable after construction so this is safe. + using MapTy = std::map<std::pair<uint64_t, ActionT>, std::pair<uint64_t, unsigned>>; + std::shared_ptr<MapTy> M; + /// An optional transcription object. This uses much more state than simply + /// traversing the DFA for acceptance, so is heap allocated. + std::shared_ptr<internal::NfaTranscriber> Transcriber; + /// The initial DFA state is 1. + uint64_t State = 1; + /// True if we should transcribe and false if not (even if Transcriber is defined). + bool Transcribe; + +public: + /// Create an automaton. + /// \param Transitions The Transitions table as created by TableGen. Note that + /// because the action type differs per automaton, the + /// table type is templated as ArrayRef<InfoT>. + /// \param TranscriptionTable The TransitionInfo table as created by TableGen. + /// + /// Providing the TranscriptionTable argument as non-empty will enable the + /// use of transcription, which analyzes the possible paths in the original + /// NFA taken by the DFA. NOTE: This is substantially more work than simply + /// driving the DFA, so unless you require the getPaths() method leave this + /// empty. + template <typename InfoT> + Automaton(ArrayRef<InfoT> Transitions, + ArrayRef<NfaStatePair> TranscriptionTable = {}) { + if (!TranscriptionTable.empty()) + Transcriber = + std::make_shared<internal::NfaTranscriber>(TranscriptionTable); + Transcribe = Transcriber != nullptr; + M = std::make_shared<MapTy>(); + for (const auto &I : Transitions) + // Greedily read and cache the transition table. + M->emplace(std::make_pair(I.FromDfaState, I.Action), + std::make_pair(I.ToDfaState, I.InfoIdx)); + } + Automaton(const Automaton &) = default; + + /// Reset the automaton to its initial state. + void reset() { + State = 1; + if (Transcriber) + Transcriber->reset(); + } + + /// Enable or disable transcription. Transcription is only available if + /// TranscriptionTable was provided to the constructor. + void enableTranscription(bool Enable = true) { + assert(Transcriber && + "Transcription is only available if TranscriptionTable was provided " + "to the Automaton constructor"); + Transcribe = Enable; + } + + /// Transition the automaton based on input symbol A. Return true if the + /// automaton transitioned to a valid state, false if the automaton + /// transitioned to an invalid state. + /// + /// If this function returns false, all methods are undefined until reset() is + /// called. + bool add(const ActionT &A) { + auto I = M->find({State, A}); + if (I == M->end()) + return false; + if (Transcriber && Transcribe) + Transcriber->transition(I->second.second); + State = I->second.first; + return true; + } + + /// Return true if the automaton can be transitioned based on input symbol A. + bool canAdd(const ActionT &A) { + auto I = M->find({State, A}); + return I != M->end(); + } + + /// Obtain a set of possible paths through the input nondeterministic + /// automaton that could be obtained from the sequence of input actions + /// presented to this deterministic automaton. + ArrayRef<NfaPath> getNfaPaths() { + assert(Transcriber && Transcribe && + "Can only obtain NFA paths if transcribing!"); + return Transcriber->getPaths(); + } +}; + +} // namespace llvm + +#endif // LLVM_SUPPORT_AUTOMATON_H |