aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/DebugInfo/Symbolize/Markup.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/DebugInfo/Symbolize/Markup.cpp')
-rw-r--r--llvm/lib/DebugInfo/Symbolize/Markup.cpp202
1 files changed, 202 insertions, 0 deletions
diff --git a/llvm/lib/DebugInfo/Symbolize/Markup.cpp b/llvm/lib/DebugInfo/Symbolize/Markup.cpp
new file mode 100644
index 000000000000..9bc65e763287
--- /dev/null
+++ b/llvm/lib/DebugInfo/Symbolize/Markup.cpp
@@ -0,0 +1,202 @@
+//===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the log symbolizer markup data model and parser.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/Symbolize/Markup.h"
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+
+namespace llvm {
+namespace symbolize {
+
+// Regular expression source for the SGR control codes that are recognized in
+// text outside markup elements. Matches the following:
+// "\033[0m"
+// "\033[1m"
+// "\033[30m" -- "\033[37m"
+// "\033" is the literal ESC byte; "\\[" reaches the regex engine as an escaped
+// '[' so that it is not read as the start of a bracket expression.
+static const char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
+
+// Constructs a parser. MultilineTags is the set of tags whose elements are
+// allowed to span several lines (consulted by parseMultiLineBegin); the SGR
+// matcher is initialized from SGRSyntaxStr.
+MarkupParser::MarkupParser(StringSet<> MultilineTags)
+ : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {}
+
+// Returns the prefix of Str that ends just before Pos. Pos is expected to be
+// an iterator into Str.
+static StringRef takeTo(StringRef Str, StringRef::iterator Pos) {
+  const size_t PrefixLen = Pos - Str.begin();
+  return Str.take_front(PrefixLen);
+}
+// Shrinks Str in place so that it begins at Pos, discarding everything before
+// it. Pos is expected to be an iterator into Str.
+static void advanceTo(StringRef &Str, StringRef::iterator Pos) {
+  const size_t Consumed = Pos - Str.begin();
+  Str = Str.drop_front(Consumed);
+}
+
+// Starts parsing a new line. No parsing happens here; the line is only
+// recorded and is consumed incrementally by nextNode(). State produced while
+// handling the previous line (buffered nodes and the storage of a finished
+// multi-line element) is discarded, whereas a multi-line element that is still
+// in progress is deliberately kept.
+void MarkupParser::parseLine(StringRef Line) {
+  this->Line = Line;
+  FinishedMultiline.clear();
+  Buffer.clear();
+  NextIdx = 0;
+}
+
+// Returns the next node parsed from the current line, or None when nothing
+// more can be produced from it (the line is exhausted, or all of it was
+// absorbed into a multi-line element that is still open).
+//
+// Nodes are produced lazily: each call either hands out an already buffered
+// node or parses just enough of the remaining line to refill the buffer. The
+// returned node's string references point into the current line or, for a
+// completed multi-line element, into FinishedMultiline.
+Optional<MarkupNode> MarkupParser::nextNode() {
+  // Pull something out of the buffer if possible.
+  if (!Buffer.empty()) {
+    if (NextIdx < Buffer.size())
+      return std::move(Buffer[NextIdx++]);
+    // Every buffered node has been handed out; recycle the buffer.
+    NextIdx = 0;
+    Buffer.clear();
+  }
+
+  // The buffer is empty, so parse the next bit of the line.
+
+  if (Line.empty())
+    return None;
+
+  if (!InProgressMultiline.empty()) {
+    if (Optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) {
+      llvm::append_range(InProgressMultiline, *MultilineEnd);
+      assert(FinishedMultiline.empty() &&
+             "At most one multi-line element can be finished at a time.");
+      FinishedMultiline.swap(InProgressMultiline);
+      // Parse the multi-line element as if it were contiguous.
+      advanceTo(Line, MultilineEnd->end());
+      return *parseElement(FinishedMultiline);
+    }
+
+    // The whole line is part of the multi-line element.
+    llvm::append_range(InProgressMultiline, Line);
+    Line = Line.drop_front(Line.size());
+    return None;
+  }
+
+  // Find the first valid markup element, if any.
+  if (Optional<MarkupNode> Element = parseElement(Line)) {
+    // Capture the element's extent before the node is moved into the buffer,
+    // so that nothing is read from a moved-from object afterwards.
+    const StringRef ElementText = Element->Text;
+    // Text preceding the element is emitted first to preserve input order.
+    parseTextOutsideMarkup(takeTo(Line, ElementText.begin()));
+    Buffer.push_back(std::move(*Element));
+    advanceTo(Line, ElementText.end());
+    return nextNode();
+  }
+
+  // Since there were no valid elements remaining, see if the line opens a
+  // multi-line element.
+  if (Optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) {
+    // Emit any text before the element.
+    parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin()));
+
+    // Begin recording the multi-line element. Its characters are copied, so
+    // the rest of the line can be dropped.
+    llvm::append_range(InProgressMultiline, *MultilineBegin);
+    Line = Line.drop_front(Line.size());
+    return nextNode();
+  }
+
+  // The line doesn't contain any more markup elements, so emit it as text.
+  parseTextOutsideMarkup(Line);
+  Line = Line.drop_front(Line.size());
+  return nextNode();
+}
+
+// Signals that the input has ended. A multi-line element that is still being
+// accumulated can never be completed, so its raw characters are re-parsed as
+// text outside markup and buffered for nextNode() to return. Does nothing if no
+// multi-line element is in progress.
+void MarkupParser::flush() {
+  if (InProgressMultiline.empty())
+    return;
+  // FinishedMultiline may still hold an element that was completed earlier on
+  // the current line. Drop it before swapping; otherwise the swap would move
+  // that stale element into InProgressMultiline and the parser would still
+  // consider a multi-line element open after the flush.
+  FinishedMultiline.clear();
+  FinishedMultiline.swap(InProgressMultiline);
+  parseTextOutsideMarkup(FinishedMultiline);
+}
+
+// Scans the given line for the first well-formed markup element, i.e. a
+// "{{{...}}}" span whose tag (the content up to the first ':') is non-empty.
+// Spans with an empty tag are skipped and scanning resumes after them. Returns
+// None when no such element exists. The returned node's Text, Tag and Fields
+// all reference the storage behind Line.
+Optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {
+  for (;;) {
+    // Locate the next opening marker and the closing marker that follows it.
+    const size_t OpenPos = Line.find("{{{");
+    if (OpenPos == StringRef::npos)
+      break;
+    const size_t ClosePos = Line.find("}}}", OpenPos + 3);
+    if (ClosePos == StringRef::npos)
+      break;
+    const size_t PastClose = ClosePos + 3;
+
+    MarkupNode Candidate;
+    Candidate.Text = Line.slice(OpenPos, PastClose);
+    // Whatever happens below, the next search starts after this span.
+    Line = Line.substr(PastClose);
+
+    // Strip the markers and separate the tag from the field list.
+    const StringRef Inner = Candidate.Text.drop_front(3).drop_back(3);
+    const std::pair<StringRef, StringRef> TagAndFields = Inner.split(':');
+    Candidate.Tag = TagAndFields.first;
+    const StringRef RawFields = TagAndFields.second;
+    if (Candidate.Tag.empty())
+      continue;
+
+    if (RawFields.empty()) {
+      // "{{{tag:}}}" carries a single empty field, unlike "{{{tag}}}", which
+      // carries none.
+      if (Inner.back() == ':')
+        Candidate.Fields.push_back(RawFields);
+    } else {
+      RawFields.split(Candidate.Fields, ":");
+    }
+    return Candidate;
+  }
+  return None;
+}
+
+// Builds a node that carries only Text; its tag and fields are left
+// default-initialized.
+static MarkupNode textNode(StringRef Text) {
+  MarkupNode Plain;
+  Plain.Text = Text;
+  return Plain;
+}
+
+// Splits a region of text that lies outside every markup element into nodes and
+// appends them to the buffer. The region may still contain SGR control codes;
+// each code becomes its own node, and each run of text between codes becomes a
+// node as well. Empty runs produce no node.
+void MarkupParser::parseTextOutsideMarkup(StringRef Text) {
+  if (Text.empty())
+    return;
+  SmallVector<StringRef> Groups;
+  while (SGRSyntax.match(Text, &Groups)) {
+    // The first match entry is the whole control code.
+    const StringRef Code = Groups.front();
+
+    // Emit the text that precedes the control code, if there is any.
+    if (Code.begin() != Text.begin())
+      Buffer.push_back(textNode(takeTo(Text, Code.begin())));
+
+    Buffer.push_back(textNode(Code));
+    advanceTo(Text, Code.end());
+  }
+  // Whatever follows the last control code is plain text.
+  if (!Text.empty())
+    Buffer.push_back(textNode(Text));
+}
+
+// For a line known to contain no complete markup element, checks whether it
+// ends with the opening part of a multi-line element. On success returns the
+// portion of the line from the opening marker to the end of the line;
+// otherwise returns None.
+Optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) {
+  // Only the last opening marker on the line can start a multi-line element.
+  const size_t OpenPos = Line.rfind("{{{");
+  if (OpenPos == StringRef::npos)
+    return None;
+  const size_t TagStart = OpenPos + 3;
+
+  // A closing marker after the opening one means the opening marker cannot
+  // belong to a multi-line element.
+  if (Line.find("}}}", TagStart) != StringRef::npos)
+    return None;
+
+  // The tag runs up to the first ':' and must be registered as multi-line.
+  const size_t ColonPos = Line.find(':', TagStart);
+  if (ColonPos == StringRef::npos)
+    return None;
+  if (!MultilineTags.contains(Line.slice(TagStart, ColonPos)))
+    return None;
+
+  return Line.substr(OpenPos);
+}
+
+// Looks for the closing marker of an in-progress multi-line element. If the
+// line contains "}}}", returns the front of the line up to and including the
+// first such marker; otherwise returns None.
+Optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) {
+  const size_t ClosePos = Line.find("}}}");
+  if (ClosePos != StringRef::npos)
+    return Line.take_front(ClosePos + 3);
+  return None;
+}
+
+} // end namespace symbolize
+} // end namespace llvm