diff options
Diffstat (limited to 'llvm/lib/Support/YAMLParser.cpp')
| -rw-r--r-- | llvm/lib/Support/YAMLParser.cpp | 2442 | 
1 files changed, 2442 insertions, 0 deletions
diff --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp new file mode 100644 index 0000000000000..9b2fe9c4418a1 --- /dev/null +++ b/llvm/lib/Support/YAMLParser.cpp @@ -0,0 +1,2442 @@ +//===- YAMLParser.cpp - Simple YAML parser --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +//  This file implements a YAML parser. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/YAMLParser.h" +#include "llvm/ADT/AllocatorList.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/Unicode.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <map> +#include <memory> +#include <string> +#include <system_error> +#include <utility> + +using namespace llvm; +using namespace yaml; + +enum UnicodeEncodingForm { +  UEF_UTF32_LE, ///< UTF-32 Little Endian +  UEF_UTF32_BE, ///< UTF-32 Big Endian +  UEF_UTF16_LE, ///< UTF-16 Little Endian +  UEF_UTF16_BE, ///< UTF-16 Big Endian +  UEF_UTF8,     ///< UTF-8 or ascii. +  UEF_Unknown   ///< Not a valid Unicode encoding. +}; + +/// EncodingInfo - Holds the encoding type and length of the byte order mark if +///                it exists. Length is in {0, 2, 3, 4}. +using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>; + +/// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode +///                      encoding form of \a Input. +/// +/// @param Input A string of length 0 or more. +/// @returns An EncodingInfo indicating the Unicode encoding form of the input +///          and how long the byte order mark is if one exists. +static EncodingInfo getUnicodeEncoding(StringRef Input) { +  if (Input.empty()) +    return std::make_pair(UEF_Unknown, 0); + +  switch (uint8_t(Input[0])) { +  case 0x00: +    if (Input.size() >= 4) { +      if (  Input[1] == 0 +         && uint8_t(Input[2]) == 0xFE +         && uint8_t(Input[3]) == 0xFF) +        return std::make_pair(UEF_UTF32_BE, 4); +      if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0) +        return std::make_pair(UEF_UTF32_BE, 0); +    } + +    if (Input.size() >= 2 && Input[1] != 0) +      return std::make_pair(UEF_UTF16_BE, 0); +    return std::make_pair(UEF_Unknown, 0); +  case 0xFF: +    if (  Input.size() >= 4 +       && uint8_t(Input[1]) == 0xFE +       && Input[2] == 0 +       && Input[3] == 0) +      return std::make_pair(UEF_UTF32_LE, 4); + +    if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE) +      return std::make_pair(UEF_UTF16_LE, 2); +    return std::make_pair(UEF_Unknown, 0); +  case 0xFE: +    if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF) +      return std::make_pair(UEF_UTF16_BE, 2); +    return std::make_pair(UEF_Unknown, 0); +  case 0xEF: +    if (  Input.size() >= 3 +       && uint8_t(Input[1]) == 0xBB +       && uint8_t(Input[2]) == 0xBF) +      return std::make_pair(UEF_UTF8, 3); +    return std::make_pair(UEF_Unknown, 0); +  } + +  // It could still be utf-32 or utf-16. +  if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0) +    return std::make_pair(UEF_UTF32_LE, 0); + +  if (Input.size() >= 2 && Input[1] == 0) +    return std::make_pair(UEF_UTF16_LE, 0); + +  return std::make_pair(UEF_UTF8, 0); +} + +/// Pin the vtables to this file. +void Node::anchor() {} +void NullNode::anchor() {} +void ScalarNode::anchor() {} +void BlockScalarNode::anchor() {} +void KeyValueNode::anchor() {} +void MappingNode::anchor() {} +void SequenceNode::anchor() {} +void AliasNode::anchor() {} + +namespace llvm { +namespace yaml { + +/// Token - A single YAML token. +struct Token { +  enum TokenKind { +    TK_Error, // Uninitialized token. +    TK_StreamStart, +    TK_StreamEnd, +    TK_VersionDirective, +    TK_TagDirective, +    TK_DocumentStart, +    TK_DocumentEnd, +    TK_BlockEntry, +    TK_BlockEnd, +    TK_BlockSequenceStart, +    TK_BlockMappingStart, +    TK_FlowEntry, +    TK_FlowSequenceStart, +    TK_FlowSequenceEnd, +    TK_FlowMappingStart, +    TK_FlowMappingEnd, +    TK_Key, +    TK_Value, +    TK_Scalar, +    TK_BlockScalar, +    TK_Alias, +    TK_Anchor, +    TK_Tag +  } Kind = TK_Error; + +  /// A string of length 0 or more whose begin() points to the logical location +  /// of the token in the input. +  StringRef Range; + +  /// The value of a block scalar node. +  std::string Value; + +  Token() = default; +}; + +} // end namespace yaml +} // end namespace llvm + +using TokenQueueT = BumpPtrList<Token>; + +namespace { + +/// This struct is used to track simple keys. +/// +/// Simple keys are handled by creating an entry in SimpleKeys for each Token +/// which could legally be the start of a simple key. When peekNext is called, +/// if the Token To be returned is referenced by a SimpleKey, we continue +/// tokenizing until that potential simple key has either been found to not be +/// a simple key (we moved on to the next line or went further than 1024 chars). +/// Or when we run into a Value, and then insert a Key token (and possibly +/// others) before the SimpleKey's Tok. +struct SimpleKey { +  TokenQueueT::iterator Tok; +  unsigned Column; +  unsigned Line; +  unsigned FlowLevel; +  bool IsRequired; + +  bool operator ==(const SimpleKey &Other) { +    return Tok == Other.Tok; +  } +}; + +} // end anonymous namespace + +/// The Unicode scalar value of a UTF-8 minimal well-formed code unit +///        subsequence and the subsequence's length in code units (uint8_t). +///        A length of 0 represents an error. +using UTF8Decoded = std::pair<uint32_t, unsigned>; + +static UTF8Decoded decodeUTF8(StringRef Range) { +  StringRef::iterator Position= Range.begin(); +  StringRef::iterator End = Range.end(); +  // 1 byte: [0x00, 0x7f] +  // Bit pattern: 0xxxxxxx +  if ((*Position & 0x80) == 0) { +     return std::make_pair(*Position, 1); +  } +  // 2 bytes: [0x80, 0x7ff] +  // Bit pattern: 110xxxxx 10xxxxxx +  if (Position + 1 != End && +      ((*Position & 0xE0) == 0xC0) && +      ((*(Position + 1) & 0xC0) == 0x80)) { +    uint32_t codepoint = ((*Position & 0x1F) << 6) | +                          (*(Position + 1) & 0x3F); +    if (codepoint >= 0x80) +      return std::make_pair(codepoint, 2); +  } +  // 3 bytes: [0x8000, 0xffff] +  // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx +  if (Position + 2 != End && +      ((*Position & 0xF0) == 0xE0) && +      ((*(Position + 1) & 0xC0) == 0x80) && +      ((*(Position + 2) & 0xC0) == 0x80)) { +    uint32_t codepoint = ((*Position & 0x0F) << 12) | +                         ((*(Position + 1) & 0x3F) << 6) | +                          (*(Position + 2) & 0x3F); +    // Codepoints between 0xD800 and 0xDFFF are invalid, as +    // they are high / low surrogate halves used by UTF-16. +    if (codepoint >= 0x800 && +        (codepoint < 0xD800 || codepoint > 0xDFFF)) +      return std::make_pair(codepoint, 3); +  } +  // 4 bytes: [0x10000, 0x10FFFF] +  // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx +  if (Position + 3 != End && +      ((*Position & 0xF8) == 0xF0) && +      ((*(Position + 1) & 0xC0) == 0x80) && +      ((*(Position + 2) & 0xC0) == 0x80) && +      ((*(Position + 3) & 0xC0) == 0x80)) { +    uint32_t codepoint = ((*Position & 0x07) << 18) | +                         ((*(Position + 1) & 0x3F) << 12) | +                         ((*(Position + 2) & 0x3F) << 6) | +                          (*(Position + 3) & 0x3F); +    if (codepoint >= 0x10000 && codepoint <= 0x10FFFF) +      return std::make_pair(codepoint, 4); +  } +  return std::make_pair(0, 0); +} + +namespace llvm { +namespace yaml { + +/// Scans YAML tokens from a MemoryBuffer. +class Scanner { +public: +  Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true, +          std::error_code *EC = nullptr); +  Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true, +          std::error_code *EC = nullptr); + +  /// Parse the next token and return it without popping it. +  Token &peekNext(); + +  /// Parse the next token and pop it from the queue. +  Token getNext(); + +  void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, +                  ArrayRef<SMRange> Ranges = None) { +    SM.PrintMessage(Loc, Kind, Message, Ranges, /* FixIts= */ None, ShowColors); +  } + +  void setError(const Twine &Message, StringRef::iterator Position) { +    if (Current >= End) +      Current = End - 1; + +    // propagate the error if possible +    if (EC) +      *EC = make_error_code(std::errc::invalid_argument); + +    // Don't print out more errors after the first one we encounter. The rest +    // are just the result of the first, and have no meaning. +    if (!Failed) +      printError(SMLoc::getFromPointer(Current), SourceMgr::DK_Error, Message); +    Failed = true; +  } + +  void setError(const Twine &Message) { +    setError(Message, Current); +  } + +  /// Returns true if an error occurred while parsing. +  bool failed() { +    return Failed; +  } + +private: +  void init(MemoryBufferRef Buffer); + +  StringRef currentInput() { +    return StringRef(Current, End - Current); +  } + +  /// Decode a UTF-8 minimal well-formed code unit subsequence starting +  ///        at \a Position. +  /// +  /// If the UTF-8 code units starting at Position do not form a well-formed +  /// code unit subsequence, then the Unicode scalar value is 0, and the length +  /// is 0. +  UTF8Decoded decodeUTF8(StringRef::iterator Position) { +    return ::decodeUTF8(StringRef(Position, End - Position)); +  } + +  // The following functions are based on the gramar rules in the YAML spec. The +  // style of the function names it meant to closely match how they are written +  // in the spec. The number within the [] is the number of the grammar rule in +  // the spec. +  // +  // See 4.2 [Production Naming Conventions] for the meaning of the prefixes. +  // +  // c- +  //   A production starting and ending with a special character. +  // b- +  //   A production matching a single line break. +  // nb- +  //   A production starting and ending with a non-break character. +  // s- +  //   A production starting and ending with a white space character. +  // ns- +  //   A production starting and ending with a non-space character. +  // l- +  //   A production matching complete line(s). + +  /// Skip a single nb-char[27] starting at Position. +  /// +  /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE] +  ///                  | [0xFF00-0xFFFD] | [0x10000-0x10FFFF] +  /// +  /// @returns The code unit after the nb-char, or Position if it's not an +  ///          nb-char. +  StringRef::iterator skip_nb_char(StringRef::iterator Position); + +  /// Skip a single b-break[28] starting at Position. +  /// +  /// A b-break is 0xD 0xA | 0xD | 0xA +  /// +  /// @returns The code unit after the b-break, or Position if it's not a +  ///          b-break. +  StringRef::iterator skip_b_break(StringRef::iterator Position); + +  /// Skip a single s-space[31] starting at Position. +  /// +  /// An s-space is 0x20 +  /// +  /// @returns The code unit after the s-space, or Position if it's not a +  ///          s-space. +  StringRef::iterator skip_s_space(StringRef::iterator Position); + +  /// Skip a single s-white[33] starting at Position. +  /// +  /// A s-white is 0x20 | 0x9 +  /// +  /// @returns The code unit after the s-white, or Position if it's not a +  ///          s-white. +  StringRef::iterator skip_s_white(StringRef::iterator Position); + +  /// Skip a single ns-char[34] starting at Position. +  /// +  /// A ns-char is nb-char - s-white +  /// +  /// @returns The code unit after the ns-char, or Position if it's not a +  ///          ns-char. +  StringRef::iterator skip_ns_char(StringRef::iterator Position); + +  using SkipWhileFunc = StringRef::iterator (Scanner::*)(StringRef::iterator); + +  /// Skip minimal well-formed code unit subsequences until Func +  ///        returns its input. +  /// +  /// @returns The code unit after the last minimal well-formed code unit +  ///          subsequence that Func accepted. +  StringRef::iterator skip_while( SkipWhileFunc Func +                                , StringRef::iterator Position); + +  /// Skip minimal well-formed code unit subsequences until Func returns its +  /// input. +  void advanceWhile(SkipWhileFunc Func); + +  /// Scan ns-uri-char[39]s starting at Cur. +  /// +  /// This updates Cur and Column while scanning. +  void scan_ns_uri_char(); + +  /// Consume a minimal well-formed code unit subsequence starting at +  ///        \a Cur. Return false if it is not the same Unicode scalar value as +  ///        \a Expected. This updates \a Column. +  bool consume(uint32_t Expected); + +  /// Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column. +  void skip(uint32_t Distance); + +  /// Return true if the minimal well-formed code unit subsequence at +  ///        Pos is whitespace or a new line +  bool isBlankOrBreak(StringRef::iterator Position); + +  /// Consume a single b-break[28] if it's present at the current position. +  /// +  /// Return false if the code unit at the current position isn't a line break. +  bool consumeLineBreakIfPresent(); + +  /// If IsSimpleKeyAllowed, create and push_back a new SimpleKey. +  void saveSimpleKeyCandidate( TokenQueueT::iterator Tok +                             , unsigned AtColumn +                             , bool IsRequired); + +  /// Remove simple keys that can no longer be valid simple keys. +  /// +  /// Invalid simple keys are not on the current line or are further than 1024 +  /// columns back. +  void removeStaleSimpleKeyCandidates(); + +  /// Remove all simple keys on FlowLevel \a Level. +  void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level); + +  /// Unroll indentation in \a Indents back to \a Col. Creates BlockEnd +  ///        tokens if needed. +  bool unrollIndent(int ToColumn); + +  /// Increase indent to \a Col. Creates \a Kind token at \a InsertPoint +  ///        if needed. +  bool rollIndent( int ToColumn +                 , Token::TokenKind Kind +                 , TokenQueueT::iterator InsertPoint); + +  /// Skip a single-line comment when the comment starts at the current +  /// position of the scanner. +  void skipComment(); + +  /// Skip whitespace and comments until the start of the next token. +  void scanToNextToken(); + +  /// Must be the first token generated. +  bool scanStreamStart(); + +  /// Generate tokens needed to close out the stream. +  bool scanStreamEnd(); + +  /// Scan a %BLAH directive. +  bool scanDirective(); + +  /// Scan a ... or ---. +  bool scanDocumentIndicator(bool IsStart); + +  /// Scan a [ or { and generate the proper flow collection start token. +  bool scanFlowCollectionStart(bool IsSequence); + +  /// Scan a ] or } and generate the proper flow collection end token. +  bool scanFlowCollectionEnd(bool IsSequence); + +  /// Scan the , that separates entries in a flow collection. +  bool scanFlowEntry(); + +  /// Scan the - that starts block sequence entries. +  bool scanBlockEntry(); + +  /// Scan an explicit ? indicating a key. +  bool scanKey(); + +  /// Scan an explicit : indicating a value. +  bool scanValue(); + +  /// Scan a quoted scalar. +  bool scanFlowScalar(bool IsDoubleQuoted); + +  /// Scan an unquoted scalar. +  bool scanPlainScalar(); + +  /// Scan an Alias or Anchor starting with * or &. +  bool scanAliasOrAnchor(bool IsAlias); + +  /// Scan a block scalar starting with | or >. +  bool scanBlockScalar(bool IsLiteral); + +  /// Scan a chomping indicator in a block scalar header. +  char scanBlockChompingIndicator(); + +  /// Scan an indentation indicator in a block scalar header. +  unsigned scanBlockIndentationIndicator(); + +  /// Scan a block scalar header. +  /// +  /// Return false if an error occurred. +  bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator, +                             bool &IsDone); + +  /// Look for the indentation level of a block scalar. +  /// +  /// Return false if an error occurred. +  bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent, +                             unsigned &LineBreaks, bool &IsDone); + +  /// Scan the indentation of a text line in a block scalar. +  /// +  /// Return false if an error occurred. +  bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent, +                             bool &IsDone); + +  /// Scan a tag of the form !stuff. +  bool scanTag(); + +  /// Dispatch to the next scanning function based on \a *Cur. +  bool fetchMoreTokens(); + +  /// The SourceMgr used for diagnostics and buffer management. +  SourceMgr &SM; + +  /// The original input. +  MemoryBufferRef InputBuffer; + +  /// The current position of the scanner. +  StringRef::iterator Current; + +  /// The end of the input (one past the last character). +  StringRef::iterator End; + +  /// Current YAML indentation level in spaces. +  int Indent; + +  /// Current column number in Unicode code points. +  unsigned Column; + +  /// Current line number. +  unsigned Line; + +  /// How deep we are in flow style containers. 0 Means at block level. +  unsigned FlowLevel; + +  /// Are we at the start of the stream? +  bool IsStartOfStream; + +  /// Can the next token be the start of a simple key? +  bool IsSimpleKeyAllowed; + +  /// True if an error has occurred. +  bool Failed; + +  /// Should colors be used when printing out the diagnostic messages? +  bool ShowColors; + +  /// Queue of tokens. This is required to queue up tokens while looking +  ///        for the end of a simple key. And for cases where a single character +  ///        can produce multiple tokens (e.g. BlockEnd). +  TokenQueueT TokenQueue; + +  /// Indentation levels. +  SmallVector<int, 4> Indents; + +  /// Potential simple keys. +  SmallVector<SimpleKey, 4> SimpleKeys; + +  std::error_code *EC; +}; + +} // end namespace yaml +} // end namespace llvm + +/// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result. +static void encodeUTF8( uint32_t UnicodeScalarValue +                      , SmallVectorImpl<char> &Result) { +  if (UnicodeScalarValue <= 0x7F) { +    Result.push_back(UnicodeScalarValue & 0x7F); +  } else if (UnicodeScalarValue <= 0x7FF) { +    uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6); +    uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F); +    Result.push_back(FirstByte); +    Result.push_back(SecondByte); +  } else if (UnicodeScalarValue <= 0xFFFF) { +    uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12); +    uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); +    uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F); +    Result.push_back(FirstByte); +    Result.push_back(SecondByte); +    Result.push_back(ThirdByte); +  } else if (UnicodeScalarValue <= 0x10FFFF) { +    uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18); +    uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12); +    uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); +    uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F); +    Result.push_back(FirstByte); +    Result.push_back(SecondByte); +    Result.push_back(ThirdByte); +    Result.push_back(FourthByte); +  } +} + +bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) { +  SourceMgr SM; +  Scanner scanner(Input, SM); +  while (true) { +    Token T = scanner.getNext(); +    switch (T.Kind) { +    case Token::TK_StreamStart: +      OS << "Stream-Start: "; +      break; +    case Token::TK_StreamEnd: +      OS << "Stream-End: "; +      break; +    case Token::TK_VersionDirective: +      OS << "Version-Directive: "; +      break; +    case Token::TK_TagDirective: +      OS << "Tag-Directive: "; +      break; +    case Token::TK_DocumentStart: +      OS << "Document-Start: "; +      break; +    case Token::TK_DocumentEnd: +      OS << "Document-End: "; +      break; +    case Token::TK_BlockEntry: +      OS << "Block-Entry: "; +      break; +    case Token::TK_BlockEnd: +      OS << "Block-End: "; +      break; +    case Token::TK_BlockSequenceStart: +      OS << "Block-Sequence-Start: "; +      break; +    case Token::TK_BlockMappingStart: +      OS << "Block-Mapping-Start: "; +      break; +    case Token::TK_FlowEntry: +      OS << "Flow-Entry: "; +      break; +    case Token::TK_FlowSequenceStart: +      OS << "Flow-Sequence-Start: "; +      break; +    case Token::TK_FlowSequenceEnd: +      OS << "Flow-Sequence-End: "; +      break; +    case Token::TK_FlowMappingStart: +      OS << "Flow-Mapping-Start: "; +      break; +    case Token::TK_FlowMappingEnd: +      OS << "Flow-Mapping-End: "; +      break; +    case Token::TK_Key: +      OS << "Key: "; +      break; +    case Token::TK_Value: +      OS << "Value: "; +      break; +    case Token::TK_Scalar: +      OS << "Scalar: "; +      break; +    case Token::TK_BlockScalar: +      OS << "Block Scalar: "; +      break; +    case Token::TK_Alias: +      OS << "Alias: "; +      break; +    case Token::TK_Anchor: +      OS << "Anchor: "; +      break; +    case Token::TK_Tag: +      OS << "Tag: "; +      break; +    case Token::TK_Error: +      break; +    } +    OS << T.Range << "\n"; +    if (T.Kind == Token::TK_StreamEnd) +      break; +    else if (T.Kind == Token::TK_Error) +      return false; +  } +  return true; +} + +bool yaml::scanTokens(StringRef Input) { +  SourceMgr SM; +  Scanner scanner(Input, SM); +  while (true) { +    Token T = scanner.getNext(); +    if (T.Kind == Token::TK_StreamEnd) +      break; +    else if (T.Kind == Token::TK_Error) +      return false; +  } +  return true; +} + +std::string yaml::escape(StringRef Input, bool EscapePrintable) { +  std::string EscapedInput; +  for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) { +    if (*i == '\\') +      EscapedInput += "\\\\"; +    else if (*i == '"') +      EscapedInput += "\\\""; +    else if (*i == 0) +      EscapedInput += "\\0"; +    else if (*i == 0x07) +      EscapedInput += "\\a"; +    else if (*i == 0x08) +      EscapedInput += "\\b"; +    else if (*i == 0x09) +      EscapedInput += "\\t"; +    else if (*i == 0x0A) +      EscapedInput += "\\n"; +    else if (*i == 0x0B) +      EscapedInput += "\\v"; +    else if (*i == 0x0C) +      EscapedInput += "\\f"; +    else if (*i == 0x0D) +      EscapedInput += "\\r"; +    else if (*i == 0x1B) +      EscapedInput += "\\e"; +    else if ((unsigned char)*i < 0x20) { // Control characters not handled above. +      std::string HexStr = utohexstr(*i); +      EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; +    } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence. +      UTF8Decoded UnicodeScalarValue +        = decodeUTF8(StringRef(i, Input.end() - i)); +      if (UnicodeScalarValue.second == 0) { +        // Found invalid char. +        SmallString<4> Val; +        encodeUTF8(0xFFFD, Val); +        EscapedInput.insert(EscapedInput.end(), Val.begin(), Val.end()); +        // FIXME: Error reporting. +        return EscapedInput; +      } +      if (UnicodeScalarValue.first == 0x85) +        EscapedInput += "\\N"; +      else if (UnicodeScalarValue.first == 0xA0) +        EscapedInput += "\\_"; +      else if (UnicodeScalarValue.first == 0x2028) +        EscapedInput += "\\L"; +      else if (UnicodeScalarValue.first == 0x2029) +        EscapedInput += "\\P"; +      else if (!EscapePrintable && +               sys::unicode::isPrintable(UnicodeScalarValue.first)) +        EscapedInput += StringRef(i, UnicodeScalarValue.second); +      else { +        std::string HexStr = utohexstr(UnicodeScalarValue.first); +        if (HexStr.size() <= 2) +          EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; +        else if (HexStr.size() <= 4) +          EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr; +        else if (HexStr.size() <= 8) +          EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr; +      } +      i += UnicodeScalarValue.second - 1; +    } else +      EscapedInput.push_back(*i); +  } +  return EscapedInput; +} + +Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors, +                 std::error_code *EC) +    : SM(sm), ShowColors(ShowColors), EC(EC) { +  init(MemoryBufferRef(Input, "YAML")); +} + +Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors, +                 std::error_code *EC) +    : SM(SM_), ShowColors(ShowColors), EC(EC) { +  init(Buffer); +} + +void Scanner::init(MemoryBufferRef Buffer) { +  InputBuffer = Buffer; +  Current = InputBuffer.getBufferStart(); +  End = InputBuffer.getBufferEnd(); +  Indent = -1; +  Column = 0; +  Line = 0; +  FlowLevel = 0; +  IsStartOfStream = true; +  IsSimpleKeyAllowed = true; +  Failed = false; +  std::unique_ptr<MemoryBuffer> InputBufferOwner = +      MemoryBuffer::getMemBuffer(Buffer); +  SM.AddNewSourceBuffer(std::move(InputBufferOwner), SMLoc()); +} + +Token &Scanner::peekNext() { +  // If the current token is a possible simple key, keep parsing until we +  // can confirm. +  bool NeedMore = false; +  while (true) { +    if (TokenQueue.empty() || NeedMore) { +      if (!fetchMoreTokens()) { +        TokenQueue.clear(); +        TokenQueue.push_back(Token()); +        return TokenQueue.front(); +      } +    } +    assert(!TokenQueue.empty() && +            "fetchMoreTokens lied about getting tokens!"); + +    removeStaleSimpleKeyCandidates(); +    SimpleKey SK; +    SK.Tok = TokenQueue.begin(); +    if (!is_contained(SimpleKeys, SK)) +      break; +    else +      NeedMore = true; +  } +  return TokenQueue.front(); +} + +Token Scanner::getNext() { +  Token Ret = peekNext(); +  // TokenQueue can be empty if there was an error getting the next token. +  if (!TokenQueue.empty()) +    TokenQueue.pop_front(); + +  // There cannot be any referenced Token's if the TokenQueue is empty. So do a +  // quick deallocation of them all. +  if (TokenQueue.empty()) +    TokenQueue.resetAlloc(); + +  return Ret; +} + +StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) { +  if (Position == End) +    return Position; +  // Check 7 bit c-printable - b-char. +  if (   *Position == 0x09 +      || (*Position >= 0x20 && *Position <= 0x7E)) +    return Position + 1; + +  // Check for valid UTF-8. +  if (uint8_t(*Position) & 0x80) { +    UTF8Decoded u8d = decodeUTF8(Position); +    if (   u8d.second != 0 +        && u8d.first != 0xFEFF +        && ( u8d.first == 0x85 +          || ( u8d.first >= 0xA0 +            && u8d.first <= 0xD7FF) +          || ( u8d.first >= 0xE000 +            && u8d.first <= 0xFFFD) +          || ( u8d.first >= 0x10000 +            && u8d.first <= 0x10FFFF))) +      return Position + u8d.second; +  } +  return Position; +} + +StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) { +  if (Position == End) +    return Position; +  if (*Position == 0x0D) { +    if (Position + 1 != End && *(Position + 1) == 0x0A) +      return Position + 2; +    return Position + 1; +  } + +  if (*Position == 0x0A) +    return Position + 1; +  return Position; +} + +StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) { +  if (Position == End) +    return Position; +  if (*Position == ' ') +    return Position + 1; +  return Position; +} + +StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) { +  if (Position == End) +    return Position; +  if (*Position == ' ' || *Position == '\t') +    return Position + 1; +  return Position; +} + +StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) { +  if (Position == End) +    return Position; +  if (*Position == ' ' || *Position == '\t') +    return Position; +  return skip_nb_char(Position); +} + +StringRef::iterator Scanner::skip_while( SkipWhileFunc Func +                                       , StringRef::iterator Position) { +  while (true) { +    StringRef::iterator i = (this->*Func)(Position); +    if (i == Position) +      break; +    Position = i; +  } +  return Position; +} + +void Scanner::advanceWhile(SkipWhileFunc Func) { +  auto Final = skip_while(Func, Current); +  Column += Final - Current; +  Current = Final; +} + +static bool is_ns_hex_digit(const char C) { +  return    (C >= '0' && C <= '9') +         || (C >= 'a' && C <= 'z') +         || (C >= 'A' && C <= 'Z'); +} + +static bool is_ns_word_char(const char C) { +  return    C == '-' +         || (C >= 'a' && C <= 'z') +         || (C >= 'A' && C <= 'Z'); +} + +void Scanner::scan_ns_uri_char() { +  while (true) { +    if (Current == End) +      break; +    if ((   *Current == '%' +          && Current + 2 < End +          && is_ns_hex_digit(*(Current + 1)) +          && is_ns_hex_digit(*(Current + 2))) +        || is_ns_word_char(*Current) +        || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]") +          != StringRef::npos) { +      ++Current; +      ++Column; +    } else +      break; +  } +} + +bool Scanner::consume(uint32_t Expected) { +  if (Expected >= 0x80) +    report_fatal_error("Not dealing with this yet"); +  if (Current == End) +    return false; +  if (uint8_t(*Current) >= 0x80) +    report_fatal_error("Not dealing with this yet"); +  if (uint8_t(*Current) == Expected) { +    ++Current; +    ++Column; +    return true; +  } +  return false; +} + +void Scanner::skip(uint32_t Distance) { +  Current += Distance; +  Column += Distance; +  assert(Current <= End && "Skipped past the end"); +} + +bool Scanner::isBlankOrBreak(StringRef::iterator Position) { +  if (Position == End) +    return false; +  return *Position == ' ' || *Position == '\t' || *Position == '\r' || +         *Position == '\n'; +} + +bool Scanner::consumeLineBreakIfPresent() { +  auto Next = skip_b_break(Current); +  if (Next == Current) +    return false; +  Column = 0; +  ++Line; +  Current = Next; +  return true; +} + +void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok +                                    , unsigned AtColumn +                                    , bool IsRequired) { +  if (IsSimpleKeyAllowed) { +    SimpleKey SK; +    SK.Tok = Tok; +    SK.Line = Line; +    SK.Column = AtColumn; +    SK.IsRequired = IsRequired; +    SK.FlowLevel = FlowLevel; +    SimpleKeys.push_back(SK); +  } +} + +void Scanner::removeStaleSimpleKeyCandidates() { +  for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin(); +                                            i != SimpleKeys.end();) { +    if (i->Line != Line || i->Column + 1024 < Column) { +      if (i->IsRequired) +        setError( "Could not find expected : for simple key" +                , i->Tok->Range.begin()); +      i = SimpleKeys.erase(i); +    } else +      ++i; +  } +} + +void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) { +  if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level) +    SimpleKeys.pop_back(); +} + +bool Scanner::unrollIndent(int ToColumn) { +  Token T; +  // Indentation is ignored in flow. +  if (FlowLevel != 0) +    return true; + +  while (Indent > ToColumn) { +    T.Kind = Token::TK_BlockEnd; +    T.Range = StringRef(Current, 1); +    TokenQueue.push_back(T); +    Indent = Indents.pop_back_val(); +  } + +  return true; +} + +bool Scanner::rollIndent( int ToColumn +                        , Token::TokenKind Kind +                        , TokenQueueT::iterator InsertPoint) { +  if (FlowLevel) +    return true; +  if (Indent < ToColumn) { +    Indents.push_back(Indent); +    Indent = ToColumn; + +    Token T; +    T.Kind = Kind; +    T.Range = StringRef(Current, 0); +    TokenQueue.insert(InsertPoint, T); +  } +  return true; +} + +void Scanner::skipComment() { +  if (*Current != '#') +    return; +  while (true) { +    // This may skip more than one byte, thus Column is only incremented +    // for code points. +    StringRef::iterator I = skip_nb_char(Current); +    if (I == Current) +      break; +    Current = I; +    ++Column; +  } +} + +void Scanner::scanToNextToken() { +  while (true) { +    while (*Current == ' ' || *Current == '\t') { +      skip(1); +    } + +    skipComment(); + +    // Skip EOL. +    StringRef::iterator i = skip_b_break(Current); +    if (i == Current) +      break; +    Current = i; +    ++Line; +    Column = 0; +    // New lines may start a simple key. +    if (!FlowLevel) +      IsSimpleKeyAllowed = true; +  } +} + +bool Scanner::scanStreamStart() { +  IsStartOfStream = false; + +  EncodingInfo EI = getUnicodeEncoding(currentInput()); + +  Token T; +  T.Kind = Token::TK_StreamStart; +  T.Range = StringRef(Current, EI.second); +  TokenQueue.push_back(T); +  Current += EI.second; +  return true; +} + +bool Scanner::scanStreamEnd() { +  // Force an ending new line if one isn't present. +  if (Column != 0) { +    Column = 0; +    ++Line; +  } + +  unrollIndent(-1); +  SimpleKeys.clear(); +  IsSimpleKeyAllowed = false; + +  Token T; +  T.Kind = Token::TK_StreamEnd; +  T.Range = StringRef(Current, 0); +  TokenQueue.push_back(T); +  return true; +} + +bool Scanner::scanDirective() { +  // Reset the indentation level. +  unrollIndent(-1); +  SimpleKeys.clear(); +  IsSimpleKeyAllowed = false; + +  StringRef::iterator Start = Current; +  consume('%'); +  StringRef::iterator NameStart = Current; +  Current = skip_while(&Scanner::skip_ns_char, Current); +  StringRef Name(NameStart, Current - NameStart); +  Current = skip_while(&Scanner::skip_s_white, Current); + +  Token T; +  if (Name == "YAML") { +    Current = skip_while(&Scanner::skip_ns_char, Current); +    T.Kind = Token::TK_VersionDirective; +    T.Range = StringRef(Start, Current - Start); +    TokenQueue.push_back(T); +    return true; +  } else if(Name == "TAG") { +    Current = skip_while(&Scanner::skip_ns_char, Current); +    Current = skip_while(&Scanner::skip_s_white, Current); +    Current = skip_while(&Scanner::skip_ns_char, Current); +    T.Kind = Token::TK_TagDirective; +    T.Range = StringRef(Start, Current - Start); +    TokenQueue.push_back(T); +    return true; +  } +  return false; +} + +bool Scanner::scanDocumentIndicator(bool IsStart) { +  unrollIndent(-1); +  SimpleKeys.clear(); +  IsSimpleKeyAllowed = false; + +  Token T; +  T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd; +  T.Range = StringRef(Current, 3); +  skip(3); +  TokenQueue.push_back(T); +  return true; +} + +bool Scanner::scanFlowCollectionStart(bool IsSequence) { +  Token T; +  T.Kind = IsSequence ? Token::TK_FlowSequenceStart +                      : Token::TK_FlowMappingStart; +  T.Range = StringRef(Current, 1); +  skip(1); +  TokenQueue.push_back(T); + +  // [ and { may begin a simple key. +  saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false); + +  // And may also be followed by a simple key. +  IsSimpleKeyAllowed = true; +  ++FlowLevel; +  return true; +} + +bool Scanner::scanFlowCollectionEnd(bool IsSequence) { +  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); +  IsSimpleKeyAllowed = false; +  Token T; +  T.Kind = IsSequence ? Token::TK_FlowSequenceEnd +                      : Token::TK_FlowMappingEnd; +  T.Range = StringRef(Current, 1); +  skip(1); +  TokenQueue.push_back(T); +  if (FlowLevel) +    --FlowLevel; +  return true; +} + +bool Scanner::scanFlowEntry() { +  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); +  IsSimpleKeyAllowed = true; +  Token T; +  T.Kind = Token::TK_FlowEntry; +  T.Range = StringRef(Current, 1); +  skip(1); +  TokenQueue.push_back(T); +  return true; +} + +bool Scanner::scanBlockEntry() { +  rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end()); +  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); +  IsSimpleKeyAllowed = true; +  Token T; +  T.Kind = Token::TK_BlockEntry; +  T.Range = StringRef(Current, 1); +  skip(1); +  TokenQueue.push_back(T); +  return true; +} + +bool Scanner::scanKey() { +  if (!FlowLevel) +    rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); + +  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); +  IsSimpleKeyAllowed = !FlowLevel; + +  Token T; +  T.Kind = Token::TK_Key; +  T.Range = StringRef(Current, 1); +  skip(1); +  TokenQueue.push_back(T); +  return true; +} + +bool Scanner::scanValue() { +  // If the previous token could have been a simple key, insert the key token +  // into the token queue. +  if (!SimpleKeys.empty()) { +    SimpleKey SK = SimpleKeys.pop_back_val(); +    Token T; +    T.Kind = Token::TK_Key; +    T.Range = SK.Tok->Range; +    TokenQueueT::iterator i, e; +    for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) { +      if (i == SK.Tok) +        break; +    } +    assert(i != e && "SimpleKey not in token queue!"); +    i = TokenQueue.insert(i, T); + +    // We may also need to add a Block-Mapping-Start token. +    rollIndent(SK.Column, Token::TK_BlockMappingStart, i); + +    IsSimpleKeyAllowed = false; +  } else { +    if (!FlowLevel) +      rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); +    IsSimpleKeyAllowed = !FlowLevel; +  } + +  Token T; +  T.Kind = Token::TK_Value; +  T.Range = StringRef(Current, 1); +  skip(1); +  TokenQueue.push_back(T); +  return true; +} + +// Forbidding inlining improves performance by roughly 20%. +// FIXME: Remove once llvm optimizes this to the faster version without hints. +LLVM_ATTRIBUTE_NOINLINE static bool +wasEscaped(StringRef::iterator First, StringRef::iterator Position); + +// Returns whether a character at 'Position' was escaped with a leading '\'. +// 'First' specifies the position of the first character in the string. +static bool wasEscaped(StringRef::iterator First, +                       StringRef::iterator Position) { +  assert(Position - 1 >= First); +  StringRef::iterator I = Position - 1; +  // We calculate the number of consecutive '\'s before the current position +  // by iterating backwards through our string. +  while (I >= First && *I == '\\') --I; +  // (Position - 1 - I) now contains the number of '\'s before the current +  // position. If it is odd, the character at 'Position' was escaped. +  return (Position - 1 - I) % 2 == 1; +} + +bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { +  StringRef::iterator Start = Current; +  unsigned ColStart = Column; +  if (IsDoubleQuoted) { +    do { +      ++Current; +      while (Current != End && *Current != '"') +        ++Current; +      // Repeat until the previous character was not a '\' or was an escaped +      // backslash. +    } while (   Current != End +             && *(Current - 1) == '\\' +             && wasEscaped(Start + 1, Current)); +  } else { +    skip(1); +    while (true) { +      // Skip a ' followed by another '. +      if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') { +        skip(2); +        continue; +      } else if (*Current == '\'') +        break; +      StringRef::iterator i = skip_nb_char(Current); +      if (i == Current) { +        i = skip_b_break(Current); +        if (i == Current) +          break; +        Current = i; +        Column = 0; +        ++Line; +      } else { +        if (i == End) +          break; +        Current = i; +        ++Column; +      } +    } +  } + +  if (Current == End) { +    setError("Expected quote at end of scalar", Current); +    return false; +  } + +  skip(1); // Skip ending quote. +  Token T; +  T.Kind = Token::TK_Scalar; +  T.Range = StringRef(Start, Current - Start); +  TokenQueue.push_back(T); + +  saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); + +  IsSimpleKeyAllowed = false; + +  return true; +} + +bool Scanner::scanPlainScalar() { +  StringRef::iterator Start = Current; +  unsigned ColStart = Column; +  unsigned LeadingBlanks = 0; +  assert(Indent >= -1 && "Indent must be >= -1 !"); +  unsigned indent = static_cast<unsigned>(Indent + 1); +  while (true) { +    if (*Current == '#') +      break; + +    while (!isBlankOrBreak(Current)) { +      if (  FlowLevel && *Current == ':' +          && !(isBlankOrBreak(Current + 1) || *(Current + 1) == ',')) { +        setError("Found unexpected ':' while scanning a plain scalar", Current); +        return false; +      } + +      // Check for the end of the plain scalar. +      if (  (*Current == ':' && isBlankOrBreak(Current + 1)) +          || (  FlowLevel +          && (StringRef(Current, 1).find_first_of(",:?[]{}") +              != StringRef::npos))) +        break; + +      StringRef::iterator i = skip_nb_char(Current); +      if (i == Current) +        break; +      Current = i; +      ++Column; +    } + +    // Are we at the end? +    if (!isBlankOrBreak(Current)) +      break; + +    // Eat blanks. +    StringRef::iterator Tmp = Current; +    while (isBlankOrBreak(Tmp)) { +      StringRef::iterator i = skip_s_white(Tmp); +      if (i != Tmp) { +        if (LeadingBlanks && (Column < indent) && *Tmp == '\t') { +          setError("Found invalid tab character in indentation", Tmp); +          return false; +        } +        Tmp = i; +        ++Column; +      } else { +        i = skip_b_break(Tmp); +        if (!LeadingBlanks) +          LeadingBlanks = 1; +        Tmp = i; +        Column = 0; +        ++Line; +      } +    } + +    if (!FlowLevel && Column < indent) +      break; + +    Current = Tmp; +  } +  if (Start == Current) { +    setError("Got empty plain scalar", Start); +    return false; +  } +  Token T; +  T.Kind = Token::TK_Scalar; +  T.Range = StringRef(Start, Current - Start); +  TokenQueue.push_back(T); + +  // Plain scalars can be simple keys. +  saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); + +  IsSimpleKeyAllowed = false; + +  return true; +} + +bool Scanner::scanAliasOrAnchor(bool IsAlias) { +  StringRef::iterator Start = Current; +  unsigned ColStart = Column; +  skip(1); +  while(true) { +    if (   *Current == '[' || *Current == ']' +        || *Current == '{' || *Current == '}' +        || *Current == ',' +        || *Current == ':') +      break; +    StringRef::iterator i = skip_ns_char(Current); +    if (i == Current) +      break; +    Current = i; +    ++Column; +  } + +  if (Start == Current) { +    setError("Got empty alias or anchor", Start); +    return false; +  } + +  Token T; +  T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor; +  T.Range = StringRef(Start, Current - Start); +  TokenQueue.push_back(T); + +  // Alias and anchors can be simple keys. +  saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); + +  IsSimpleKeyAllowed = false; + +  return true; +} + +char Scanner::scanBlockChompingIndicator() { +  char Indicator = ' '; +  if (Current != End && (*Current == '+' || *Current == '-')) { +    Indicator = *Current; +    skip(1); +  } +  return Indicator; +} + +/// Get the number of line breaks after chomping. +/// +/// Return the number of trailing line breaks to emit, depending on +/// \p ChompingIndicator. +static unsigned getChompedLineBreaks(char ChompingIndicator, +                                     unsigned LineBreaks, StringRef Str) { +  if (ChompingIndicator == '-') // Strip all line breaks. +    return 0; +  if (ChompingIndicator == '+') // Keep all line breaks. +    return LineBreaks; +  // Clip trailing lines. +  return Str.empty() ? 0 : 1; +} + +unsigned Scanner::scanBlockIndentationIndicator() { +  unsigned Indent = 0; +  if (Current != End && (*Current >= '1' && *Current <= '9')) { +    Indent = unsigned(*Current - '0'); +    skip(1); +  } +  return Indent; +} + +bool Scanner::scanBlockScalarHeader(char &ChompingIndicator, +                                    unsigned &IndentIndicator, bool &IsDone) { +  auto Start = Current; + +  ChompingIndicator = scanBlockChompingIndicator(); +  IndentIndicator = scanBlockIndentationIndicator(); +  // Check for the chomping indicator once again. +  if (ChompingIndicator == ' ') +    ChompingIndicator = scanBlockChompingIndicator(); +  Current = skip_while(&Scanner::skip_s_white, Current); +  skipComment(); + +  if (Current == End) { // EOF, we have an empty scalar. +    Token T; +    T.Kind = Token::TK_BlockScalar; +    T.Range = StringRef(Start, Current - Start); +    TokenQueue.push_back(T); +    IsDone = true; +    return true; +  } + +  if (!consumeLineBreakIfPresent()) { +    setError("Expected a line break after block scalar header", Current); +    return false; +  } +  return true; +} + +bool Scanner::findBlockScalarIndent(unsigned &BlockIndent, +                                    unsigned BlockExitIndent, +                                    unsigned &LineBreaks, bool &IsDone) { +  unsigned MaxAllSpaceLineCharacters = 0; +  StringRef::iterator LongestAllSpaceLine; + +  while (true) { +    advanceWhile(&Scanner::skip_s_space); +    if (skip_nb_char(Current) != Current) { +      // This line isn't empty, so try and find the indentation. +      if (Column <= BlockExitIndent) { // End of the block literal. +        IsDone = true; +        return true; +      } +      // We found the block's indentation. +      BlockIndent = Column; +      if (MaxAllSpaceLineCharacters > BlockIndent) { +        setError( +            "Leading all-spaces line must be smaller than the block indent", +            LongestAllSpaceLine); +        return false; +      } +      return true; +    } +    if (skip_b_break(Current) != Current && +        Column > MaxAllSpaceLineCharacters) { +      // Record the longest all-space line in case it's longer than the +      // discovered block indent. +      MaxAllSpaceLineCharacters = Column; +      LongestAllSpaceLine = Current; +    } + +    // Check for EOF. +    if (Current == End) { +      IsDone = true; +      return true; +    } + +    if (!consumeLineBreakIfPresent()) { +      IsDone = true; +      return true; +    } +    ++LineBreaks; +  } +  return true; +} + +bool Scanner::scanBlockScalarIndent(unsigned BlockIndent, +                                    unsigned BlockExitIndent, bool &IsDone) { +  // Skip the indentation. +  while (Column < BlockIndent) { +    auto I = skip_s_space(Current); +    if (I == Current) +      break; +    Current = I; +    ++Column; +  } + +  if (skip_nb_char(Current) == Current) +    return true; + +  if (Column <= BlockExitIndent) { // End of the block literal. +    IsDone = true; +    return true; +  } + +  if (Column < BlockIndent) { +    if (Current != End && *Current == '#') { // Trailing comment. +      IsDone = true; +      return true; +    } +    setError("A text line is less indented than the block scalar", Current); +    return false; +  } +  return true; // A normal text line. +} + +bool Scanner::scanBlockScalar(bool IsLiteral) { +  // Eat '|' or '>' +  assert(*Current == '|' || *Current == '>'); +  skip(1); + +  char ChompingIndicator; +  unsigned BlockIndent; +  bool IsDone = false; +  if (!scanBlockScalarHeader(ChompingIndicator, BlockIndent, IsDone)) +    return false; +  if (IsDone) +    return true; + +  auto Start = Current; +  unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent; +  unsigned LineBreaks = 0; +  if (BlockIndent == 0) { +    if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks, +                               IsDone)) +      return false; +  } + +  // Scan the block's scalars body. +  SmallString<256> Str; +  while (!IsDone) { +    if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone)) +      return false; +    if (IsDone) +      break; + +    // Parse the current line. +    auto LineStart = Current; +    advanceWhile(&Scanner::skip_nb_char); +    if (LineStart != Current) { +      Str.append(LineBreaks, '\n'); +      Str.append(StringRef(LineStart, Current - LineStart)); +      LineBreaks = 0; +    } + +    // Check for EOF. +    if (Current == End) +      break; + +    if (!consumeLineBreakIfPresent()) +      break; +    ++LineBreaks; +  } + +  if (Current == End && !LineBreaks) +    // Ensure that there is at least one line break before the end of file. +    LineBreaks = 1; +  Str.append(getChompedLineBreaks(ChompingIndicator, LineBreaks, Str), '\n'); + +  // New lines may start a simple key. +  if (!FlowLevel) +    IsSimpleKeyAllowed = true; + +  Token T; +  T.Kind = Token::TK_BlockScalar; +  T.Range = StringRef(Start, Current - Start); +  T.Value = Str.str().str(); +  TokenQueue.push_back(T); +  return true; +} + +bool Scanner::scanTag() { +  StringRef::iterator Start = Current; +  unsigned ColStart = Column; +  skip(1); // Eat !. +  if (Current == End || isBlankOrBreak(Current)); // An empty tag. +  else if (*Current == '<') { +    skip(1); +    scan_ns_uri_char(); +    if (!consume('>')) +      return false; +  } else { +    // FIXME: Actually parse the c-ns-shorthand-tag rule. +    Current = skip_while(&Scanner::skip_ns_char, Current); +  } + +  Token T; +  T.Kind = Token::TK_Tag; +  T.Range = StringRef(Start, Current - Start); +  TokenQueue.push_back(T); + +  // Tags can be simple keys. +  saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); + +  IsSimpleKeyAllowed = false; + +  return true; +} + +bool Scanner::fetchMoreTokens() { +  if (IsStartOfStream) +    return scanStreamStart(); + +  scanToNextToken(); + +  if (Current == End) +    return scanStreamEnd(); + +  removeStaleSimpleKeyCandidates(); + +  unrollIndent(Column); + +  if (Column == 0 && *Current == '%') +    return scanDirective(); + +  if (Column == 0 && Current + 4 <= End +      && *Current == '-' +      && *(Current + 1) == '-' +      && *(Current + 2) == '-' +      && (Current + 3 == End || isBlankOrBreak(Current + 3))) +    return scanDocumentIndicator(true); + +  if (Column == 0 && Current + 4 <= End +      && *Current == '.' +      && *(Current + 1) == '.' +      && *(Current + 2) == '.' +      && (Current + 3 == End || isBlankOrBreak(Current + 3))) +    return scanDocumentIndicator(false); + +  if (*Current == '[') +    return scanFlowCollectionStart(true); + +  if (*Current == '{') +    return scanFlowCollectionStart(false); + +  if (*Current == ']') +    return scanFlowCollectionEnd(true); + +  if (*Current == '}') +    return scanFlowCollectionEnd(false); + +  if (*Current == ',') +    return scanFlowEntry(); + +  if (*Current == '-' && isBlankOrBreak(Current + 1)) +    return scanBlockEntry(); + +  if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1))) +    return scanKey(); + +  if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1))) +    return scanValue(); + +  if (*Current == '*') +    return scanAliasOrAnchor(true); + +  if (*Current == '&') +    return scanAliasOrAnchor(false); + +  if (*Current == '!') +    return scanTag(); + +  if (*Current == '|' && !FlowLevel) +    return scanBlockScalar(true); + +  if (*Current == '>' && !FlowLevel) +    return scanBlockScalar(false); + +  if (*Current == '\'') +    return scanFlowScalar(false); + +  if (*Current == '"') +    return scanFlowScalar(true); + +  // Get a plain scalar. +  StringRef FirstChar(Current, 1); +  if (!(isBlankOrBreak(Current) +        || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos) +      || (*Current == '-' && !isBlankOrBreak(Current + 1)) +      || (!FlowLevel && (*Current == '?' || *Current == ':') +          && isBlankOrBreak(Current + 1)) +      || (!FlowLevel && *Current == ':' +                      && Current + 2 < End +                      && *(Current + 1) == ':' +                      && !isBlankOrBreak(Current + 2))) +    return scanPlainScalar(); + +  setError("Unrecognized character while tokenizing."); +  return false; +} + +Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors, +               std::error_code *EC) +    : scanner(new Scanner(Input, SM, ShowColors, EC)), CurrentDoc() {} + +Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors, +               std::error_code *EC) +    : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)), CurrentDoc() {} + +Stream::~Stream() = default; + +bool Stream::failed() { return scanner->failed(); } + +void Stream::printError(Node *N, const Twine &Msg) { +  scanner->printError( N->getSourceRange().Start +                     , SourceMgr::DK_Error +                     , Msg +                     , N->getSourceRange()); +} + +document_iterator Stream::begin() { +  if (CurrentDoc) +    report_fatal_error("Can only iterate over the stream once"); + +  // Skip Stream-Start. +  scanner->getNext(); + +  CurrentDoc.reset(new Document(*this)); +  return document_iterator(CurrentDoc); +} + +document_iterator Stream::end() { +  return document_iterator(); +} + +void Stream::skip() { +  for (document_iterator i = begin(), e = end(); i != e; ++i) +    i->skip(); +} + +Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A, +           StringRef T) +    : Doc(D), TypeID(Type), Anchor(A), Tag(T) { +  SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin()); +  SourceRange = SMRange(Start, Start); +} + +std::string Node::getVerbatimTag() const { +  StringRef Raw = getRawTag(); +  if (!Raw.empty() && Raw != "!") { +    std::string Ret; +    if (Raw.find_last_of('!') == 0) { +      Ret = Doc->getTagMap().find("!")->second; +      Ret += Raw.substr(1); +      return Ret; +    } else if (Raw.startswith("!!")) { +      Ret = Doc->getTagMap().find("!!")->second; +      Ret += Raw.substr(2); +      return Ret; +    } else { +      StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1); +      std::map<StringRef, StringRef>::const_iterator It = +          Doc->getTagMap().find(TagHandle); +      if (It != Doc->getTagMap().end()) +        Ret = It->second; +      else { +        Token T; +        T.Kind = Token::TK_Tag; +        T.Range = TagHandle; +        setError(Twine("Unknown tag handle ") + TagHandle, T); +      } +      Ret += Raw.substr(Raw.find_last_of('!') + 1); +      return Ret; +    } +  } + +  switch (getType()) { +  case NK_Null: +    return "tag:yaml.org,2002:null"; +  case NK_Scalar: +  case NK_BlockScalar: +    // TODO: Tag resolution. +    return "tag:yaml.org,2002:str"; +  case NK_Mapping: +    return "tag:yaml.org,2002:map"; +  case NK_Sequence: +    return "tag:yaml.org,2002:seq"; +  } + +  return ""; +} + +Token &Node::peekNext() { +  return Doc->peekNext(); +} + +Token Node::getNext() { +  return Doc->getNext(); +} + +Node *Node::parseBlockNode() { +  return Doc->parseBlockNode(); +} + +BumpPtrAllocator &Node::getAllocator() { +  return Doc->NodeAllocator; +} + +void Node::setError(const Twine &Msg, Token &Tok) const { +  Doc->setError(Msg, Tok); +} + +bool Node::failed() const { +  return Doc->failed(); +} + +StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const { +  // TODO: Handle newlines properly. We need to remove leading whitespace. +  if (Value[0] == '"') { // Double quoted. +    // Pull off the leading and trailing "s. +    StringRef UnquotedValue = Value.substr(1, Value.size() - 2); +    // Search for characters that would require unescaping the value. +    StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n"); +    if (i != StringRef::npos) +      return unescapeDoubleQuoted(UnquotedValue, i, Storage); +    return UnquotedValue; +  } else if (Value[0] == '\'') { // Single quoted. +    // Pull off the leading and trailing 's. +    StringRef UnquotedValue = Value.substr(1, Value.size() - 2); +    StringRef::size_type i = UnquotedValue.find('\''); +    if (i != StringRef::npos) { +      // We're going to need Storage. +      Storage.clear(); +      Storage.reserve(UnquotedValue.size()); +      for (; i != StringRef::npos; i = UnquotedValue.find('\'')) { +        StringRef Valid(UnquotedValue.begin(), i); +        Storage.insert(Storage.end(), Valid.begin(), Valid.end()); +        Storage.push_back('\''); +        UnquotedValue = UnquotedValue.substr(i + 2); +      } +      Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end()); +      return StringRef(Storage.begin(), Storage.size()); +    } +    return UnquotedValue; +  } +  // Plain or block. +  return Value.rtrim(' '); +} + +StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue +                                          , StringRef::size_type i +                                          , SmallVectorImpl<char> &Storage) +                                          const { +  // Use Storage to build proper value. +  Storage.clear(); +  Storage.reserve(UnquotedValue.size()); +  for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) { +    // Insert all previous chars into Storage. +    StringRef Valid(UnquotedValue.begin(), i); +    Storage.insert(Storage.end(), Valid.begin(), Valid.end()); +    // Chop off inserted chars. +    UnquotedValue = UnquotedValue.substr(i); + +    assert(!UnquotedValue.empty() && "Can't be empty!"); + +    // Parse escape or line break. +    switch (UnquotedValue[0]) { +    case '\r': +    case '\n': +      Storage.push_back('\n'); +      if (   UnquotedValue.size() > 1 +          && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) +        UnquotedValue = UnquotedValue.substr(1); +      UnquotedValue = UnquotedValue.substr(1); +      break; +    default: +      if (UnquotedValue.size() == 1) +        // TODO: Report error. +        break; +      UnquotedValue = UnquotedValue.substr(1); +      switch (UnquotedValue[0]) { +      default: { +          Token T; +          T.Range = StringRef(UnquotedValue.begin(), 1); +          setError("Unrecognized escape code!", T); +          return ""; +        } +      case '\r': +      case '\n': +        // Remove the new line. +        if (   UnquotedValue.size() > 1 +            && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) +          UnquotedValue = UnquotedValue.substr(1); +        // If this was just a single byte newline, it will get skipped +        // below. +        break; +      case '0': +        Storage.push_back(0x00); +        break; +      case 'a': +        Storage.push_back(0x07); +        break; +      case 'b': +        Storage.push_back(0x08); +        break; +      case 't': +      case 0x09: +        Storage.push_back(0x09); +        break; +      case 'n': +        Storage.push_back(0x0A); +        break; +      case 'v': +        Storage.push_back(0x0B); +        break; +      case 'f': +        Storage.push_back(0x0C); +        break; +      case 'r': +        Storage.push_back(0x0D); +        break; +      case 'e': +        Storage.push_back(0x1B); +        break; +      case ' ': +        Storage.push_back(0x20); +        break; +      case '"': +        Storage.push_back(0x22); +        break; +      case '/': +        Storage.push_back(0x2F); +        break; +      case '\\': +        Storage.push_back(0x5C); +        break; +      case 'N': +        encodeUTF8(0x85, Storage); +        break; +      case '_': +        encodeUTF8(0xA0, Storage); +        break; +      case 'L': +        encodeUTF8(0x2028, Storage); +        break; +      case 'P': +        encodeUTF8(0x2029, Storage); +        break; +      case 'x': { +          if (UnquotedValue.size() < 3) +            // TODO: Report error. +            break; +          unsigned int UnicodeScalarValue; +          if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue)) +            // TODO: Report error. +            UnicodeScalarValue = 0xFFFD; +          encodeUTF8(UnicodeScalarValue, Storage); +          UnquotedValue = UnquotedValue.substr(2); +          break; +        } +      case 'u': { +          if (UnquotedValue.size() < 5) +            // TODO: Report error. +            break; +          unsigned int UnicodeScalarValue; +          if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue)) +            // TODO: Report error. +            UnicodeScalarValue = 0xFFFD; +          encodeUTF8(UnicodeScalarValue, Storage); +          UnquotedValue = UnquotedValue.substr(4); +          break; +        } +      case 'U': { +          if (UnquotedValue.size() < 9) +            // TODO: Report error. +            break; +          unsigned int UnicodeScalarValue; +          if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue)) +            // TODO: Report error. +            UnicodeScalarValue = 0xFFFD; +          encodeUTF8(UnicodeScalarValue, Storage); +          UnquotedValue = UnquotedValue.substr(8); +          break; +        } +      } +      UnquotedValue = UnquotedValue.substr(1); +    } +  } +  Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end()); +  return StringRef(Storage.begin(), Storage.size()); +} + +Node *KeyValueNode::getKey() { +  if (Key) +    return Key; +  // Handle implicit null keys. +  { +    Token &t = peekNext(); +    if (   t.Kind == Token::TK_BlockEnd +        || t.Kind == Token::TK_Value +        || t.Kind == Token::TK_Error) { +      return Key = new (getAllocator()) NullNode(Doc); +    } +    if (t.Kind == Token::TK_Key) +      getNext(); // skip TK_Key. +  } + +  // Handle explicit null keys. +  Token &t = peekNext(); +  if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) { +    return Key = new (getAllocator()) NullNode(Doc); +  } + +  // We've got a normal key. +  return Key = parseBlockNode(); +} + +Node *KeyValueNode::getValue() { +  if (Value) +    return Value; +  getKey()->skip(); +  if (failed()) +    return Value = new (getAllocator()) NullNode(Doc); + +  // Handle implicit null values. +  { +    Token &t = peekNext(); +    if (   t.Kind == Token::TK_BlockEnd +        || t.Kind == Token::TK_FlowMappingEnd +        || t.Kind == Token::TK_Key +        || t.Kind == Token::TK_FlowEntry +        || t.Kind == Token::TK_Error) { +      return Value = new (getAllocator()) NullNode(Doc); +    } + +    if (t.Kind != Token::TK_Value) { +      setError("Unexpected token in Key Value.", t); +      return Value = new (getAllocator()) NullNode(Doc); +    } +    getNext(); // skip TK_Value. +  } + +  // Handle explicit null values. +  Token &t = peekNext(); +  if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) { +    return Value = new (getAllocator()) NullNode(Doc); +  } + +  // We got a normal value. +  return Value = parseBlockNode(); +} + +void MappingNode::increment() { +  if (failed()) { +    IsAtEnd = true; +    CurrentEntry = nullptr; +    return; +  } +  if (CurrentEntry) { +    CurrentEntry->skip(); +    if (Type == MT_Inline) { +      IsAtEnd = true; +      CurrentEntry = nullptr; +      return; +    } +  } +  Token T = peekNext(); +  if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) { +    // KeyValueNode eats the TK_Key. That way it can detect null keys. +    CurrentEntry = new (getAllocator()) KeyValueNode(Doc); +  } else if (Type == MT_Block) { +    switch (T.Kind) { +    case Token::TK_BlockEnd: +      getNext(); +      IsAtEnd = true; +      CurrentEntry = nullptr; +      break; +    default: +      setError("Unexpected token. Expected Key or Block End", T); +      LLVM_FALLTHROUGH; +    case Token::TK_Error: +      IsAtEnd = true; +      CurrentEntry = nullptr; +    } +  } else { +    switch (T.Kind) { +    case Token::TK_FlowEntry: +      // Eat the flow entry and recurse. +      getNext(); +      return increment(); +    case Token::TK_FlowMappingEnd: +      getNext(); +      LLVM_FALLTHROUGH; +    case Token::TK_Error: +      // Set this to end iterator. +      IsAtEnd = true; +      CurrentEntry = nullptr; +      break; +    default: +      setError( "Unexpected token. Expected Key, Flow Entry, or Flow " +                "Mapping End." +              , T); +      IsAtEnd = true; +      CurrentEntry = nullptr; +    } +  } +} + +void SequenceNode::increment() { +  if (failed()) { +    IsAtEnd = true; +    CurrentEntry = nullptr; +    return; +  } +  if (CurrentEntry) +    CurrentEntry->skip(); +  Token T = peekNext(); +  if (SeqType == ST_Block) { +    switch (T.Kind) { +    case Token::TK_BlockEntry: +      getNext(); +      CurrentEntry = parseBlockNode(); +      if (!CurrentEntry) { // An error occurred. +        IsAtEnd = true; +        CurrentEntry = nullptr; +      } +      break; +    case Token::TK_BlockEnd: +      getNext(); +      IsAtEnd = true; +      CurrentEntry = nullptr; +      break; +    default: +      setError( "Unexpected token. Expected Block Entry or Block End." +              , T); +      LLVM_FALLTHROUGH; +    case Token::TK_Error: +      IsAtEnd = true; +      CurrentEntry = nullptr; +    } +  } else if (SeqType == ST_Indentless) { +    switch (T.Kind) { +    case Token::TK_BlockEntry: +      getNext(); +      CurrentEntry = parseBlockNode(); +      if (!CurrentEntry) { // An error occurred. +        IsAtEnd = true; +        CurrentEntry = nullptr; +      } +      break; +    default: +    case Token::TK_Error: +      IsAtEnd = true; +      CurrentEntry = nullptr; +    } +  } else if (SeqType == ST_Flow) { +    switch (T.Kind) { +    case Token::TK_FlowEntry: +      // Eat the flow entry and recurse. +      getNext(); +      WasPreviousTokenFlowEntry = true; +      return increment(); +    case Token::TK_FlowSequenceEnd: +      getNext(); +      LLVM_FALLTHROUGH; +    case Token::TK_Error: +      // Set this to end iterator. +      IsAtEnd = true; +      CurrentEntry = nullptr; +      break; +    case Token::TK_StreamEnd: +    case Token::TK_DocumentEnd: +    case Token::TK_DocumentStart: +      setError("Could not find closing ]!", T); +      // Set this to end iterator. +      IsAtEnd = true; +      CurrentEntry = nullptr; +      break; +    default: +      if (!WasPreviousTokenFlowEntry) { +        setError("Expected , between entries!", T); +        IsAtEnd = true; +        CurrentEntry = nullptr; +        break; +      } +      // Otherwise it must be a flow entry. +      CurrentEntry = parseBlockNode(); +      if (!CurrentEntry) { +        IsAtEnd = true; +      } +      WasPreviousTokenFlowEntry = false; +      break; +    } +  } +} + +Document::Document(Stream &S) : stream(S), Root(nullptr) { +  // Tag maps starts with two default mappings. +  TagMap["!"] = "!"; +  TagMap["!!"] = "tag:yaml.org,2002:"; + +  if (parseDirectives()) +    expectToken(Token::TK_DocumentStart); +  Token &T = peekNext(); +  if (T.Kind == Token::TK_DocumentStart) +    getNext(); +} + +bool Document::skip()  { +  if (stream.scanner->failed()) +    return false; +  if (!Root) +    getRoot(); +  Root->skip(); +  Token &T = peekNext(); +  if (T.Kind == Token::TK_StreamEnd) +    return false; +  if (T.Kind == Token::TK_DocumentEnd) { +    getNext(); +    return skip(); +  } +  return true; +} + +Token &Document::peekNext() { +  return stream.scanner->peekNext(); +} + +Token Document::getNext() { +  return stream.scanner->getNext(); +} + +void Document::setError(const Twine &Message, Token &Location) const { +  stream.scanner->setError(Message, Location.Range.begin()); +} + +bool Document::failed() const { +  return stream.scanner->failed(); +} + +Node *Document::parseBlockNode() { +  Token T = peekNext(); +  // Handle properties. +  Token AnchorInfo; +  Token TagInfo; +parse_property: +  switch (T.Kind) { +  case Token::TK_Alias: +    getNext(); +    return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1)); +  case Token::TK_Anchor: +    if (AnchorInfo.Kind == Token::TK_Anchor) { +      setError("Already encountered an anchor for this node!", T); +      return nullptr; +    } +    AnchorInfo = getNext(); // Consume TK_Anchor. +    T = peekNext(); +    goto parse_property; +  case Token::TK_Tag: +    if (TagInfo.Kind == Token::TK_Tag) { +      setError("Already encountered a tag for this node!", T); +      return nullptr; +    } +    TagInfo = getNext(); // Consume TK_Tag. +    T = peekNext(); +    goto parse_property; +  default: +    break; +  } + +  switch (T.Kind) { +  case Token::TK_BlockEntry: +    // We got an unindented BlockEntry sequence. This is not terminated with +    // a BlockEnd. +    // Don't eat the TK_BlockEntry, SequenceNode needs it. +    return new (NodeAllocator) SequenceNode( stream.CurrentDoc +                                           , AnchorInfo.Range.substr(1) +                                           , TagInfo.Range +                                           , SequenceNode::ST_Indentless); +  case Token::TK_BlockSequenceStart: +    getNext(); +    return new (NodeAllocator) +      SequenceNode( stream.CurrentDoc +                  , AnchorInfo.Range.substr(1) +                  , TagInfo.Range +                  , SequenceNode::ST_Block); +  case Token::TK_BlockMappingStart: +    getNext(); +    return new (NodeAllocator) +      MappingNode( stream.CurrentDoc +                 , AnchorInfo.Range.substr(1) +                 , TagInfo.Range +                 , MappingNode::MT_Block); +  case Token::TK_FlowSequenceStart: +    getNext(); +    return new (NodeAllocator) +      SequenceNode( stream.CurrentDoc +                  , AnchorInfo.Range.substr(1) +                  , TagInfo.Range +                  , SequenceNode::ST_Flow); +  case Token::TK_FlowMappingStart: +    getNext(); +    return new (NodeAllocator) +      MappingNode( stream.CurrentDoc +                 , AnchorInfo.Range.substr(1) +                 , TagInfo.Range +                 , MappingNode::MT_Flow); +  case Token::TK_Scalar: +    getNext(); +    return new (NodeAllocator) +      ScalarNode( stream.CurrentDoc +                , AnchorInfo.Range.substr(1) +                , TagInfo.Range +                , T.Range); +  case Token::TK_BlockScalar: { +    getNext(); +    StringRef NullTerminatedStr(T.Value.c_str(), T.Value.length() + 1); +    StringRef StrCopy = NullTerminatedStr.copy(NodeAllocator).drop_back(); +    return new (NodeAllocator) +        BlockScalarNode(stream.CurrentDoc, AnchorInfo.Range.substr(1), +                        TagInfo.Range, StrCopy, T.Range); +  } +  case Token::TK_Key: +    // Don't eat the TK_Key, KeyValueNode expects it. +    return new (NodeAllocator) +      MappingNode( stream.CurrentDoc +                 , AnchorInfo.Range.substr(1) +                 , TagInfo.Range +                 , MappingNode::MT_Inline); +  case Token::TK_DocumentStart: +  case Token::TK_DocumentEnd: +  case Token::TK_StreamEnd: +  default: +    // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not +    //       !!null null. +    return new (NodeAllocator) NullNode(stream.CurrentDoc); +  case Token::TK_Error: +    return nullptr; +  } +  llvm_unreachable("Control flow shouldn't reach here."); +  return nullptr; +} + +bool Document::parseDirectives() { +  bool isDirective = false; +  while (true) { +    Token T = peekNext(); +    if (T.Kind == Token::TK_TagDirective) { +      parseTAGDirective(); +      isDirective = true; +    } else if (T.Kind == Token::TK_VersionDirective) { +      parseYAMLDirective(); +      isDirective = true; +    } else +      break; +  } +  return isDirective; +} + +void Document::parseYAMLDirective() { +  getNext(); // Eat %YAML <version> +} + +void Document::parseTAGDirective() { +  Token Tag = getNext(); // %TAG <handle> <prefix> +  StringRef T = Tag.Range; +  // Strip %TAG +  T = T.substr(T.find_first_of(" \t")).ltrim(" \t"); +  std::size_t HandleEnd = T.find_first_of(" \t"); +  StringRef TagHandle = T.substr(0, HandleEnd); +  StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t"); +  TagMap[TagHandle] = TagPrefix; +} + +bool Document::expectToken(int TK) { +  Token T = getNext(); +  if (T.Kind != TK) { +    setError("Unexpected token", T); +    return false; +  } +  return true; +}  | 
