1 files changed, 134 insertions, 0 deletions
diff --git a/contrib/llvm-project/clang/lib/Format/FormatTokenLexer.h b/contrib/llvm-project/clang/lib/Format/FormatTokenLexer.h
new file mode 100644
index 000000000000..6b08677e3369
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Format/FormatTokenLexer.h
@@ -0,0 +1,134 @@
+//===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains FormatTokenLexer, which tokenizes a source file
+/// into a token stream suitable for ClangFormat.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
+#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
+
+#include "Encoding.h"
+#include "FormatToken.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Regex.h"
+
+#include <stack>
+
+namespace clang {
+namespace format {
+
+enum LexerState { // Element type of FormatTokenLexer::StateStack.
+  NORMAL,          // presumably ordinary lexing -- confirm in the .cpp
+  TEMPLATE_STRING, // presumably inside a JS template string -- confirm
+  TOKEN_STASHED,   // presumably paired with getStashedToken() -- confirm
+};
+
+class FormatTokenLexer { // Tokenizes a source file into FormatTokens.
+public:
+  FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,
+                   const FormatStyle &Style, encoding::Encoding Encoding,
+                   llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
+                   IdentifierTable &IdentTable);
+
+  ArrayRef<FormatToken *> lex(); // NOTE(review): presumably views Tokens.
+
+  const AdditionalKeywords &getKeywords() { return Keywords; }
+
+private:
+  void tryMergePreviousTokens();
+
+  bool tryMergeLessLess();
+  bool tryMergeNSStringLiteral();
+  bool tryMergeJSPrivateIdentifier();
+  bool tryMergeCSharpStringLiteral();
+  bool tryMergeCSharpKeywordVariables();
+  bool tryMergeCSharpDoubleQuestion();
+  bool tryMergeCSharpNullConditional();
+  bool tryTransformCSharpForEach();
+  bool tryMergeForEach();
+  bool tryTransformTryUsageForC();
+
+  bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
+
+  // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
+  bool precedesOperand(FormatToken *Tok);
+
+  bool canPrecedeRegexLiteral(FormatToken *Prev);
+
+  // Tries to parse a JavaScript Regex literal starting at the current token,
+  // if that begins with a slash and is in a location where JavaScript allows
+  // regex literals. Changes the current token to a regex literal and updates
+  // its text if successful.
+  void tryParseJSRegexLiteral();
+
+  // Handles JavaScript template strings.
+  //
+  // JavaScript template strings use backticks ('`') as delimiters, and allow
+  // embedding expressions nested in ${expr-here}. Template strings can be
+  // nested recursively, i.e. expressions can contain template strings in turn.
+  //
+  // The code below parses starting from a backtick, up to a closing backtick or
+  // an opening ${. It also maintains a stack of lexing contexts to handle
+  // nested template parts by balancing curly braces.
+  void handleTemplateStrings();
+
+  void handleCSharpVerbatimAndInterpolatedStrings();
+
+  void tryParsePythonComment();
+
+  bool tryMerge_TMacro();
+
+  bool tryMergeConflictMarkers();
+
+  FormatToken *getStashedToken(); // presumably pairs with TOKEN_STASHED
+
+  FormatToken *getNextToken();
+
+  FormatToken *FormatTok;
+  bool IsFirstToken;
+  std::stack<LexerState> StateStack; // presumably the lexing-context stack
+  unsigned Column;
+  unsigned TrailingWhitespace;
+  std::unique_ptr<Lexer> Lex; // Owned by this object.
+  const SourceManager &SourceMgr; // Held by reference.
+  FileID ID;
+  const FormatStyle &Style; // Held by reference.
+  IdentifierTable &IdentTable; // Held by reference.
+  AdditionalKeywords Keywords;
+  encoding::Encoding Encoding;
+  llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator; // Held by reference.
+  // Index (in 'Tokens') of the last token that starts a new line.
+  unsigned FirstInLineIndex;
+  SmallVector<FormatToken *, 16> Tokens;
+
+  llvm::SmallMapVector<IdentifierInfo *, TokenType, 8> Macros;
+
+  bool FormattingDisabled;
+
+  llvm::Regex MacroBlockBeginRegex;
+  llvm::Regex MacroBlockEndRegex;
+
+  // Targets that may appear inside a C# attribute; defined out of line.
+  static const llvm::StringSet<> CSharpAttributeTargets;
+
+  void readRawToken(FormatToken &Tok);
+
+  void resetLexer(unsigned Offset);
+};
+
+} // namespace format
+} // namespace clang
+
+#endif