aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/clang/lib/Format/FormatTokenLexer.h
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/clang/lib/Format/FormatTokenLexer.h')
-rw-r--r--contrib/llvm-project/clang/lib/Format/FormatTokenLexer.h134
1 files changed, 134 insertions, 0 deletions
diff --git a/contrib/llvm-project/clang/lib/Format/FormatTokenLexer.h b/contrib/llvm-project/clang/lib/Format/FormatTokenLexer.h
new file mode 100644
index 000000000000..6b08677e3369
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Format/FormatTokenLexer.h
@@ -0,0 +1,134 @@
+//===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains FormatTokenLexer, which tokenizes a source file
+/// into a token stream suitable for ClangFormat.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
+#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
+
+#include "Encoding.h"
+#include "FormatToken.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Regex.h"
+
+#include <stack>
+
+namespace clang {
+namespace format {
+
+enum LexerState { // Lexing modes; FormatTokenLexer keeps a std::stack of these.
+ NORMAL, // NOTE(review): presumably ordinary lexing, no special mode - confirm.
+ TEMPLATE_STRING, // NOTE(review): presumably inside a JS template string - confirm.
+ TOKEN_STASHED, // NOTE(review): presumably a token awaits getStashedToken() - confirm.
+};
+
+class FormatTokenLexer { // Tokenizes a source file into a token stream for ClangFormat.
+public:
+ FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,
+ const FormatStyle &Style, encoding::Encoding Encoding,
+ llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
+ IdentifierTable &IdentTable);
+
+ ArrayRef<FormatToken *> lex(); // NOTE(review): presumably a view of 'Tokens' - confirm.
+
+ const AdditionalKeywords &getKeywords() { return Keywords; } // Read-only view of Keywords.
+
+private:
+ void tryMergePreviousTokens(); // NOTE(review): presumably drives the helpers below - confirm.
+
+ bool tryMergeLessLess();
+ bool tryMergeNSStringLiteral();
+ bool tryMergeJSPrivateIdentifier();
+ bool tryMergeCSharpStringLiteral();
+ bool tryMergeCSharpKeywordVariables();
+ bool tryMergeCSharpDoubleQuestion();
+ bool tryMergeCSharpNullConditional();
+ bool tryTransformCSharpForEach();
+ bool tryMergeForEach();
+ bool tryTransformTryUsageForC();
+
+ bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
+
+ // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
+ bool precedesOperand(FormatToken *Tok);
+
+ bool canPrecedeRegexLiteral(FormatToken *Prev);
+
+ // Tries to parse a JavaScript Regex literal starting at the current token,
+ // if that begins with a slash and is in a location where JavaScript allows
+ // regex literals. Changes the current token to a regex literal and updates
+ // its text if successful.
+ void tryParseJSRegexLiteral();
+
+ // Handles JavaScript template strings.
+ //
+ // JavaScript template strings use backticks ('`') as delimiters, and allow
+ // embedding expressions nested in ${expr-here}. Template strings can be
+ // nested recursively, i.e. expressions can contain template strings in turn.
+ //
+ // The code below parses starting from a backtick, up to a closing backtick or
+ // an opening ${. It also maintains a stack of lexing contexts to handle
+ // nested template parts by balancing curly braces.
+ void handleTemplateStrings();
+
+ void handleCSharpVerbatimAndInterpolatedStrings();
+
+ void tryParsePythonComment();
+
+ bool tryMerge_TMacro();
+
+ bool tryMergeConflictMarkers();
+
+ FormatToken *getStashedToken();
+
+ FormatToken *getNextToken();
+
+ FormatToken *FormatTok; // NOTE(review): presumably the token being lexed - confirm.
+ bool IsFirstToken;
+ std::stack<LexerState> StateStack; // Stack of LexerState values.
+ unsigned Column;
+ unsigned TrailingWhitespace;
+ std::unique_ptr<Lexer> Lex; // Uniquely owned Lexer instance.
+ const SourceManager &SourceMgr; // Held by reference; not owned.
+ FileID ID;
+ const FormatStyle &Style; // Held by reference; not owned.
+ IdentifierTable &IdentTable; // Held by reference; not owned.
+ AdditionalKeywords Keywords; // Exposed read-only via getKeywords().
+ encoding::Encoding Encoding;
+ llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator; // Held by reference; not owned.
+ // Index (in 'Tokens') of the last token that starts a new line.
+ unsigned FirstInLineIndex;
+ SmallVector<FormatToken *, 16> Tokens; // Token pointers; inline capacity of 16.
+
+ llvm::SmallMapVector<IdentifierInfo *, TokenType, 8> Macros; // Identifier -> TokenType.
+
+ bool FormattingDisabled; // NOTE(review): presumably tracks off/on regions - confirm.
+
+ llvm::Regex MacroBlockBeginRegex; // NOTE(review): presumably built from Style - confirm.
+ llvm::Regex MacroBlockEndRegex; // NOTE(review): presumably built from Style - confirm.
+
+ // Targets that may appear inside a C# attribute.
+ static const llvm::StringSet<> CSharpAttributeTargets;
+
+ void readRawToken(FormatToken &Tok); // NOTE(review): presumably fills Tok from Lex - confirm.
+
+ void resetLexer(unsigned Offset); // NOTE(review): presumably restarts Lex at Offset - confirm.
+};
+
+} // namespace format
+} // namespace clang
+
+#endif