diff options
Diffstat (limited to 'contrib/llvm-project/clang/lib/Format/FormatTokenLexer.h')
-rw-r--r-- | contrib/llvm-project/clang/lib/Format/FormatTokenLexer.h | 134 |
1 files changed, 134 insertions, 0 deletions
diff --git a/contrib/llvm-project/clang/lib/Format/FormatTokenLexer.h b/contrib/llvm-project/clang/lib/Format/FormatTokenLexer.h new file mode 100644 index 000000000000..6b08677e3369 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Format/FormatTokenLexer.h @@ -0,0 +1,134 @@ +//===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains FormatTokenLexer, which tokenizes a source file +/// into a token stream suitable for ClangFormat. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H +#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H + +#include "Encoding.h" +#include "FormatToken.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Format/Format.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Support/Regex.h" + +#include <stack> + +namespace clang { +namespace format { + +enum LexerState { + NORMAL, + TEMPLATE_STRING, + TOKEN_STASHED, +}; + +class FormatTokenLexer { +public: + FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column, + const FormatStyle &Style, encoding::Encoding Encoding, + llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, + IdentifierTable &IdentTable); + + ArrayRef<FormatToken *> lex(); + + const AdditionalKeywords &getKeywords() { return Keywords; } + +private: + void tryMergePreviousTokens(); + + bool tryMergeLessLess(); + bool tryMergeNSStringLiteral(); + bool tryMergeJSPrivateIdentifier(); + bool tryMergeCSharpStringLiteral(); + bool tryMergeCSharpKeywordVariables(); + bool tryMergeCSharpDoubleQuestion(); + bool tryMergeCSharpNullConditional(); + bool tryTransformCSharpForEach(); + bool tryMergeForEach(); + bool tryTransformTryUsageForC(); + + bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType); + + // Returns \c true if \p Tok can only be followed by an operand in JavaScript. + bool precedesOperand(FormatToken *Tok); + + bool canPrecedeRegexLiteral(FormatToken *Prev); + + // Tries to parse a JavaScript Regex literal starting at the current token, + // if that begins with a slash and is in a location where JavaScript allows + // regex literals. Changes the current token to a regex literal and updates + // its text if successful. + void tryParseJSRegexLiteral(); + + // Handles JavaScript template strings. + // + // JavaScript template strings use backticks ('`') as delimiters, and allow + // embedding expressions nested in ${expr-here}. Template strings can be + // nested recursively, i.e. expressions can contain template strings in turn. + // + // The code below parses starting from a backtick, up to a closing backtick or + // an opening ${. It also maintains a stack of lexing contexts to handle + // nested template parts by balancing curly braces. + void handleTemplateStrings(); + + void handleCSharpVerbatimAndInterpolatedStrings(); + + void tryParsePythonComment(); + + bool tryMerge_TMacro(); + + bool tryMergeConflictMarkers(); + + FormatToken *getStashedToken(); + + FormatToken *getNextToken(); + + FormatToken *FormatTok; + bool IsFirstToken; + std::stack<LexerState> StateStack; + unsigned Column; + unsigned TrailingWhitespace; + std::unique_ptr<Lexer> Lex; + const SourceManager &SourceMgr; + FileID ID; + const FormatStyle &Style; + IdentifierTable &IdentTable; + AdditionalKeywords Keywords; + encoding::Encoding Encoding; + llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator; + // Index (in 'Tokens') of the last token that starts a new line. + unsigned FirstInLineIndex; + SmallVector<FormatToken *, 16> Tokens; + + llvm::SmallMapVector<IdentifierInfo *, TokenType, 8> Macros; + + bool FormattingDisabled; + + llvm::Regex MacroBlockBeginRegex; + llvm::Regex MacroBlockEndRegex; + + // Targets that may appear inside a C# attribute. + static const llvm::StringSet<> CSharpAttributeTargets; + + void readRawToken(FormatToken &Tok); + + void resetLexer(unsigned Offset); +}; + +} // namespace format +} // namespace clang + +#endif |