diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2019-12-20 19:53:05 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2019-12-20 19:53:05 +0000 |
| commit | 0b57cec536236d46e3dba9bd041533462f33dbb7 (patch) | |
| tree | 56229dbdbbf76d18580f72f789003db17246c8d9 /contrib/llvm-project/clang/lib/Format/FormatTokenLexer.h | |
| parent | 718ef55ec7785aae63f98f8ca05dc07ed399c16d (diff) | |
Notes
Diffstat (limited to 'contrib/llvm-project/clang/lib/Format/FormatTokenLexer.h')
| -rw-r--r-- | contrib/llvm-project/clang/lib/Format/FormatTokenLexer.h | 123 |
1 files changed, 123 insertions, 0 deletions
diff --git a/contrib/llvm-project/clang/lib/Format/FormatTokenLexer.h b/contrib/llvm-project/clang/lib/Format/FormatTokenLexer.h new file mode 100644 index 000000000000..1e096fc50205 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Format/FormatTokenLexer.h @@ -0,0 +1,123 @@ +//===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains FormatTokenLexer, which tokenizes a source file +/// into a token stream suitable for ClangFormat. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H +#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H + +#include "Encoding.h" +#include "FormatToken.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Format/Format.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/Support/Regex.h" + +#include <stack> + +namespace clang { +namespace format { + +enum LexerState { + NORMAL, + TEMPLATE_STRING, + TOKEN_STASHED, +}; + +class FormatTokenLexer { +public: + FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column, + const FormatStyle &Style, encoding::Encoding Encoding); + + ArrayRef<FormatToken *> lex(); + + const AdditionalKeywords &getKeywords() { return Keywords; } + +private: + void tryMergePreviousTokens(); + + bool tryMergeLessLess(); + bool tryMergeNSStringLiteral(); + bool tryMergeJSPrivateIdentifier(); + bool tryMergeCSharpVerbatimStringLiteral(); + bool tryMergeCSharpKeywordVariables(); + bool tryMergeCSharpNullConditionals(); + bool tryMergeCSharpDoubleQuestion(); + + bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType); + + // Returns \c true if \p Tok can only be followed by an operand in JavaScript. + bool precedesOperand(FormatToken *Tok); + + bool canPrecedeRegexLiteral(FormatToken *Prev); + + // Tries to parse a JavaScript Regex literal starting at the current token, + // if that begins with a slash and is in a location where JavaScript allows + // regex literals. Changes the current token to a regex literal and updates + // its text if successful. + void tryParseJSRegexLiteral(); + + // Handles JavaScript template strings. + // + // JavaScript template strings use backticks ('`') as delimiters, and allow + // embedding expressions nested in ${expr-here}. Template strings can be + // nested recursively, i.e. expressions can contain template strings in turn. + // + // The code below parses starting from a backtick, up to a closing backtick or + // an opening ${. It also maintains a stack of lexing contexts to handle + // nested template parts by balancing curly braces. + void handleTemplateStrings(); + + void tryParsePythonComment(); + + bool tryMerge_TMacro(); + + bool tryMergeConflictMarkers(); + + FormatToken *getStashedToken(); + + FormatToken *getNextToken(); + + FormatToken *FormatTok; + bool IsFirstToken; + std::stack<LexerState> StateStack; + unsigned Column; + unsigned TrailingWhitespace; + std::unique_ptr<Lexer> Lex; + const SourceManager &SourceMgr; + FileID ID; + const FormatStyle &Style; + IdentifierTable IdentTable; + AdditionalKeywords Keywords; + encoding::Encoding Encoding; + llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; + // Index (in 'Tokens') of the last token that starts a new line. + unsigned FirstInLineIndex; + SmallVector<FormatToken *, 16> Tokens; + + llvm::SmallMapVector<IdentifierInfo *, TokenType, 8> Macros; + + bool FormattingDisabled; + + llvm::Regex MacroBlockBeginRegex; + llvm::Regex MacroBlockEndRegex; + + void readRawToken(FormatToken &Tok); + + void resetLexer(unsigned Offset); +}; + +} // namespace format +} // namespace clang + +#endif |
