diff options
Diffstat (limited to 'lib/Format/FormatTokenLexer.cpp')
-rw-r--r-- | lib/Format/FormatTokenLexer.cpp | 88 |
1 files changed, 70 insertions, 18 deletions
diff --git a/lib/Format/FormatTokenLexer.cpp b/lib/Format/FormatTokenLexer.cpp index 45c3ae1afe5f..199d2974c5c7 100644 --- a/lib/Format/FormatTokenLexer.cpp +++ b/lib/Format/FormatTokenLexer.cpp @@ -24,10 +24,10 @@ namespace clang { namespace format { FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, - const FormatStyle &Style, + unsigned Column, const FormatStyle &Style, encoding::Encoding Encoding) : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}), - Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), + Column(Column), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), Style(Style), IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin), @@ -50,6 +50,8 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() { tryParseJSRegexLiteral(); handleTemplateStrings(); } + if (Style.Language == FormatStyle::LK_TextProto) + tryParsePythonComment(); tryMergePreviousTokens(); if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) FirstInLineIndex = Tokens.size() - 1; @@ -96,14 +98,8 @@ void FormatTokenLexer::tryMergePreviousTokens() { } if (Style.Language == FormatStyle::LK_Java) { - static const tok::TokenKind JavaRightLogicalShift[] = {tok::greater, - tok::greater, - tok::greater}; - static const tok::TokenKind JavaRightLogicalShiftAssign[] = {tok::greater, - tok::greater, - tok::greaterequal}; - if (tryMergeTokens(JavaRightLogicalShift, TT_BinaryOperator)) - return; + static const tok::TokenKind JavaRightLogicalShiftAssign[] = { + tok::greater, tok::greater, tok::greaterequal}; if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator)) return; } @@ -162,9 +158,8 @@ bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, return false; unsigned AddLength = 0; for (unsigned i = 1; i < Kinds.size(); ++i) { - if (!First[i]->is(Kinds[i]) || - First[i]->WhitespaceRange.getBegin() != - First[i]->WhitespaceRange.getEnd()) + if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() != + First[i]->WhitespaceRange.getEnd()) return false; AddLength += First[i]->TokenText.size(); } @@ -337,6 +332,27 @@ void FormatTokenLexer::handleTemplateStrings() { resetLexer(SourceMgr.getFileOffset(loc)); } +void FormatTokenLexer::tryParsePythonComment() { + FormatToken *HashToken = Tokens.back(); + if (HashToken->isNot(tok::hash)) + return; + // Turn the remainder of this line into a comment. + const char *CommentBegin = + Lex->getBufferLocation() - HashToken->TokenText.size(); // at "#" + size_t From = CommentBegin - Lex->getBuffer().begin(); + size_t To = Lex->getBuffer().find_first_of('\n', From); + if (To == StringRef::npos) + To = Lex->getBuffer().size(); + size_t Len = To - From; + HashToken->Type = TT_LineComment; + HashToken->Tok.setKind(tok::comment); + HashToken->TokenText = Lex->getBuffer().substr(From, Len); + SourceLocation Loc = To < Lex->getBuffer().size() + ? Lex->getSourceLocation(CommentBegin + Len) + : SourceMgr.getLocForEndOfFile(ID); + resetLexer(SourceMgr.getFileOffset(Loc)); +} + bool FormatTokenLexer::tryMerge_TMacro() { if (Tokens.size() < 4) return false; @@ -529,17 +545,53 @@ FormatToken *FormatTokenLexer::getNextToken() { readRawToken(*FormatTok); } + // JavaScript and Java do not allow to escape the end of the line with a + // backslash. Backslashes are syntax errors in plain source, but can occur in + // comments. When a single line comment ends with a \, it'll cause the next + // line of code to be lexed as a comment, breaking formatting. The code below + // finds comments that contain a backslash followed by a line break, truncates + // the comment token at the backslash, and resets the lexer to restart behind + // the backslash. + if ((Style.Language == FormatStyle::LK_JavaScript || + Style.Language == FormatStyle::LK_Java) && + FormatTok->is(tok::comment) && FormatTok->TokenText.startswith("//")) { + size_t BackslashPos = FormatTok->TokenText.find('\\'); + while (BackslashPos != StringRef::npos) { + if (BackslashPos + 1 < FormatTok->TokenText.size() && + FormatTok->TokenText[BackslashPos + 1] == '\n') { + const char *Offset = Lex->getBufferLocation(); + Offset -= FormatTok->TokenText.size(); + Offset += BackslashPos + 1; + resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset))); + FormatTok->TokenText = FormatTok->TokenText.substr(0, BackslashPos + 1); + FormatTok->ColumnWidth = encoding::columnWidthWithTabs( + FormatTok->TokenText, FormatTok->OriginalColumn, Style.TabWidth, + Encoding); + break; + } + BackslashPos = FormatTok->TokenText.find('\\', BackslashPos + 1); + } + } + // In case the token starts with escaped newlines, we want to // take them into account as whitespace - this pattern is quite frequent // in macro definitions. // FIXME: Add a more explicit test. - while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' && - FormatTok->TokenText[1] == '\n') { + while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\') { + unsigned SkippedWhitespace = 0; + if (FormatTok->TokenText.size() > 2 && + (FormatTok->TokenText[1] == '\r' && FormatTok->TokenText[2] == '\n')) + SkippedWhitespace = 3; + else if (FormatTok->TokenText[1] == '\n') + SkippedWhitespace = 2; + else + break; + ++FormatTok->NewlinesBefore; - WhitespaceLength += 2; - FormatTok->LastNewlineOffset = 2; + WhitespaceLength += SkippedWhitespace; + FormatTok->LastNewlineOffset = SkippedWhitespace; Column = 0; - FormatTok->TokenText = FormatTok->TokenText.substr(2); + FormatTok->TokenText = FormatTok->TokenText.substr(SkippedWhitespace); } FormatTok->WhitespaceRange = SourceRange( |