summaryrefslogtreecommitdiff
path: root/lib/Format/FormatTokenLexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Format/FormatTokenLexer.cpp')
-rw-r--r--lib/Format/FormatTokenLexer.cpp88
1 files changed, 70 insertions, 18 deletions
diff --git a/lib/Format/FormatTokenLexer.cpp b/lib/Format/FormatTokenLexer.cpp
index 45c3ae1afe5f..199d2974c5c7 100644
--- a/lib/Format/FormatTokenLexer.cpp
+++ b/lib/Format/FormatTokenLexer.cpp
@@ -24,10 +24,10 @@ namespace clang {
namespace format {
FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
- const FormatStyle &Style,
+ unsigned Column, const FormatStyle &Style,
encoding::Encoding Encoding)
: FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}),
- Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),
+ Column(Column), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),
Style(Style), IdentTable(getFormattingLangOpts(Style)),
Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0),
FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
@@ -50,6 +50,8 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
tryParseJSRegexLiteral();
handleTemplateStrings();
}
+ if (Style.Language == FormatStyle::LK_TextProto)
+ tryParsePythonComment();
tryMergePreviousTokens();
if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
FirstInLineIndex = Tokens.size() - 1;
@@ -96,14 +98,8 @@ void FormatTokenLexer::tryMergePreviousTokens() {
}
if (Style.Language == FormatStyle::LK_Java) {
- static const tok::TokenKind JavaRightLogicalShift[] = {tok::greater,
- tok::greater,
- tok::greater};
- static const tok::TokenKind JavaRightLogicalShiftAssign[] = {tok::greater,
- tok::greater,
- tok::greaterequal};
- if (tryMergeTokens(JavaRightLogicalShift, TT_BinaryOperator))
- return;
+ static const tok::TokenKind JavaRightLogicalShiftAssign[] = {
+ tok::greater, tok::greater, tok::greaterequal};
if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator))
return;
}
@@ -162,9 +158,8 @@ bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds,
return false;
unsigned AddLength = 0;
for (unsigned i = 1; i < Kinds.size(); ++i) {
- if (!First[i]->is(Kinds[i]) ||
- First[i]->WhitespaceRange.getBegin() !=
- First[i]->WhitespaceRange.getEnd())
+ if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() !=
+ First[i]->WhitespaceRange.getEnd())
return false;
AddLength += First[i]->TokenText.size();
}
@@ -337,6 +332,27 @@ void FormatTokenLexer::handleTemplateStrings() {
resetLexer(SourceMgr.getFileOffset(loc));
}
+void FormatTokenLexer::tryParsePythonComment() {
+ FormatToken *HashToken = Tokens.back();
+ if (HashToken->isNot(tok::hash))
+ return; // Most recent token is not a '#': nothing to convert.
+ // Turn the rest of this line (up to '\n' or the end of the buffer) into a single line-comment token.
+ const char *CommentBegin =
+ Lex->getBufferLocation() - HashToken->TokenText.size(); // points at the "#"
+ size_t From = CommentBegin - Lex->getBuffer().begin(); // offset of "#" within the lexer buffer
+ size_t To = Lex->getBuffer().find_first_of('\n', From);
+ if (To == StringRef::npos)
+ To = Lex->getBuffer().size(); // No newline found: the comment runs to the end of the buffer.
+ size_t Len = To - From; // Length of the comment text; the '\n' itself is excluded.
+ HashToken->Type = TT_LineComment;
+ HashToken->Tok.setKind(tok::comment);
+ HashToken->TokenText = Lex->getBuffer().substr(From, Len);
+ SourceLocation Loc = To < Lex->getBuffer().size()
+ ? Lex->getSourceLocation(CommentBegin + Len)
+ : SourceMgr.getLocForEndOfFile(ID);
+ resetLexer(SourceMgr.getFileOffset(Loc)); // Restart lexing at the newline (or at end of file).
+}
+
bool FormatTokenLexer::tryMerge_TMacro() {
if (Tokens.size() < 4)
return false;
@@ -529,17 +545,53 @@ FormatToken *FormatTokenLexer::getNextToken() {
readRawToken(*FormatTok);
}
+ // JavaScript and Java do not allow escaping the end of the line with a
+ // backslash. Backslashes are syntax errors in plain source, but can occur in
+ // comments. When a single line comment ends with a \, it'll cause the next
+ // line of code to be lexed as a comment, breaking formatting. The code below
+ // finds comments that contain a backslash followed by a line break, truncates
+ // the comment token at the backslash, and resets the lexer to restart behind
+ // the backslash.
+ if ((Style.Language == FormatStyle::LK_JavaScript ||
+ Style.Language == FormatStyle::LK_Java) &&
+ FormatTok->is(tok::comment) && FormatTok->TokenText.startswith("//")) {
+ size_t BackslashPos = FormatTok->TokenText.find('\\');
+ while (BackslashPos != StringRef::npos) {
+ if (BackslashPos + 1 < FormatTok->TokenText.size() &&
+ FormatTok->TokenText[BackslashPos + 1] == '\n') {
+ const char *Offset = Lex->getBufferLocation();
+ Offset -= FormatTok->TokenText.size();
+ Offset += BackslashPos + 1;
+ resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
+ FormatTok->TokenText = FormatTok->TokenText.substr(0, BackslashPos + 1);
+ FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
+ FormatTok->TokenText, FormatTok->OriginalColumn, Style.TabWidth,
+ Encoding);
+ break;
+ }
+ BackslashPos = FormatTok->TokenText.find('\\', BackslashPos + 1);
+ }
+ }
+
// In case the token starts with escaped newlines, we want to
// take them into account as whitespace - this pattern is quite frequent
// in macro definitions.
// FIXME: Add a more explicit test.
- while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
- FormatTok->TokenText[1] == '\n') {
+ while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\') {
+ unsigned SkippedWhitespace = 0;
+ if (FormatTok->TokenText.size() > 2 &&
+ (FormatTok->TokenText[1] == '\r' && FormatTok->TokenText[2] == '\n'))
+ SkippedWhitespace = 3;
+ else if (FormatTok->TokenText[1] == '\n')
+ SkippedWhitespace = 2;
+ else
+ break;
+
++FormatTok->NewlinesBefore;
- WhitespaceLength += 2;
- FormatTok->LastNewlineOffset = 2;
+ WhitespaceLength += SkippedWhitespace;
+ FormatTok->LastNewlineOffset = SkippedWhitespace;
Column = 0;
- FormatTok->TokenText = FormatTok->TokenText.substr(2);
+ FormatTok->TokenText = FormatTok->TokenText.substr(SkippedWhitespace);
}
FormatTok->WhitespaceRange = SourceRange(