summaryrefslogtreecommitdiff
path: root/lib/Lex/Lexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Lex/Lexer.cpp')
-rw-r--r--lib/Lex/Lexer.cpp78
1 files changed, 64 insertions, 14 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index e8588a771a431..d4723091114a1 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -1015,7 +1015,7 @@ StringRef Lexer::getImmediateMacroName(SourceLocation Loc,
StringRef Lexer::getImmediateMacroNameForDiagnostics(
SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) {
assert(Loc.isMacroID() && "Only reasonable to call this on macros");
- // Walk past macro argument expanions.
+ // Walk past macro argument expansions.
while (SM.isMacroArgExpansion(Loc))
Loc = SM.getImmediateExpansionRange(Loc).getBegin();
@@ -1510,8 +1510,17 @@ static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C,
bool operator<(HomoglyphPair R) const { return Character < R.Character; }
};
static constexpr HomoglyphPair SortedHomoglyphs[] = {
+ {U'\u00ad', 0}, // SOFT HYPHEN
{U'\u01c3', '!'}, // LATIN LETTER RETROFLEX CLICK
{U'\u037e', ';'}, // GREEK QUESTION MARK
+ {U'\u200b', 0}, // ZERO WIDTH SPACE
+ {U'\u200c', 0}, // ZERO WIDTH NON-JOINER
+ {U'\u200d', 0}, // ZERO WIDTH JOINER
+ {U'\u2060', 0}, // WORD JOINER
+ {U'\u2061', 0}, // FUNCTION APPLICATION
+ {U'\u2062', 0}, // INVISIBLE TIMES
+ {U'\u2063', 0}, // INVISIBLE SEPARATOR
+ {U'\u2064', 0}, // INVISIBLE PLUS
{U'\u2212', '-'}, // MINUS SIGN
{U'\u2215', '/'}, // DIVISION SLASH
{U'\u2216', '\\'}, // SET MINUS
@@ -1521,6 +1530,7 @@ static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C,
{U'\u2236', ':'}, // RATIO
{U'\u223c', '~'}, // TILDE OPERATOR
{U'\ua789', ':'}, // MODIFIER LETTER COLON
+ {U'\ufeff', 0}, // ZERO WIDTH NO-BREAK SPACE
{U'\uff01', '!'}, // FULLWIDTH EXCLAMATION MARK
{U'\uff03', '#'}, // FULLWIDTH NUMBER SIGN
{U'\uff04', '$'}, // FULLWIDTH DOLLAR SIGN
@@ -1560,9 +1570,14 @@ static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C,
llvm::raw_svector_ostream CharOS(CharBuf);
llvm::write_hex(CharOS, C, llvm::HexPrintStyle::Upper, 4);
}
- const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
- Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph)
- << Range << CharBuf << LooksLikeStr;
+ if (Homoglyph->LooksLike) {
+ const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
+ Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph)
+ << Range << CharBuf << LooksLikeStr;
+ } else {
+ Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_zero_width)
+ << Range << CharBuf;
+ }
}
}
@@ -1881,6 +1896,7 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr,
/// either " or L" or u8" or u" or U".
bool Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
tok::TokenKind Kind) {
+ const char *AfterQuote = CurPtr;
// Does this string contain the \0 character?
const char *NulCharacter = nullptr;
@@ -1909,8 +1925,11 @@ bool Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
if (C == 0) {
if (isCodeCompletionPoint(CurPtr-1)) {
- PP->CodeCompleteNaturalLanguage();
- FormTokenWithChars(Result, CurPtr-1, tok::unknown);
+ if (ParsingFilename)
+ codeCompleteIncludedFile(AfterQuote, CurPtr - 1, /*IsAngled=*/false);
+ else
+ PP->CodeCompleteNaturalLanguage();
+ FormTokenWithChars(Result, CurPtr - 1, tok::unknown);
cutOffLexing();
return true;
}
@@ -2028,9 +2047,8 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
if (C == '\\')
C = getAndAdvanceChar(CurPtr, Result);
- if (C == '\n' || C == '\r' || // Newline.
- (C == 0 && (CurPtr-1 == BufferEnd || // End of file.
- isCodeCompletionPoint(CurPtr-1)))) {
+ if (C == '\n' || C == '\r' || // Newline.
+ (C == 0 && (CurPtr - 1 == BufferEnd))) { // End of file.
// If the filename is unterminated, then it must just be a lone <
// character. Return this as such.
FormTokenWithChars(Result, AfterLessPos, tok::less);
@@ -2038,6 +2056,12 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
}
if (C == 0) {
+ if (isCodeCompletionPoint(CurPtr - 1)) {
+ codeCompleteIncludedFile(AfterLessPos, CurPtr - 1, /*IsAngled=*/true);
+ cutOffLexing();
+ FormTokenWithChars(Result, CurPtr - 1, tok::unknown);
+ return true;
+ }
NulCharacter = CurPtr-1;
}
C = getAndAdvanceChar(CurPtr, Result);
@@ -2054,6 +2078,34 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
return true;
}
+void Lexer::codeCompleteIncludedFile(const char *PathStart,
+ const char *CompletionPoint,
+ bool IsAngled) {
+ // Completion only applies to the filename, after the last slash.
+ StringRef PartialPath(PathStart, CompletionPoint - PathStart);
+ auto Slash = PartialPath.find_last_of(LangOpts.MSVCCompat ? "/\\" : "/");
+ StringRef Dir =
+ (Slash == StringRef::npos) ? "" : PartialPath.take_front(Slash);
+ const char *StartOfFilename =
+ (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1;
+ // Code completion filter range is the filename only, up to completion point.
+ PP->setCodeCompletionIdentifierInfo(&PP->getIdentifierTable().get(
+ StringRef(StartOfFilename, CompletionPoint - StartOfFilename)));
+ // We should replace the characters up to the closing quote, if any.
+ while (CompletionPoint < BufferEnd) {
+ char Next = *(CompletionPoint + 1);
+ if (Next == 0 || Next == '\r' || Next == '\n')
+ break;
+ ++CompletionPoint;
+ if (Next == (IsAngled ? '>' : '"'))
+ break;
+ }
+ PP->setCodeCompletionTokenRange(
+ FileLoc.getLocWithOffset(StartOfFilename - BufferStart),
+ FileLoc.getLocWithOffset(CompletionPoint - BufferStart));
+ PP->CodeCompleteIncludedFile(Dir, IsAngled);
+}
+
/// LexCharConstant - Lex the remainder of a character constant, after having
/// lexed either ' or L' or u8' or u' or U'.
bool Lexer::LexCharConstant(Token &Result, const char *CurPtr,
@@ -3033,6 +3085,8 @@ bool Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
maybeDiagnoseIDCharCompat(PP->getDiagnostics(), C,
makeCharRange(*this, BufferPtr, CurPtr),
/*IsFirst=*/true);
+ maybeDiagnoseUTF8Homoglyph(PP->getDiagnostics(), C,
+ makeCharRange(*this, BufferPtr, CurPtr));
}
MIOpt.ReadToken();
@@ -3790,7 +3844,7 @@ LexNextToken:
case '@':
// Objective C support.
- if (CurPtr[-1] == '@' && LangOpts.ObjC1)
+ if (CurPtr[-1] == '@' && LangOpts.ObjC)
Kind = tok::at;
else
Kind = tok::unknown;
@@ -3827,7 +3881,6 @@ LexNextToken:
// We can't just reset CurPtr to BufferPtr because BufferPtr may point to
// an escaped newline.
--CurPtr;
- const char *UTF8StartPtr = CurPtr;
llvm::ConversionResult Status =
llvm::convertUTF8Sequence((const llvm::UTF8 **)&CurPtr,
(const llvm::UTF8 *)BufferEnd,
@@ -3842,9 +3895,6 @@ LexNextToken:
// (We manually eliminate the tail call to avoid recursion.)
goto LexNextToken;
}
- if (!isLexingRawMode())
- maybeDiagnoseUTF8Homoglyph(PP->getDiagnostics(), CodePoint,
- makeCharRange(*this, UTF8StartPtr, CurPtr));
return LexUnicode(Result, CodePoint, CurPtr);
}