diff options
Diffstat (limited to 'lib/Lex/Lexer.cpp')
-rw-r--r-- | lib/Lex/Lexer.cpp | 78 |
1 files changed, 64 insertions, 14 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index e8588a771a431..d4723091114a1 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -1015,7 +1015,7 @@ StringRef Lexer::getImmediateMacroName(SourceLocation Loc, StringRef Lexer::getImmediateMacroNameForDiagnostics( SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) { assert(Loc.isMacroID() && "Only reasonable to call this on macros"); - // Walk past macro argument expanions. + // Walk past macro argument expansions. while (SM.isMacroArgExpansion(Loc)) Loc = SM.getImmediateExpansionRange(Loc).getBegin(); @@ -1510,8 +1510,17 @@ static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, bool operator<(HomoglyphPair R) const { return Character < R.Character; } }; static constexpr HomoglyphPair SortedHomoglyphs[] = { + {U'\u00ad', 0}, // SOFT HYPHEN {U'\u01c3', '!'}, // LATIN LETTER RETROFLEX CLICK {U'\u037e', ';'}, // GREEK QUESTION MARK + {U'\u200b', 0}, // ZERO WIDTH SPACE + {U'\u200c', 0}, // ZERO WIDTH NON-JOINER + {U'\u200d', 0}, // ZERO WIDTH JOINER + {U'\u2060', 0}, // WORD JOINER + {U'\u2061', 0}, // FUNCTION APPLICATION + {U'\u2062', 0}, // INVISIBLE TIMES + {U'\u2063', 0}, // INVISIBLE SEPARATOR + {U'\u2064', 0}, // INVISIBLE PLUS {U'\u2212', '-'}, // MINUS SIGN {U'\u2215', '/'}, // DIVISION SLASH {U'\u2216', '\\'}, // SET MINUS @@ -1521,6 +1530,7 @@ static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, {U'\u2236', ':'}, // RATIO {U'\u223c', '~'}, // TILDE OPERATOR {U'\ua789', ':'}, // MODIFIER LETTER COLON + {U'\ufeff', 0}, // ZERO WIDTH NO-BREAK SPACE {U'\uff01', '!'}, // FULLWIDTH EXCLAMATION MARK {U'\uff03', '#'}, // FULLWIDTH NUMBER SIGN {U'\uff04', '$'}, // FULLWIDTH DOLLAR SIGN @@ -1560,9 +1570,14 @@ static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, llvm::raw_svector_ostream CharOS(CharBuf); llvm::write_hex(CharOS, C, llvm::HexPrintStyle::Upper, 4); } - const char LooksLikeStr[] = {Homoglyph->LooksLike, 0}; - Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph) - << Range << CharBuf << LooksLikeStr; + if (Homoglyph->LooksLike) { + const char LooksLikeStr[] = {Homoglyph->LooksLike, 0}; + Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph) + << Range << CharBuf << LooksLikeStr; + } else { + Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_zero_width) + << Range << CharBuf; + } } } @@ -1881,6 +1896,7 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr, /// either " or L" or u8" or u" or U". bool Lexer::LexStringLiteral(Token &Result, const char *CurPtr, tok::TokenKind Kind) { + const char *AfterQuote = CurPtr; // Does this string contain the \0 character? const char *NulCharacter = nullptr; @@ -1909,8 +1925,11 @@ bool Lexer::LexStringLiteral(Token &Result, const char *CurPtr, if (C == 0) { if (isCodeCompletionPoint(CurPtr-1)) { - PP->CodeCompleteNaturalLanguage(); - FormTokenWithChars(Result, CurPtr-1, tok::unknown); + if (ParsingFilename) + codeCompleteIncludedFile(AfterQuote, CurPtr - 1, /*IsAngled=*/false); + else + PP->CodeCompleteNaturalLanguage(); + FormTokenWithChars(Result, CurPtr - 1, tok::unknown); cutOffLexing(); return true; } @@ -2028,9 +2047,8 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { if (C == '\\') C = getAndAdvanceChar(CurPtr, Result); - if (C == '\n' || C == '\r' || // Newline. - (C == 0 && (CurPtr-1 == BufferEnd || // End of file. - isCodeCompletionPoint(CurPtr-1)))) { + if (C == '\n' || C == '\r' || // Newline. + (C == 0 && (CurPtr - 1 == BufferEnd))) { // End of file. // If the filename is unterminated, then it must just be a lone < // character. Return this as such. FormTokenWithChars(Result, AfterLessPos, tok::less); @@ -2038,6 +2056,12 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { } if (C == 0) { + if (isCodeCompletionPoint(CurPtr - 1)) { + codeCompleteIncludedFile(AfterLessPos, CurPtr - 1, /*IsAngled=*/true); + cutOffLexing(); + FormTokenWithChars(Result, CurPtr - 1, tok::unknown); + return true; + } NulCharacter = CurPtr-1; } C = getAndAdvanceChar(CurPtr, Result); @@ -2054,6 +2078,34 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { return true; } +void Lexer::codeCompleteIncludedFile(const char *PathStart, + const char *CompletionPoint, + bool IsAngled) { + // Completion only applies to the filename, after the last slash. + StringRef PartialPath(PathStart, CompletionPoint - PathStart); + auto Slash = PartialPath.find_last_of(LangOpts.MSVCCompat ? "/\\" : "/"); + StringRef Dir = + (Slash == StringRef::npos) ? "" : PartialPath.take_front(Slash); + const char *StartOfFilename = + (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1; + // Code completion filter range is the filename only, up to completion point. + PP->setCodeCompletionIdentifierInfo(&PP->getIdentifierTable().get( + StringRef(StartOfFilename, CompletionPoint - StartOfFilename))); + // We should replace the characters up to the closing quote, if any. + while (CompletionPoint < BufferEnd) { + char Next = *(CompletionPoint + 1); + if (Next == 0 || Next == '\r' || Next == '\n') + break; + ++CompletionPoint; + if (Next == (IsAngled ? '>' : '"')) + break; + } + PP->setCodeCompletionTokenRange( + FileLoc.getLocWithOffset(StartOfFilename - BufferStart), + FileLoc.getLocWithOffset(CompletionPoint - BufferStart)); + PP->CodeCompleteIncludedFile(Dir, IsAngled); +} + /// LexCharConstant - Lex the remainder of a character constant, after having /// lexed either ' or L' or u8' or u' or U'. bool Lexer::LexCharConstant(Token &Result, const char *CurPtr, @@ -3033,6 +3085,8 @@ bool Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) { maybeDiagnoseIDCharCompat(PP->getDiagnostics(), C, makeCharRange(*this, BufferPtr, CurPtr), /*IsFirst=*/true); + maybeDiagnoseUTF8Homoglyph(PP->getDiagnostics(), C, + makeCharRange(*this, BufferPtr, CurPtr)); } MIOpt.ReadToken(); @@ -3790,7 +3844,7 @@ LexNextToken: case '@': // Objective C support. - if (CurPtr[-1] == '@' && LangOpts.ObjC1) + if (CurPtr[-1] == '@' && LangOpts.ObjC) Kind = tok::at; else Kind = tok::unknown; @@ -3827,7 +3881,6 @@ LexNextToken: // We can't just reset CurPtr to BufferPtr because BufferPtr may point to // an escaped newline. --CurPtr; - const char *UTF8StartPtr = CurPtr; llvm::ConversionResult Status = llvm::convertUTF8Sequence((const llvm::UTF8 **)&CurPtr, (const llvm::UTF8 *)BufferEnd, @@ -3842,9 +3895,6 @@ LexNextToken: // (We manually eliminate the tail call to avoid recursion.) goto LexNextToken; } - if (!isLexingRawMode()) - maybeDiagnoseUTF8Homoglyph(PP->getDiagnostics(), CodePoint, - makeCharRange(*this, UTF8StartPtr, CurPtr)); return LexUnicode(Result, CodePoint, CurPtr); } |