| field | value |
|---|---|
| author | Dimitry Andric <dim@FreeBSD.org>, 2019-08-20 20:50:49 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org>, 2019-08-20 20:50:49 +0000 |
| commit | 2298981669bf3bd63335a4be179bc0f96823a8f4 |
| tree | 1cbe2eb27f030d2d70b80ee5ca3c86bee7326a9f /lib/Lex |
| parent | 9a83721404652cea39e9f02ae3e3b5c964602a5c |
Diffstat (limited to 'lib/Lex')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | lib/Lex/DependencyDirectivesSourceMinimizer.cpp | 763 |
| -rw-r--r-- | lib/Lex/HeaderMap.cpp | 7 |
| -rw-r--r-- | lib/Lex/HeaderSearch.cpp | 111 |
| -rw-r--r-- | lib/Lex/Lexer.cpp | 16 |
| -rw-r--r-- | lib/Lex/LiteralSupport.cpp | 19 |
| -rw-r--r-- | lib/Lex/MacroArgs.cpp | 18 |
| -rw-r--r-- | lib/Lex/MacroInfo.cpp | 7 |
| -rw-r--r-- | lib/Lex/ModuleMap.cpp | 23 |
| -rw-r--r-- | lib/Lex/PPCaching.cpp | 59 |
| -rw-r--r-- | lib/Lex/PPCallbacks.cpp | 7 |
| -rw-r--r-- | lib/Lex/PPConditionalDirectiveRecord.cpp | 17 |
| -rw-r--r-- | lib/Lex/PPDirectives.cpp | 562 |
| -rw-r--r-- | lib/Lex/PPExpressions.cpp | 27 |
| -rw-r--r-- | lib/Lex/PPLexerChange.cpp | 34 |
| -rw-r--r-- | lib/Lex/PPMacroExpansion.cpp | 112 |
| -rw-r--r-- | lib/Lex/Pragma.cpp | 338 |
| -rw-r--r-- | lib/Lex/PreprocessingRecord.cpp | 25 |
| -rw-r--r-- | lib/Lex/Preprocessor.cpp | 412 |
| -rw-r--r-- | lib/Lex/PreprocessorLexer.cpp | 15 |
| -rw-r--r-- | lib/Lex/ScratchBuffer.cpp | 7 |
| -rw-r--r-- | lib/Lex/TokenConcatenation.cpp | 12 |
| -rw-r--r-- | lib/Lex/TokenLexer.cpp | 67 |
| -rw-r--r-- | lib/Lex/UnicodeCharSets.h | 7 |
23 files changed, 1969 insertions(+), 696 deletions(-)
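The bulk of this import is the new dependency-directives source minimizer added in `lib/Lex/DependencyDirectivesSourceMinimizer.cpp`. As a quick orientation before the diff itself, here is a minimal sketch of how its public entry point, `clang::minimizeSourceToDependencyDirectives` (defined at the end of the new file below), might be driven. The wrapper function, buffer handling, and namespace alias are illustrative assumptions; only the entry point, the `Token` type, and the "returns true on error" convention are taken from the diff.

```cpp
// A minimal sketch (not part of the commit): drive the new minimizer over an
// in-memory buffer. The wrapper name and setup here are hypothetical; the
// entry point, Token type, and error convention come from the diff below.
#include "clang/Basic/SourceLocation.h"
#include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"

static bool minimizeForDependencyScan(llvm::StringRef Source,
                                      llvm::SmallVectorImpl<char> &Minimized) {
  namespace dep = clang::minimize_source_to_dependency_directives;
  // Directive tokens recorded alongside the minimized text (offsets into it).
  llvm::SmallVector<dep::Token, 32> Tokens;
  // Diagnostics are optional; passing nullptr suppresses error reporting.
  return clang::minimizeSourceToDependencyDirectives(
      Source, Minimized, Tokens, /*Diags=*/nullptr, clang::SourceLocation());
}
```

Per the implementation below, the minimized output on success ends with a trailing newline, a `pp_eof` token is appended to the token list, and the buffer is null-terminated so it can be handed to Clang as a memory buffer without further copying.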
diff --git a/lib/Lex/DependencyDirectivesSourceMinimizer.cpp b/lib/Lex/DependencyDirectivesSourceMinimizer.cpp new file mode 100644 index 0000000000000..cfc37c5d3c62b --- /dev/null +++ b/lib/Lex/DependencyDirectivesSourceMinimizer.cpp @@ -0,0 +1,763 @@ +//===- DependencyDirectivesSourceMinimizer.cpp - -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This is the implementation for minimizing header and source files to the +/// minimum necessary preprocessor directives for evaluating includes. It +/// reduces the source down to #define, #include, #import, @import, and any +/// conditional preprocessor logic that contains one of those. +/// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/DependencyDirectivesSourceMinimizer.h" +#include "clang/Basic/CharInfo.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Lex/LexDiagnostic.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace clang; +using namespace clang::minimize_source_to_dependency_directives; + +namespace { + +struct Minimizer { + /// Minimized output. + SmallVectorImpl<char> &Out; + /// The known tokens encountered during the minimization. + SmallVectorImpl<Token> &Tokens; + + Minimizer(SmallVectorImpl<char> &Out, SmallVectorImpl<Token> &Tokens, + StringRef Input, DiagnosticsEngine *Diags, + SourceLocation InputSourceLoc) + : Out(Out), Tokens(Tokens), Input(Input), Diags(Diags), + InputSourceLoc(InputSourceLoc) {} + + /// Lex the provided source and emit the minimized output. + /// + /// \returns True on error. + bool minimize(); + +private: + struct IdInfo { + const char *Last; + StringRef Name; + }; + + /// Lex an identifier. + /// + /// \pre First points at a valid identifier head. + LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End); + LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First, + const char *const End); + LLVM_NODISCARD bool minimizeImpl(const char *First, const char *const End); + LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End); + LLVM_NODISCARD bool lexAt(const char *&First, const char *const End); + LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End); + LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End); + LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End); + LLVM_NODISCARD bool lexDefault(TokenKind Kind, StringRef Directive, + const char *&First, const char *const End); + Token &makeToken(TokenKind K) { + Tokens.emplace_back(K, Out.size()); + return Tokens.back(); + } + void popToken() { + Out.resize(Tokens.back().Offset); + Tokens.pop_back(); + } + TokenKind top() const { return Tokens.empty() ? 
pp_none : Tokens.back().K; } + + Minimizer &put(char Byte) { + Out.push_back(Byte); + return *this; + } + Minimizer &append(StringRef S) { return append(S.begin(), S.end()); } + Minimizer &append(const char *First, const char *Last) { + Out.append(First, Last); + return *this; + } + + void printToNewline(const char *&First, const char *const End); + void printAdjacentModuleNameParts(const char *&First, const char *const End); + LLVM_NODISCARD bool printAtImportBody(const char *&First, + const char *const End); + void printDirectiveBody(const char *&First, const char *const End); + void printAdjacentMacroArgs(const char *&First, const char *const End); + LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End); + + /// Reports a diagnostic if the diagnostic engine is provided. Always returns + /// true at the end. + bool reportError(const char *CurPtr, unsigned Err); + + StringMap<char> SplitIds; + StringRef Input; + DiagnosticsEngine *Diags; + SourceLocation InputSourceLoc; +}; + +} // end anonymous namespace + +bool Minimizer::reportError(const char *CurPtr, unsigned Err) { + if (!Diags) + return true; + assert(CurPtr >= Input.data() && "invalid buffer ptr"); + Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err); + return true; +} + +static void skipOverSpaces(const char *&First, const char *const End) { + while (First != End && isHorizontalWhitespace(*First)) + ++First; +} + +LLVM_NODISCARD static bool isRawStringLiteral(const char *First, + const char *Current) { + assert(First <= Current); + + // Check if we can even back up. + if (*Current != '"' || First == Current) + return false; + + // Check for an "R". + --Current; + if (*Current != 'R') + return false; + if (First == Current || !isIdentifierBody(*--Current)) + return true; + + // Check for a prefix of "u", "U", or "L". + if (*Current == 'u' || *Current == 'U' || *Current == 'L') + return First == Current || !isIdentifierBody(*--Current); + + // Check for a prefix of "u8". + if (*Current != '8' || First == Current || *Current-- != 'u') + return false; + return First == Current || !isIdentifierBody(*--Current); +} + +static void skipRawString(const char *&First, const char *const End) { + assert(First[0] == '"'); + assert(First[-1] == 'R'); + + const char *Last = ++First; + while (Last != End && *Last != '(') + ++Last; + if (Last == End) { + First = Last; // Hit the end... just give up. + return; + } + + StringRef Terminator(First, Last - First); + for (;;) { + // Move First to just past the next ")". + First = Last; + while (First != End && *First != ')') + ++First; + if (First == End) + return; + ++First; + + // Look ahead for the terminator sequence. + Last = First; + while (Last != End && size_t(Last - First) < Terminator.size() && + Terminator[Last - First] == *Last) + ++Last; + + // Check if we hit it (or the end of the file). + if (Last == End) { + First = Last; + return; + } + if (size_t(Last - First) < Terminator.size()) + continue; + if (*Last != '"') + continue; + First = Last + 1; + return; + } +} + +static void skipString(const char *&First, const char *const End) { + assert(*First == '\'' || *First == '"'); + const char Terminator = *First; + for (++First; First != End && *First != Terminator; ++First) + if (*First == '\\') + if (++First == End) + return; + if (First != End) + ++First; // Finish off the string. 
+} + +static void skipNewline(const char *&First, const char *End) { + assert(isVerticalWhitespace(*First)); + ++First; + if (First == End) + return; + + // Check for "\n\r" and "\r\n". + if (LLVM_UNLIKELY(isVerticalWhitespace(*First) && First[-1] != First[0])) + ++First; +} + +static void skipToNewlineRaw(const char *&First, const char *const End) { + for (;;) { + if (First == End) + return; + + if (isVerticalWhitespace(*First)) + return; + + while (!isVerticalWhitespace(*First)) + if (++First == End) + return; + + if (First[-1] != '\\') + return; + + ++First; // Keep going... + } +} + +static const char *reverseOverSpaces(const char *First, const char *Last) { + assert(First <= Last); + while (First != Last && isHorizontalWhitespace(Last[-1])) + --Last; + return Last; +} + +static void skipLineComment(const char *&First, const char *const End) { + assert(First[0] == '/' && First[1] == '/'); + First += 2; + skipToNewlineRaw(First, End); +} + +static void skipBlockComment(const char *&First, const char *const End) { + assert(First[0] == '/' && First[1] == '*'); + if (End - First < 4) { + First = End; + return; + } + for (First += 3; First != End; ++First) + if (First[-1] == '*' && First[0] == '/') { + ++First; + return; + } +} + +/// \returns True if the current single quotation mark character is a C++ 14 +/// digit separator. +static bool isQuoteCppDigitSeparator(const char *const Start, + const char *const Cur, + const char *const End) { + assert(*Cur == '\'' && "expected quotation character"); + // skipLine called in places where we don't expect a valid number + // body before `start` on the same line, so always return false at the start. + if (Start == Cur) + return false; + // The previous character must be a valid PP number character. + // Make sure that the L, u, U, u8 prefixes don't get marked as a + // separator though. + char Prev = *(Cur - 1); + if (Prev == 'L' || Prev == 'U' || Prev == 'u') + return false; + if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u') + return false; + if (!isPreprocessingNumberBody(Prev)) + return false; + // The next character should be a valid identifier body character. + return (Cur + 1) < End && isIdentifierBody(*(Cur + 1)); +} + +static void skipLine(const char *&First, const char *const End) { + do { + assert(First <= End); + if (First == End) + return; + + if (isVerticalWhitespace(*First)) { + skipNewline(First, End); + return; + } + const char *Start = First; + while (First != End && !isVerticalWhitespace(*First)) { + // Iterate over strings correctly to avoid comments and newlines. + if (*First == '"' || + (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) { + if (isRawStringLiteral(Start, First)) + skipRawString(First, End); + else + skipString(First, End); + continue; + } + + // Iterate over comments correctly. + if (*First != '/' || End - First < 2) { + ++First; + continue; + } + + if (First[1] == '/') { + // "//...". + skipLineComment(First, End); + continue; + } + + if (First[1] != '*') { + ++First; + continue; + } + + // "/*...*/". + skipBlockComment(First, End); + } + if (First == End) + return; + + // Skip over the newline. + assert(isVerticalWhitespace(*First)); + skipNewline(First, End); + } while (First[-2] == '\\'); // Continue past line-continuations. +} + +static void skipDirective(StringRef Name, const char *&First, + const char *const End) { + if (llvm::StringSwitch<bool>(Name) + .Case("warning", true) + .Case("error", true) + .Default(false)) + // Do not process quotes or comments. 
+ skipToNewlineRaw(First, End); + else + skipLine(First, End); +} + +void Minimizer::printToNewline(const char *&First, const char *const End) { + while (First != End && !isVerticalWhitespace(*First)) { + const char *Last = First; + do { + // Iterate over strings correctly to avoid comments and newlines. + if (*Last == '"' || *Last == '\'') { + if (LLVM_UNLIKELY(isRawStringLiteral(First, Last))) + skipRawString(Last, End); + else + skipString(Last, End); + continue; + } + if (*Last != '/' || End - Last < 2) { + ++Last; + continue; // Gather the rest up to print verbatim. + } + + if (Last[1] != '/' && Last[1] != '*') { + ++Last; + continue; + } + + // Deal with "//..." and "/*...*/". + append(First, reverseOverSpaces(First, Last)); + First = Last; + + if (Last[1] == '/') { + skipLineComment(First, End); + return; + } + + put(' '); + skipBlockComment(First, End); + skipOverSpaces(First, End); + Last = First; + } while (Last != End && !isVerticalWhitespace(*Last)); + + // Print out the string. + if (Last == End || Last == First || Last[-1] != '\\') { + append(First, reverseOverSpaces(First, Last)); + return; + } + + // Print up to the backslash, backing up over spaces. + append(First, reverseOverSpaces(First, Last - 1)); + + First = Last; + skipNewline(First, End); + skipOverSpaces(First, End); + } +} + +static void skipWhitespace(const char *&First, const char *const End) { + for (;;) { + assert(First <= End); + skipOverSpaces(First, End); + + if (End - First < 2) + return; + + if (First[0] == '\\' && isVerticalWhitespace(First[1])) { + skipNewline(++First, End); + continue; + } + + // Check for a non-comment character. + if (First[0] != '/') + return; + + // "// ...". + if (First[1] == '/') { + skipLineComment(First, End); + return; + } + + // Cannot be a comment. + if (First[1] != '*') + return; + + // "/*...*/". + skipBlockComment(First, End); + } +} + +void Minimizer::printAdjacentModuleNameParts(const char *&First, + const char *const End) { + // Skip over parts of the body. + const char *Last = First; + do + ++Last; + while (Last != End && (isIdentifierBody(*Last) || *Last == '.')); + append(First, Last); + First = Last; +} + +bool Minimizer::printAtImportBody(const char *&First, const char *const End) { + for (;;) { + skipWhitespace(First, End); + if (First == End) + return true; + + if (isVerticalWhitespace(*First)) { + skipNewline(First, End); + continue; + } + + // Found a semicolon. + if (*First == ';') { + put(*First++).put('\n'); + return false; + } + + // Don't handle macro expansions inside @import for now. + if (!isIdentifierBody(*First) && *First != '.') + return true; + + printAdjacentModuleNameParts(First, End); + } +} + +void Minimizer::printDirectiveBody(const char *&First, const char *const End) { + skipWhitespace(First, End); // Skip initial whitespace. + printToNewline(First, End); + while (Out.back() == ' ') + Out.pop_back(); + put('\n'); +} + +LLVM_NODISCARD static const char *lexRawIdentifier(const char *First, + const char *const End) { + assert(isIdentifierBody(*First) && "invalid identifer"); + const char *Last = First + 1; + while (Last != End && isIdentifierBody(*Last)) + ++Last; + return Last; +} + +LLVM_NODISCARD static const char * +getIdentifierContinuation(const char *First, const char *const End) { + if (End - First < 3 || First[0] != '\\' || !isVerticalWhitespace(First[1])) + return nullptr; + + ++First; + skipNewline(First, End); + if (First == End) + return nullptr; + return isIdentifierBody(First[0]) ? 
First : nullptr; +} + +Minimizer::IdInfo Minimizer::lexIdentifier(const char *First, + const char *const End) { + const char *Last = lexRawIdentifier(First, End); + const char *Next = getIdentifierContinuation(Last, End); + if (LLVM_LIKELY(!Next)) + return IdInfo{Last, StringRef(First, Last - First)}; + + // Slow path, where identifiers are split over lines. + SmallVector<char, 64> Id(First, Last); + while (Next) { + Last = lexRawIdentifier(Next, End); + Id.append(Next, Last); + Next = getIdentifierContinuation(Last, End); + } + return IdInfo{ + Last, + SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()}; +} + +void Minimizer::printAdjacentMacroArgs(const char *&First, + const char *const End) { + // Skip over parts of the body. + const char *Last = First; + do + ++Last; + while (Last != End && + (isIdentifierBody(*Last) || *Last == '.' || *Last == ',')); + append(First, Last); + First = Last; +} + +bool Minimizer::printMacroArgs(const char *&First, const char *const End) { + assert(*First == '('); + put(*First++); + for (;;) { + skipWhitespace(First, End); + if (First == End) + return true; + + if (*First == ')') { + put(*First++); + return false; + } + + // This is intentionally fairly liberal. + if (!(isIdentifierBody(*First) || *First == '.' || *First == ',')) + return true; + + printAdjacentMacroArgs(First, End); + } +} + +/// Looks for an identifier starting from Last. +/// +/// Updates "First" to just past the next identifier, if any. Returns true iff +/// the identifier matches "Id". +bool Minimizer::isNextIdentifier(StringRef Id, const char *&First, + const char *const End) { + skipWhitespace(First, End); + if (First == End || !isIdentifierHead(*First)) + return false; + + IdInfo FoundId = lexIdentifier(First, End); + First = FoundId.Last; + return FoundId.Name == Id; +} + +bool Minimizer::lexAt(const char *&First, const char *const End) { + // Handle "@import". + const char *ImportLoc = First++; + if (!isNextIdentifier("import", First, End)) { + skipLine(First, End); + return false; + } + makeToken(decl_at_import); + append("@import "); + if (printAtImportBody(First, End)) + return reportError( + ImportLoc, diag::err_dep_source_minimizer_missing_sema_after_at_import); + skipWhitespace(First, End); + if (First == End) + return false; + if (!isVerticalWhitespace(*First)) + return reportError( + ImportLoc, diag::err_dep_source_minimizer_unexpected_tokens_at_import); + skipNewline(First, End); + return false; +} + +bool Minimizer::lexDefine(const char *&First, const char *const End) { + makeToken(pp_define); + append("#define "); + skipWhitespace(First, End); + + if (!isIdentifierHead(*First)) + return reportError(First, diag::err_pp_macro_not_identifier); + + IdInfo Id = lexIdentifier(First, End); + const char *Last = Id.Last; + append(Id.Name); + if (Last == End) + return false; + if (*Last == '(') { + size_t Size = Out.size(); + if (printMacroArgs(Last, End)) { + // Be robust to bad macro arguments, since they can show up in disabled + // code. + Out.resize(Size); + append("(/* invalid */\n"); + skipLine(Last, End); + return false; + } + } + skipWhitespace(Last, End); + if (Last == End) + return false; + if (!isVerticalWhitespace(*Last)) + put(' '); + printDirectiveBody(Last, End); + First = Last; + return false; +} + +bool Minimizer::lexPragma(const char *&First, const char *const End) { + // #pragma. + if (!isNextIdentifier("clang", First, End)) { + skipLine(First, End); + return false; + } + + // #pragma clang. 
+ if (!isNextIdentifier("module", First, End)) { + skipLine(First, End); + return false; + } + + // #pragma clang module. + if (!isNextIdentifier("import", First, End)) { + skipLine(First, End); + return false; + } + + // #pragma clang module import. + makeToken(pp_pragma_import); + append("#pragma clang module import "); + printDirectiveBody(First, End); + return false; +} + +bool Minimizer::lexEndif(const char *&First, const char *const End) { + // Strip out "#else" if it's empty. + if (top() == pp_else) + popToken(); + + // Strip out "#elif" if they're empty. + while (top() == pp_elif) + popToken(); + + // If "#if" is empty, strip it and skip the "#endif". + if (top() == pp_if || top() == pp_ifdef || top() == pp_ifndef) { + popToken(); + skipLine(First, End); + return false; + } + + return lexDefault(pp_endif, "endif", First, End); +} + +bool Minimizer::lexDefault(TokenKind Kind, StringRef Directive, + const char *&First, const char *const End) { + makeToken(Kind); + put('#').append(Directive).put(' '); + printDirectiveBody(First, End); + return false; +} + +bool Minimizer::lexPPLine(const char *&First, const char *const End) { + assert(First != End); + + skipWhitespace(First, End); + assert(First <= End); + if (First == End) + return false; + + if (*First != '#' && *First != '@') { + skipLine(First, End); + assert(First <= End); + return false; + } + + // Handle "@import". + if (*First == '@') + return lexAt(First, End); + + // Handle preprocessing directives. + ++First; // Skip over '#'. + skipWhitespace(First, End); + + if (First == End) + return reportError(First, diag::err_pp_expected_eol); + + if (!isIdentifierHead(*First)) { + skipLine(First, End); + return false; + } + + // Figure out the token. + IdInfo Id = lexIdentifier(First, End); + First = Id.Last; + auto Kind = llvm::StringSwitch<TokenKind>(Id.Name) + .Case("include", pp_include) + .Case("__include_macros", pp___include_macros) + .Case("define", pp_define) + .Case("undef", pp_undef) + .Case("import", pp_import) + .Case("include_next", pp_include_next) + .Case("if", pp_if) + .Case("ifdef", pp_ifdef) + .Case("ifndef", pp_ifndef) + .Case("elif", pp_elif) + .Case("else", pp_else) + .Case("endif", pp_endif) + .Case("pragma", pp_pragma_import) + .Default(pp_none); + if (Kind == pp_none) { + skipDirective(Id.Name, First, End); + return false; + } + + if (Kind == pp_endif) + return lexEndif(First, End); + + if (Kind == pp_define) + return lexDefine(First, End); + + if (Kind == pp_pragma_import) + return lexPragma(First, End); + + // Everything else. + return lexDefault(Kind, Id.Name, First, End); +} + +bool Minimizer::minimizeImpl(const char *First, const char *const End) { + while (First != End) + if (lexPPLine(First, End)) + return true; + return false; +} + +bool Minimizer::minimize() { + bool Error = minimizeImpl(Input.begin(), Input.end()); + + if (!Error) { + // Add a trailing newline and an EOF on success. + if (!Out.empty() && Out.back() != '\n') + Out.push_back('\n'); + makeToken(pp_eof); + } + + // Null-terminate the output. This way the memory buffer that's passed to + // Clang will not have to worry about the terminating '\0'. 
+ Out.push_back(0); + Out.pop_back(); + return Error; +} + +bool clang::minimizeSourceToDependencyDirectives( + StringRef Input, SmallVectorImpl<char> &Output, + SmallVectorImpl<Token> &Tokens, DiagnosticsEngine *Diags, + SourceLocation InputSourceLoc) { + Output.clear(); + Tokens.clear(); + return Minimizer(Output, Tokens, Input, Diags, InputSourceLoc).minimize(); +} diff --git a/lib/Lex/HeaderMap.cpp b/lib/Lex/HeaderMap.cpp index 23cb053c2d718..e0bf58b675056 100644 --- a/lib/Lex/HeaderMap.cpp +++ b/lib/Lex/HeaderMap.cpp @@ -1,9 +1,8 @@ //===--- HeaderMap.cpp - A file that acts like dir of symlinks ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Lex/HeaderSearch.cpp b/lib/Lex/HeaderSearch.cpp index c65fb47c0fe55..108630cc26f69 100644 --- a/lib/Lex/HeaderSearch.cpp +++ b/lib/Lex/HeaderSearch.cpp @@ -1,9 +1,8 @@ //===- HeaderSearch.cpp - Resolve Header File Locations -------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -335,6 +334,7 @@ const FileEntry *DirectoryLookup::LookupFile( Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule, bool &InUserSpecifiedSystemFramework, + bool &IsFrameworkFound, bool &HasBeenMapped, SmallVectorImpl<char> &MappedName) const { InUserSpecifiedSystemFramework = false; @@ -363,7 +363,7 @@ const FileEntry *DirectoryLookup::LookupFile( if (isFramework()) return DoFrameworkLookup(Filename, HS, SearchPath, RelativePath, RequestingModule, SuggestedModule, - InUserSpecifiedSystemFramework); + InUserSpecifiedSystemFramework, IsFrameworkFound); assert(isHeaderMap() && "Unknown directory lookup"); const HeaderMap *HM = getHeaderMap(); @@ -463,7 +463,7 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup( StringRef Filename, HeaderSearch &HS, SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath, Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule, - bool &InUserSpecifiedSystemFramework) const { + bool &InUserSpecifiedSystemFramework, bool &IsFrameworkFound) const { FileManager &FileMgr = HS.getFileMgr(); // Framework names must have a '/' in the filename. @@ -472,7 +472,7 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup( // Find out if this is the home for the specified framework, by checking // HeaderSearch. Possible answers are yes/no and unknown. - HeaderSearch::FrameworkCacheEntry &CacheEntry = + FrameworkCacheEntry &CacheEntry = HS.LookupFrameworkCache(Filename.substr(0, SlashPos)); // If it is known and in some other directory, fail. @@ -517,8 +517,9 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup( } } - // Set the 'user-specified system framework' flag. + // Set out flags. 
InUserSpecifiedSystemFramework = CacheEntry.IsUserSpecifiedSystemFramework; + IsFrameworkFound = CacheEntry.Directory; if (RelativePath) { RelativePath->clear(); @@ -538,7 +539,7 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup( FrameworkName.append(Filename.begin()+SlashPos+1, Filename.end()); const FileEntry *FE = FileMgr.getFile(FrameworkName, - /*openFile=*/!SuggestedModule); + /*OpenFile=*/!SuggestedModule); if (!FE) { // Check "/System/Library/Frameworks/Cocoa.framework/PrivateHeaders/file.h" const char *Private = "Private"; @@ -548,7 +549,7 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup( SearchPath->insert(SearchPath->begin()+OrigSize, Private, Private+strlen(Private)); - FE = FileMgr.getFile(FrameworkName, /*openFile=*/!SuggestedModule); + FE = FileMgr.getFile(FrameworkName, /*OpenFile=*/!SuggestedModule); } // If we found the header and are allowed to suggest a module, do so now. @@ -697,10 +698,14 @@ const FileEntry *HeaderSearch::LookupFile( ArrayRef<std::pair<const FileEntry *, const DirectoryEntry *>> Includers, SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath, Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule, - bool *IsMapped, bool SkipCache, bool BuildSystemModule) { + bool *IsMapped, bool *IsFrameworkFound, bool SkipCache, + bool BuildSystemModule) { if (IsMapped) *IsMapped = false; + if (IsFrameworkFound) + *IsFrameworkFound = false; + if (SuggestedModule) *SuggestedModule = ModuleMap::KnownHeader(); @@ -852,16 +857,22 @@ const FileEntry *HeaderSearch::LookupFile( for (; i != SearchDirs.size(); ++i) { bool InUserSpecifiedSystemFramework = false; bool HasBeenMapped = false; + bool IsFrameworkFoundInDir = false; const FileEntry *FE = SearchDirs[i].LookupFile( Filename, *this, IncludeLoc, SearchPath, RelativePath, RequestingModule, - SuggestedModule, InUserSpecifiedSystemFramework, HasBeenMapped, - MappedName); + SuggestedModule, InUserSpecifiedSystemFramework, IsFrameworkFoundInDir, + HasBeenMapped, MappedName); if (HasBeenMapped) { CacheLookup.MappedName = copyString(Filename, LookupFileCache.getAllocator()); if (IsMapped) *IsMapped = true; } + if (IsFrameworkFound) + // Because we keep a filename remapped for subsequent search directory + // lookups, ignore IsFrameworkFoundInDir after the first remapping and not + // just for remapping in a current search directory. 
+ *IsFrameworkFound |= (IsFrameworkFoundInDir && !CacheLookup.MappedName); if (!FE) continue; CurDir = &SearchDirs[i]; @@ -927,10 +938,10 @@ const FileEntry *HeaderSearch::LookupFile( ScratchFilename += '/'; ScratchFilename += Filename; - const FileEntry *FE = - LookupFile(ScratchFilename, IncludeLoc, /*isAngled=*/true, FromDir, - CurDir, Includers.front(), SearchPath, RelativePath, - RequestingModule, SuggestedModule, IsMapped); + const FileEntry *FE = LookupFile( + ScratchFilename, IncludeLoc, /*isAngled=*/true, FromDir, CurDir, + Includers.front(), SearchPath, RelativePath, RequestingModule, + SuggestedModule, IsMapped, /*IsFrameworkFound=*/nullptr); if (checkMSVCHeaderSearch(Diags, MSFE, FE, IncludeLoc)) { if (SuggestedModule) @@ -1036,7 +1047,7 @@ LookupSubframeworkHeader(StringRef Filename, } HeadersFilename.append(Filename.begin()+SlashPos+1, Filename.end()); - if (!(FE = FileMgr.getFile(HeadersFilename, /*openFile=*/true))) { + if (!(FE = FileMgr.getFile(HeadersFilename, /*OpenFile=*/true))) { // Check ".../Frameworks/HIToolbox.framework/PrivateHeaders/HIToolbox.h" HeadersFilename = FrameworkName; HeadersFilename += "PrivateHeaders/"; @@ -1047,7 +1058,7 @@ LookupSubframeworkHeader(StringRef Filename, } HeadersFilename.append(Filename.begin()+SlashPos+1, Filename.end()); - if (!(FE = FileMgr.getFile(HeadersFilename, /*openFile=*/true))) + if (!(FE = FileMgr.getFile(HeadersFilename, /*OpenFile=*/true))) return nullptr; } @@ -1571,7 +1582,7 @@ void HeaderSearch::collectAllModules(SmallVectorImpl<Module *> &Modules) { DirNative); // Search each of the ".framework" directories to load them as modules. - llvm::vfs::FileSystem &FS = *FileMgr.getVirtualFileSystem(); + llvm::vfs::FileSystem &FS = FileMgr.getVirtualFileSystem(); for (llvm::vfs::directory_iterator Dir = FS.dir_begin(DirNative, EC), DirEnd; Dir != DirEnd && !EC; Dir.increment(EC)) { @@ -1642,7 +1653,7 @@ void HeaderSearch::loadSubdirectoryModuleMaps(DirectoryLookup &SearchDir) { FileMgr.makeAbsolutePath(Dir); SmallString<128> DirNative; llvm::sys::path::native(Dir, DirNative); - llvm::vfs::FileSystem &FS = *FileMgr.getVirtualFileSystem(); + llvm::vfs::FileSystem &FS = FileMgr.getVirtualFileSystem(); for (llvm::vfs::directory_iterator Dir = FS.dir_begin(DirNative, EC), DirEnd; Dir != DirEnd && !EC; Dir.increment(EC)) { bool IsFramework = llvm::sys::path::extension(Dir->path()) == ".framework"; @@ -1654,34 +1665,30 @@ void HeaderSearch::loadSubdirectoryModuleMaps(DirectoryLookup &SearchDir) { SearchDir.setSearchedAllModuleMaps(true); } -std::string HeaderSearch::suggestPathToFileForDiagnostics(const FileEntry *File, - bool *IsSystem) { +std::string HeaderSearch::suggestPathToFileForDiagnostics( + const FileEntry *File, llvm::StringRef MainFile, bool *IsSystem) { // FIXME: We assume that the path name currently cached in the FileEntry is // the most appropriate one for this analysis (and that it's spelled the // same way as the corresponding header search path). 
- return suggestPathToFileForDiagnostics(File->getName(), /*BuildDir=*/"", - IsSystem); + return suggestPathToFileForDiagnostics(File->getName(), /*WorkingDir=*/"", + MainFile, IsSystem); } std::string HeaderSearch::suggestPathToFileForDiagnostics( - llvm::StringRef File, llvm::StringRef WorkingDir, bool *IsSystem) { + llvm::StringRef File, llvm::StringRef WorkingDir, llvm::StringRef MainFile, + bool *IsSystem) { using namespace llvm::sys; unsigned BestPrefixLength = 0; - unsigned BestSearchDir; - - for (unsigned I = 0; I != SearchDirs.size(); ++I) { - // FIXME: Support this search within frameworks and header maps. - if (!SearchDirs[I].isNormalDir()) - continue; - - StringRef Dir = SearchDirs[I].getDir()->getName(); + // Checks whether Dir and File shares a common prefix, if they do and that's + // the longest prefix we've seen so for it returns true and updates the + // BestPrefixLength accordingly. + auto CheckDir = [&](llvm::StringRef Dir) -> bool { llvm::SmallString<32> DirPath(Dir.begin(), Dir.end()); - if (!WorkingDir.empty() && !path::is_absolute(Dir)) { + if (!WorkingDir.empty() && !path::is_absolute(Dir)) fs::make_absolute(WorkingDir, DirPath); - path::remove_dots(DirPath, /*remove_dot_dot=*/true); - Dir = DirPath; - } + path::remove_dots(DirPath, /*remove_dot_dot=*/true); + Dir = DirPath; for (auto NI = path::begin(File), NE = path::end(File), DI = path::begin(Dir), DE = path::end(Dir); /*termination condition in loop*/; ++NI, ++DI) { @@ -1700,17 +1707,37 @@ std::string HeaderSearch::suggestPathToFileForDiagnostics( unsigned PrefixLength = NI - path::begin(File); if (PrefixLength > BestPrefixLength) { BestPrefixLength = PrefixLength; - BestSearchDir = I; + return true; } break; } + // Consider all path separators equal. + if (NI->size() == 1 && DI->size() == 1 && + path::is_separator(NI->front()) && path::is_separator(DI->front())) + continue; + if (*NI != *DI) break; } + return false; + }; + + for (unsigned I = 0; I != SearchDirs.size(); ++I) { + // FIXME: Support this search within frameworks and header maps. + if (!SearchDirs[I].isNormalDir()) + continue; + + StringRef Dir = SearchDirs[I].getDir()->getName(); + if (CheckDir(Dir) && IsSystem) + *IsSystem = BestPrefixLength ? I >= SystemDirIdx : false; } - if (IsSystem) - *IsSystem = BestPrefixLength ? BestSearchDir >= SystemDirIdx : false; - return File.drop_front(BestPrefixLength); + // Try to shorten include path using TUs directory, if we couldn't find any + // suitable prefix in include search paths. + if (!BestPrefixLength && CheckDir(path::parent_path(MainFile)) && IsSystem) + *IsSystem = false; + + + return path::convert_to_slash(File.drop_front(BestPrefixLength)); } diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index d4723091114a1..db53e6bec0440 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -1,9 +1,8 @@ //===- Lexer.cpp - C Language Family Lexer --------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -688,7 +687,6 @@ PreambleBounds Lexer::ComputePreamble(StringRef Buffer, // We only end up here if we didn't recognize the preprocessor // directive or it was one that can't occur in the preamble at this // point. Roll back the current token to the location of the '#'. - InPreprocessorDirective = false; TheTok = HashTok; } @@ -2073,7 +2071,7 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { // Update the location of token as well as BufferPtr. const char *TokStart = BufferPtr; - FormTokenWithChars(Result, CurPtr, tok::angle_string_literal); + FormTokenWithChars(Result, CurPtr, tok::header_name); Result.setLiteralData(TokStart); return true; } @@ -3233,7 +3231,7 @@ LexNextToken: case '\r': if (CurPtr[0] == '\n') - Char = getAndAdvanceChar(CurPtr, Result); + (void)getAndAdvanceChar(CurPtr, Result); LLVM_FALLTHROUGH; case '\n': // If we are inside a preprocessor directive and we see the end of line, @@ -3466,7 +3464,9 @@ LexNextToken: case '"': // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); - return LexStringLiteral(Result, CurPtr, tok::string_literal); + return LexStringLiteral(Result, CurPtr, + ParsingFilename ? tok::header_name + : tok::string_literal); // C99 6.4.6: Punctuators. case '?': diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp index fa0815eb9c6c5..2108408377fb2 100644 --- a/lib/Lex/LiteralSupport.cpp +++ b/lib/Lex/LiteralSupport.cpp @@ -1,9 +1,8 @@ //===--- LiteralSupport.cpp - Code to parse and process literals ----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -617,10 +616,14 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, if (isHalf || isFloat || isLong || isFloat128) break; // HF, FF, LF, QF invalid. - if (s + 2 < ThisTokEnd && s[1] == '1' && s[2] == '6') { - s += 2; // success, eat up 2 characters. - isFloat16 = true; - continue; + // CUDA host and device may have different _Float16 support, therefore + // allows f16 literals to avoid false alarm. + // ToDo: more precise check for CUDA. + if ((PP.getTargetInfo().hasFloat16Type() || PP.getLangOpts().CUDA) && + s + 2 < ThisTokEnd && s[1] == '1' && s[2] == '6') { + s += 2; // success, eat up 2 characters. + isFloat16 = true; + continue; } isFloat = true; diff --git a/lib/Lex/MacroArgs.cpp b/lib/Lex/MacroArgs.cpp index dc2ba3074a8b1..5aa4679fad462 100644 --- a/lib/Lex/MacroArgs.cpp +++ b/lib/Lex/MacroArgs.cpp @@ -1,9 +1,8 @@ //===--- MacroArgs.cpp - Formal argument info for Macros ------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -136,15 +135,12 @@ const Token *MacroArgs::getUnexpArgument(unsigned Arg) const { return Result; } -// This function assumes that the variadic arguments are the tokens -// corresponding to the last parameter (ellipsis) - and since tokens are -// separated by the 'eof' token, if that is the only token corresponding to that -// last parameter, we know no variadic arguments were supplied. -bool MacroArgs::invokedWithVariadicArgument(const MacroInfo *const MI) const { +bool MacroArgs::invokedWithVariadicArgument(const MacroInfo *const MI, + Preprocessor &PP) { if (!MI->isVariadic()) return false; const int VariadicArgIndex = getNumMacroArguments() - 1; - return getUnexpArgument(VariadicArgIndex)->isNot(tok::eof); + return getPreExpArgument(VariadicArgIndex, PP).front().isNot(tok::eof); } /// ArgNeedsPreexpansion - If we can prove that the argument won't be affected @@ -185,7 +181,7 @@ const std::vector<Token> &MacroArgs::getPreExpArgument(unsigned Arg, // list. With this installed, we lex expanded tokens until we hit the EOF // token at the end of the unexp list. PP.EnterTokenStream(AT, NumToks, false /*disable expand*/, - false /*owns tokens*/); + false /*owns tokens*/, false /*is reinject*/); // Lex all of the macro-expanded tokens into Result. do { diff --git a/lib/Lex/MacroInfo.cpp b/lib/Lex/MacroInfo.cpp index 434c120075964..1ccd140364aeb 100644 --- a/lib/Lex/MacroInfo.cpp +++ b/lib/Lex/MacroInfo.cpp @@ -1,9 +1,8 @@ //===- MacroInfo.cpp - Information about #defined identifiers -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Lex/ModuleMap.cpp b/lib/Lex/ModuleMap.cpp index cff950b703a6f..5e0be1a57da41 100644 --- a/lib/Lex/ModuleMap.cpp +++ b/lib/Lex/ModuleMap.cpp @@ -1,9 +1,8 @@ //===- ModuleMap.cpp - Describe the layout of modules ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -807,7 +806,7 @@ std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name, return std::make_pair(Result, true); } -Module *ModuleMap::createGlobalModuleForInterfaceUnit(SourceLocation Loc) { +Module *ModuleMap::createGlobalModuleFragmentForModuleUnit(SourceLocation Loc) { PendingSubmodules.emplace_back( new Module("<global>", Loc, nullptr, /*IsFramework*/ false, /*IsExplicit*/ true, NumCreatedModules++)); @@ -815,6 +814,16 @@ Module *ModuleMap::createGlobalModuleForInterfaceUnit(SourceLocation Loc) { return PendingSubmodules.back().get(); } +Module * +ModuleMap::createPrivateModuleFragmentForInterfaceUnit(Module *Parent, + SourceLocation Loc) { + auto *Result = + new Module("<private>", Loc, Parent, /*IsFramework*/ false, + /*IsExplicit*/ true, NumCreatedModules++); + Result->Kind = Module::PrivateModuleFragment; + return Result; +} + Module *ModuleMap::createModuleForInterfaceUnit(SourceLocation Loc, StringRef Name, Module *GlobalModule) { @@ -1022,7 +1031,7 @@ Module *ModuleMap::inferFrameworkModule(const DirectoryEntry *FrameworkDir, = StringRef(FrameworkDir->getName()); llvm::sys::path::append(SubframeworksDirName, "Frameworks"); llvm::sys::path::native(SubframeworksDirName); - llvm::vfs::FileSystem &FS = *FileMgr.getVirtualFileSystem(); + llvm::vfs::FileSystem &FS = FileMgr.getVirtualFileSystem(); for (llvm::vfs::directory_iterator Dir = FS.dir_begin(SubframeworksDirName, EC), DirEnd; @@ -2398,7 +2407,7 @@ void ModuleMapParser::parseUmbrellaDirDecl(SourceLocation UmbrellaLoc) { std::error_code EC; SmallVector<Module::Header, 6> Headers; llvm::vfs::FileSystem &FS = - *SourceMgr.getFileManager().getVirtualFileSystem(); + SourceMgr.getFileManager().getVirtualFileSystem(); for (llvm::vfs::recursive_directory_iterator I(FS, Dir->getName(), EC), E; I != E && !EC; I.increment(EC)) { if (const FileEntry *FE = SourceMgr.getFileManager().getFile(I->path())) { diff --git a/lib/Lex/PPCaching.cpp b/lib/Lex/PPCaching.cpp index 9758557d7b448..31548d246d5a5 100644 --- a/lib/Lex/PPCaching.cpp +++ b/lib/Lex/PPCaching.cpp @@ -1,9 +1,8 @@ //===--- PPCaching.cpp - Handle caching lexed tokens ----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -24,6 +23,7 @@ using namespace clang; // be called multiple times and CommitBacktrackedTokens/Backtrack calls will // be combined with the EnableBacktrackAtThisPos calls in reverse order. 
void Preprocessor::EnableBacktrackAtThisPos() { + assert(LexLevel == 0 && "cannot use lookahead while lexing"); BacktrackPositions.push_back(CachedLexPos); EnterCachingLexMode(); } @@ -35,29 +35,6 @@ void Preprocessor::CommitBacktrackedTokens() { BacktrackPositions.pop_back(); } -Preprocessor::CachedTokensRange Preprocessor::LastCachedTokenRange() { - assert(isBacktrackEnabled()); - auto PrevCachedLexPos = BacktrackPositions.back(); - return CachedTokensRange{PrevCachedLexPos, CachedLexPos}; -} - -void Preprocessor::EraseCachedTokens(CachedTokensRange TokenRange) { - assert(TokenRange.Begin <= TokenRange.End); - if (CachedLexPos == TokenRange.Begin && TokenRange.Begin != TokenRange.End) { - // We have backtracked to the start of the token range as we want to consume - // them again. Erase the tokens only after consuming then. - assert(!CachedTokenRangeToErase); - CachedTokenRangeToErase = TokenRange; - return; - } - // The cached tokens were committed, so they should be erased now. - assert(TokenRange.End == CachedLexPos); - CachedTokens.erase(CachedTokens.begin() + TokenRange.Begin, - CachedTokens.begin() + TokenRange.End); - CachedLexPos = TokenRange.Begin; - ExitCachingLexMode(); -} - // Make Preprocessor re-lex the tokens that were lexed since // EnableBacktrackAtThisPos() was previously called. void Preprocessor::Backtrack() { @@ -72,15 +49,13 @@ void Preprocessor::CachingLex(Token &Result) { if (!InCachingLexMode()) return; + // The assert in EnterCachingLexMode should prevent this from happening. + assert(LexLevel == 1 && + "should not use token caching within the preprocessor"); + if (CachedLexPos < CachedTokens.size()) { Result = CachedTokens[CachedLexPos++]; - // Erase the some of the cached tokens after they are consumed when - // asked to do so. - if (CachedTokenRangeToErase && - CachedTokenRangeToErase->End == CachedLexPos) { - EraseCachedTokens(*CachedTokenRangeToErase); - CachedTokenRangeToErase = None; - } + Result.setFlag(Token::IsReinjected); return; } @@ -89,14 +64,14 @@ void Preprocessor::CachingLex(Token &Result) { if (isBacktrackEnabled()) { // Cache the lexed token. - EnterCachingLexMode(); + EnterCachingLexModeUnchecked(); CachedTokens.push_back(Result); ++CachedLexPos; return; } if (CachedLexPos < CachedTokens.size()) { - EnterCachingLexMode(); + EnterCachingLexModeUnchecked(); } else { // All cached tokens were consumed. CachedTokens.clear(); @@ -105,11 +80,23 @@ void Preprocessor::CachingLex(Token &Result) { } void Preprocessor::EnterCachingLexMode() { + // The caching layer sits on top of all the other lexers, so it's incorrect + // to cache tokens while inside a nested lex action. The cached tokens would + // be retained after returning to the enclosing lex action and, at best, + // would appear at the wrong position in the token stream. 
+ assert(LexLevel == 0 && + "entered caching lex mode while lexing something else"); + if (InCachingLexMode()) { assert(CurLexerKind == CLK_CachingLexer && "Unexpected lexer kind"); return; } + EnterCachingLexModeUnchecked(); +} + +void Preprocessor::EnterCachingLexModeUnchecked() { + assert(CurLexerKind != CLK_CachingLexer && "already in caching lex mode"); PushIncludeMacroStack(); CurLexerKind = CLK_CachingLexer; } diff --git a/lib/Lex/PPCallbacks.cpp b/lib/Lex/PPCallbacks.cpp index 952b926005b0a..cd8b04b20d245 100644 --- a/lib/Lex/PPCallbacks.cpp +++ b/lib/Lex/PPCallbacks.cpp @@ -1,9 +1,8 @@ //===--- PPCallbacks.cpp - Callbacks for Preprocessor actions ---*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/Lex/PPConditionalDirectiveRecord.cpp b/lib/Lex/PPConditionalDirectiveRecord.cpp index 12a77849b8b30..facee28007c7d 100644 --- a/lib/Lex/PPConditionalDirectiveRecord.cpp +++ b/lib/Lex/PPConditionalDirectiveRecord.cpp @@ -1,9 +1,8 @@ //===--- PPConditionalDirectiveRecord.h - Preprocessing Directives-*- C++ -*-=// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -26,9 +25,8 @@ bool PPConditionalDirectiveRecord::rangeIntersectsConditionalDirective( if (Range.isInvalid()) return false; - CondDirectiveLocsTy::const_iterator - low = std::lower_bound(CondDirectiveLocs.begin(), CondDirectiveLocs.end(), - Range.getBegin(), CondDirectiveLoc::Comp(SourceMgr)); + CondDirectiveLocsTy::const_iterator low = llvm::lower_bound( + CondDirectiveLocs, Range.getBegin(), CondDirectiveLoc::Comp(SourceMgr)); if (low == CondDirectiveLocs.end()) return false; @@ -56,9 +54,8 @@ SourceLocation PPConditionalDirectiveRecord::findConditionalDirectiveRegionLoc( Loc)) return CondDirectiveStack.back(); - CondDirectiveLocsTy::const_iterator - low = std::lower_bound(CondDirectiveLocs.begin(), CondDirectiveLocs.end(), - Loc, CondDirectiveLoc::Comp(SourceMgr)); + CondDirectiveLocsTy::const_iterator low = llvm::lower_bound( + CondDirectiveLocs, Loc, CondDirectiveLoc::Comp(SourceMgr)); assert(low != CondDirectiveLocs.end()); return low->getRegionLoc(); } diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp index d62a3513c7770..2756042f23eb2 100644 --- a/lib/Lex/PPDirectives.cpp +++ b/lib/Lex/PPDirectives.cpp @@ -1,9 +1,8 @@ //===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// @@ -79,12 +78,18 @@ Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc, /// Read and discard all tokens remaining on the current line until /// the tok::eod token is found. -void Preprocessor::DiscardUntilEndOfDirective() { +SourceRange Preprocessor::DiscardUntilEndOfDirective() { Token Tmp; - do { - LexUnexpandedToken(Tmp); + SourceRange Res; + + LexUnexpandedToken(Tmp); + Res.setBegin(Tmp.getLocation()); + while (Tmp.isNot(tok::eod)) { assert(Tmp.isNot(tok::eof) && "EOF seen while discarding directive tokens"); - } while (Tmp.isNot(tok::eod)); + LexUnexpandedToken(Tmp); + } + Res.setEnd(Tmp.getLocation()); + return Res; } /// Enumerates possible cases of #define/#undef a reserved identifier. @@ -331,7 +336,10 @@ void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef, /// /// If not, emit a diagnostic and consume up until the eod. If EnableMacros is /// true, then we consider macros that expand to zero tokens as being ok. -void Preprocessor::CheckEndOfDirective(const char *DirType, bool EnableMacros) { +/// +/// Returns the location of the end of the directive. +SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType, + bool EnableMacros) { Token Tmp; // Lex unexpanded tokens for most directives: macros might expand to zero // tokens, causing us to miss diagnosing invalid lines. Some directives (like @@ -346,18 +354,19 @@ void Preprocessor::CheckEndOfDirective(const char *DirType, bool EnableMacros) { while (Tmp.is(tok::comment)) // Skip comments in -C mode. LexUnexpandedToken(Tmp); - if (Tmp.isNot(tok::eod)) { - // Add a fixit in GNU/C99/C++ mode. Don't offer a fixit for strict-C89, - // or if this is a macro-style preprocessing directive, because it is more - // trouble than it is worth to insert /**/ and check that there is no /**/ - // in the range also. - FixItHint Hint; - if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) && - !CurTokenLexer) - Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//"); - Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint; - DiscardUntilEndOfDirective(); - } + if (Tmp.is(tok::eod)) + return Tmp.getLocation(); + + // Add a fixit in GNU/C99/C++ mode. Don't offer a fixit for strict-C89, + // or if this is a macro-style preprocessing directive, because it is more + // trouble than it is worth to insert /**/ and check that there is no /**/ + // in the range also. + FixItHint Hint; + if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) && + !CurTokenLexer) + Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//"); + Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint; + return DiscardUntilEndOfDirective().getEnd(); } /// SkipExcludedConditionalBlock - We just read a \#if or related directive and @@ -538,19 +547,19 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc, if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) { DiscardUntilEndOfDirective(); } else { - const SourceLocation CondBegin = CurPPLexer->getSourceLocation(); // Restore the value of LexingRawMode so that identifiers are // looked up, etc, inside the #elif expression. 
assert(CurPPLexer->LexingRawMode && "We have to be skipping here!"); CurPPLexer->LexingRawMode = false; IdentifierInfo *IfNDefMacro = nullptr; - const bool CondValue = EvaluateDirectiveExpression(IfNDefMacro).Conditional; + DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro); + const bool CondValue = DER.Conditional; CurPPLexer->LexingRawMode = true; if (Callbacks) { - const SourceLocation CondEnd = CurPPLexer->getSourceLocation(); - Callbacks->Elif(Tok.getLocation(), - SourceRange(CondBegin, CondEnd), - (CondValue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False), CondInfo.IfLoc); + Callbacks->Elif( + Tok.getLocation(), DER.ExprRange, + (CondValue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False), + CondInfo.IfLoc); } // If this condition is true, enter it! if (CondValue) { @@ -605,9 +614,16 @@ Preprocessor::getModuleHeaderToIncludeForDiagnostics(SourceLocation IncLoc, SourceLocation Loc) { assert(M && "no module to include"); + // If the context is the global module fragment of some module, we never + // want to return that file; instead, we want the innermost include-guarded + // header that it included. + bool InGlobalModuleFragment = M->Kind == Module::GlobalModuleFragment; + // If we have a module import syntax, we shouldn't include a header to // make a particular module visible. - if (getLangOpts().ObjC) + if ((getLangOpts().ObjC || getLangOpts().CPlusPlusModules || + getLangOpts().ModulesTS) && + !InGlobalModuleFragment) return nullptr; Module *TopM = M->getTopLevelModule(); @@ -624,6 +640,13 @@ Preprocessor::getModuleHeaderToIncludeForDiagnostics(SourceLocation IncLoc, if (!FE) break; + if (InGlobalModuleFragment) { + if (getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE)) + return FE; + Loc = SM.getIncludeLoc(ID); + continue; + } + bool InTextualHeader = false; for (auto Header : HeaderInfo.getModuleMap().findAllModulesForHeader(FE)) { if (!Header.getModule()->isSubModuleOf(TopM)) @@ -660,7 +683,8 @@ const FileEntry *Preprocessor::LookupFile( const DirectoryLookup *FromDir, const FileEntry *FromFile, const DirectoryLookup *&CurDir, SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath, - ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, bool SkipCache) { + ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, + bool *IsFrameworkFound, bool SkipCache) { Module *RequestingModule = getModuleForLocation(FilenameLoc); bool RequestingModuleIsModuleInterface = !SourceMgr.isInMainFile(FilenameLoc); @@ -718,7 +742,8 @@ const FileEntry *Preprocessor::LookupFile( while (const FileEntry *FE = HeaderInfo.LookupFile( Filename, FilenameLoc, isAngled, TmpFromDir, TmpCurDir, Includers, SearchPath, RelativePath, RequestingModule, - SuggestedModule, /*IsMapped=*/nullptr, SkipCache)) { + SuggestedModule, /*IsMapped=*/nullptr, + /*IsFrameworkFound=*/nullptr, SkipCache)) { // Keep looking as if this file did a #include_next. TmpFromDir = TmpCurDir; ++TmpFromDir; @@ -734,8 +759,8 @@ const FileEntry *Preprocessor::LookupFile( // Do a standard file entry lookup. 
const FileEntry *FE = HeaderInfo.LookupFile( Filename, FilenameLoc, isAngled, FromDir, CurDir, Includers, SearchPath, - RelativePath, RequestingModule, SuggestedModule, IsMapped, SkipCache, - BuildSystemModule); + RelativePath, RequestingModule, SuggestedModule, IsMapped, + IsFrameworkFound, SkipCache, BuildSystemModule); if (FE) { if (SuggestedModule && !LangOpts.AsmPreprocessor) HeaderInfo.getModuleMap().diagnoseHeaderInclusion( @@ -822,10 +847,10 @@ void Preprocessor::HandleSkippedDirectiveWhileUsingPCH(Token &Result, return HandleIncludeDirective(HashLoc, Result); } if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) { - Token P = LookAhead(0); - auto *II = P.getIdentifierInfo(); + Lex(Result); + auto *II = Result.getIdentifierInfo(); if (II && II->getName() == "hdrstop") - return HandlePragmaDirective(HashLoc, PIK_HashPragma); + return HandlePragmaHdrstop(Result); } } DiscardUntilEndOfDirective(); @@ -879,6 +904,8 @@ void Preprocessor::HandleDirective(Token &Result) { case tok::pp___include_macros: case tok::pp_pragma: Diag(Result, diag::err_embedded_directive) << II->getName(); + Diag(*ArgMacro, diag::note_macro_expansion_here) + << ArgMacro->getIdentifierInfo(); DiscardUntilEndOfDirective(); return; default: @@ -955,7 +982,7 @@ void Preprocessor::HandleDirective(Token &Result) { // C99 6.10.6 - Pragma Directive. case tok::pp_pragma: - return HandlePragmaDirective(SavedHash.getLocation(), PIK_HashPragma); + return HandlePragmaDirective({PIK_HashPragma, SavedHash.getLocation()}); // GNU Extensions. case tok::pp_import: @@ -1008,7 +1035,7 @@ void Preprocessor::HandleDirective(Token &Result) { // Enter this token stream so that we re-lex the tokens. Make sure to // enable macro expansion, in case the token after the # is an identifier // that is expanded. - EnterTokenStream(std::move(Toks), 2, false); + EnterTokenStream(std::move(Toks), 2, false, /*IsReinject*/false); return; } @@ -1116,19 +1143,24 @@ void Preprocessor::HandleLineDirective() { ; // ok else if (StrTok.isNot(tok::string_literal)) { Diag(StrTok, diag::err_pp_line_invalid_filename); - return DiscardUntilEndOfDirective(); + DiscardUntilEndOfDirective(); + return; } else if (StrTok.hasUDSuffix()) { Diag(StrTok, diag::err_invalid_string_udl); - return DiscardUntilEndOfDirective(); + DiscardUntilEndOfDirective(); + return; } else { // Parse and validate the string, converting it into a unique ID. StringLiteralParser Literal(StrTok, *this); assert(Literal.isAscii() && "Didn't allow wide strings in"); - if (Literal.hadError) - return DiscardUntilEndOfDirective(); + if (Literal.hadError) { + DiscardUntilEndOfDirective(); + return; + } if (Literal.Pascal) { Diag(StrTok, diag::err_pp_linemarker_invalid_filename); - return DiscardUntilEndOfDirective(); + DiscardUntilEndOfDirective(); + return; } FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString()); @@ -1261,19 +1293,24 @@ void Preprocessor::HandleDigitDirective(Token &DigitTok) { FileKind = SourceMgr.getFileCharacteristic(DigitTok.getLocation()); } else if (StrTok.isNot(tok::string_literal)) { Diag(StrTok, diag::err_pp_linemarker_invalid_filename); - return DiscardUntilEndOfDirective(); + DiscardUntilEndOfDirective(); + return; } else if (StrTok.hasUDSuffix()) { Diag(StrTok, diag::err_invalid_string_udl); - return DiscardUntilEndOfDirective(); + DiscardUntilEndOfDirective(); + return; } else { // Parse and validate the string, converting it into a unique ID. 
StringLiteralParser Literal(StrTok, *this); assert(Literal.isAscii() && "Didn't allow wide strings in"); - if (Literal.hadError) - return DiscardUntilEndOfDirective(); + if (Literal.hadError) { + DiscardUntilEndOfDirective(); + return; + } if (Literal.Pascal) { Diag(StrTok, diag::err_pp_linemarker_invalid_filename); - return DiscardUntilEndOfDirective(); + DiscardUntilEndOfDirective(); + return; } FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString()); @@ -1343,7 +1380,8 @@ void Preprocessor::HandleIdentSCCSDirective(Token &Tok) { if (StrTok.hasUDSuffix()) { Diag(StrTok, diag::err_invalid_string_udl); - return DiscardUntilEndOfDirective(); + DiscardUntilEndOfDirective(); + return; } // Verify that there is nothing after the string, other than EOD. @@ -1381,7 +1419,7 @@ void Preprocessor::HandleMacroPublicDirective(Token &Tok) { // Note that this macro has now been exported. appendMacroDirective(II, AllocateVisibilityMacroDirective( - MacroNameTok.getLocation(), /*IsPublic=*/true)); + MacroNameTok.getLocation(), /*isPublic=*/true)); } /// Handle a #private directive. @@ -1408,7 +1446,7 @@ void Preprocessor::HandleMacroPrivateDirective() { // Note that this macro has now been marked private. appendMacroDirective(II, AllocateVisibilityMacroDirective( - MacroNameTok.getLocation(), /*IsPublic=*/false)); + MacroNameTok.getLocation(), /*isPublic=*/false)); } //===----------------------------------------------------------------------===// @@ -1426,6 +1464,14 @@ bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc, // Get the text form of the filename. assert(!Buffer.empty() && "Can't have tokens with empty spellings!"); + // FIXME: Consider warning on some of the cases described in C11 6.4.7/3 and + // C++20 [lex.header]/2: + // + // If `"`, `'`, `\`, `/*`, or `//` appears in a header-name, then + // in C: behavior is undefined + // in C++: program is conditionally-supported with implementation-defined + // semantics + // Make sure the filename is <x> or "x". bool isAngled; if (Buffer[0] == '<') { @@ -1460,67 +1506,6 @@ bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc, return isAngled; } -// Handle cases where the \#include name is expanded from a macro -// as multiple tokens, which need to be glued together. -// -// This occurs for code like: -// \code -// \#define FOO <a/b.h> -// \#include FOO -// \endcode -// because in this case, "<a/b.h>" is returned as 7 tokens, not one. -// -// This code concatenates and consumes tokens up to the '>' token. It returns -// false if the > was found, otherwise it returns true if it finds and consumes -// the EOD marker. -bool Preprocessor::ConcatenateIncludeName(SmallString<128> &FilenameBuffer, - SourceLocation &End) { - Token CurTok; - - Lex(CurTok); - while (CurTok.isNot(tok::eod)) { - End = CurTok.getLocation(); - - // FIXME: Provide code completion for #includes. - if (CurTok.is(tok::code_completion)) { - setCodeCompletionReached(); - Lex(CurTok); - continue; - } - - // Append the spelling of this token to the buffer. If there was a space - // before it, add it now. - if (CurTok.hasLeadingSpace()) - FilenameBuffer.push_back(' '); - - // Get the spelling of the token, directly into FilenameBuffer if possible. - size_t PreAppendSize = FilenameBuffer.size(); - FilenameBuffer.resize(PreAppendSize+CurTok.getLength()); - - const char *BufPtr = &FilenameBuffer[PreAppendSize]; - unsigned ActualLen = getSpelling(CurTok, BufPtr); - - // If the token was spelled somewhere else, copy it into FilenameBuffer. 
- if (BufPtr != &FilenameBuffer[PreAppendSize]) - memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen); - - // Resize FilenameBuffer to the correct size. - if (CurTok.getLength() != ActualLen) - FilenameBuffer.resize(PreAppendSize+ActualLen); - - // If we found the '>' marker, return success. - if (CurTok.is(tok::greater)) - return false; - - Lex(CurTok); - } - - // If we hit the eod marker, emit an error and return true so that the caller - // knows the EOD has been read. - Diag(CurTok.getLocation(), diag::err_pp_expects_filename); - return true; -} - /// Push a token onto the token stream containing an annotation. void Preprocessor::EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind, @@ -1533,7 +1518,7 @@ void Preprocessor::EnterAnnotationToken(SourceRange Range, Tok[0].setLocation(Range.getBegin()); Tok[0].setAnnotationEndLoc(Range.getEnd()); Tok[0].setAnnotationValue(AnnotationVal); - EnterTokenStream(std::move(Tok), 1, true); + EnterTokenStream(std::move(Tok), 1, true, /*IsReinject*/ false); } /// Produce a diagnostic informing the user that a #include or similar @@ -1542,7 +1527,13 @@ static void diagnoseAutoModuleImport( Preprocessor &PP, SourceLocation HashLoc, Token &IncludeTok, ArrayRef<std::pair<IdentifierInfo *, SourceLocation>> Path, SourceLocation PathEnd) { - assert(PP.getLangOpts().ObjC && "no import syntax available"); + StringRef ImportKeyword; + if (PP.getLangOpts().ObjC) + ImportKeyword = "@import"; + else if (PP.getLangOpts().ModulesTS || PP.getLangOpts().CPlusPlusModules) + ImportKeyword = "import"; + else + return; // no import syntax available SmallString<128> PathString; for (size_t I = 0, N = Path.size(); I != N; ++I) { @@ -1577,8 +1568,8 @@ static void diagnoseAutoModuleImport( /*IsTokenRange=*/false); PP.Diag(HashLoc, diag::warn_auto_module_import) << IncludeKind << PathString - << FixItHint::CreateReplacement(ReplaceRange, - ("@import " + PathString + ";").str()); + << FixItHint::CreateReplacement( + ReplaceRange, (ImportKeyword + " " + PathString + ";").str()); } // Given a vector of path components and a string containing the real @@ -1648,72 +1639,79 @@ bool Preprocessor::checkModuleIsAvailable(const LangOptions &LangOpts, void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, Token &IncludeTok, const DirectoryLookup *LookupFrom, - const FileEntry *LookupFromFile, - bool isImport) { + const FileEntry *LookupFromFile) { Token FilenameTok; - CurPPLexer->LexIncludeFilename(FilenameTok); - - // Reserve a buffer to get the spelling. - SmallString<128> FilenameBuffer; - StringRef Filename; - SourceLocation End; - SourceLocation CharEnd; // the end of this directive, in characters + if (LexHeaderName(FilenameTok)) + return; - switch (FilenameTok.getKind()) { - case tok::eod: - // If the token kind is EOD, the error has already been diagnosed. + if (FilenameTok.isNot(tok::header_name)) { + Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); + if (FilenameTok.isNot(tok::eod)) + DiscardUntilEndOfDirective(); return; + } - case tok::angle_string_literal: - case tok::string_literal: - Filename = getSpelling(FilenameTok, FilenameBuffer); - End = FilenameTok.getLocation(); - CharEnd = End.getLocWithOffset(FilenameTok.getLength()); - break; + // Verify that there is nothing after the filename, other than EOD. Note + // that we allow macros that expand to nothing after the filename, because + // this falls into the category of "#include pp-tokens new-line" specified + // in C99 6.10.2p4. 
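[Editor's note: the diagnoseAutoModuleImport change above selects the replacement keyword from the language mode instead of assuming Objective-C. A small illustration of the fix-it it now offers; module and header names are hypothetical and the diagnostic text is paraphrased.]

  #include <Foo/Bar.h>
  // warning: treating #include as an import of module 'Foo.Bar'
  //   Objective-C fix-it:              @import Foo.Bar;
  //   Modules TS / C++ modules fix-it: import Foo.Bar;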
+ SourceLocation EndLoc = + CheckEndOfDirective(IncludeTok.getIdentifierInfo()->getNameStart(), true); - case tok::less: - // This could be a <foo/bar.h> file coming from a macro expansion. In this - // case, glue the tokens together into FilenameBuffer and interpret those. - FilenameBuffer.push_back('<'); - if (ConcatenateIncludeName(FilenameBuffer, End)) - return; // Found <eod> but no ">"? Diagnostic already emitted. - Filename = FilenameBuffer; - CharEnd = End.getLocWithOffset(1); + auto Action = HandleHeaderIncludeOrImport(HashLoc, IncludeTok, FilenameTok, + EndLoc, LookupFrom, LookupFromFile); + switch (Action.Kind) { + case ImportAction::None: + case ImportAction::SkippedModuleImport: + break; + case ImportAction::ModuleBegin: + EnterAnnotationToken(SourceRange(HashLoc, EndLoc), + tok::annot_module_begin, Action.ModuleForHeader); + break; + case ImportAction::ModuleImport: + EnterAnnotationToken(SourceRange(HashLoc, EndLoc), + tok::annot_module_include, Action.ModuleForHeader); break; - default: - Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); - DiscardUntilEndOfDirective(); - return; } +} + +/// Handle either a #include-like directive or an import declaration that names +/// a header file. +/// +/// \param HashLoc The location of the '#' token for an include, or +/// SourceLocation() for an import declaration. +/// \param IncludeTok The include / include_next / import token. +/// \param FilenameTok The header-name token. +/// \param EndLoc The location at which any imported macros become visible. +/// \param LookupFrom For #include_next, the starting directory for the +/// directory lookup. +/// \param LookupFromFile For #include_next, the starting file for the directory +/// lookup. +Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport( + SourceLocation HashLoc, Token &IncludeTok, Token &FilenameTok, + SourceLocation EndLoc, const DirectoryLookup *LookupFrom, + const FileEntry *LookupFromFile) { + SmallString<128> FilenameBuffer; + StringRef Filename = getSpelling(FilenameTok, FilenameBuffer); + SourceLocation CharEnd = FilenameTok.getEndLoc(); CharSourceRange FilenameRange = CharSourceRange::getCharRange(FilenameTok.getLocation(), CharEnd); StringRef OriginalFilename = Filename; bool isAngled = GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename); + // If GetIncludeFilenameSpelling set the start ptr to null, there was an // error. - if (Filename.empty()) { - DiscardUntilEndOfDirective(); - return; - } + if (Filename.empty()) + return {ImportAction::None}; - // Verify that there is nothing after the filename, other than EOD. Note that - // we allow macros that expand to nothing after the filename, because this - // falls into the category of "#include pp-tokens new-line" specified in - // C99 6.10.2p4. - CheckEndOfDirective(IncludeTok.getIdentifierInfo()->getNameStart(), true); - - // Check that we don't have infinite #include recursion. - if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) { - Diag(FilenameTok, diag::err_pp_include_too_deep); - HasReachedMaxIncludeDepth = true; - return; - } + bool IsImportDecl = HashLoc.isInvalid(); + SourceLocation StartLoc = IsImportDecl ? IncludeTok.getLocation() : HashLoc; // Complain about attempts to #include files in an audit pragma. 
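[Editor's note: the CheckEndOfDirective call above enforces the C99 6.10.2p4 rule described in the comment: macros that expand to nothing may follow the header-name, while any other trailing token is diagnosed. For example, with a hypothetical macro name:]

  #define EMPTY
  #include <stddef.h> EMPTY   // accepted: EMPTY contributes no tokens
  #include <stddef.h> junk    // diagnosed: extra tokens at end of #include directive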
if (PragmaARCCFCodeAuditedLoc.isValid()) { - Diag(HashLoc, diag::err_pp_include_in_arc_cf_code_audited); + Diag(StartLoc, diag::err_pp_include_in_arc_cf_code_audited) << IsImportDecl; Diag(PragmaARCCFCodeAuditedLoc, diag::note_pragma_entered_here); // Immediately leave the pragma. @@ -1722,7 +1720,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, // Complain about attempts to #include files in an assume-nonnull pragma. if (PragmaAssumeNonNullLoc.isValid()) { - Diag(HashLoc, diag::err_pp_include_in_assume_nonnull); + Diag(StartLoc, diag::err_pp_include_in_assume_nonnull) << IsImportDecl; Diag(PragmaAssumeNonNullLoc, diag::note_pragma_entered_here); // Immediately leave the pragma. @@ -1740,6 +1738,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, // Search include directories. bool IsMapped = false; + bool IsFrameworkFound = false; const DirectoryLookup *CurDir; SmallString<1024> SearchPath; SmallString<1024> RelativePath; @@ -1758,7 +1757,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, FilenameLoc, LangOpts.MSVCCompat ? NormalizedPath.c_str() : Filename, isAngled, LookupFrom, LookupFromFile, CurDir, Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr, - &SuggestedModule, &IsMapped); + &SuggestedModule, &IsMapped, &IsFrameworkFound); if (!File) { if (Callbacks) { @@ -1775,7 +1774,8 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, FilenameLoc, LangOpts.MSVCCompat ? NormalizedPath.c_str() : Filename, isAngled, LookupFrom, LookupFromFile, CurDir, nullptr, nullptr, - &SuggestedModule, &IsMapped, /*SkipCache*/ true); + &SuggestedModule, &IsMapped, /*IsFrameworkFound=*/nullptr, + /*SkipCache*/ true); } } } @@ -1790,12 +1790,14 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, LangOpts.MSVCCompat ? NormalizedPath.c_str() : Filename, false, LookupFrom, LookupFromFile, CurDir, Callbacks ? &SearchPath : nullptr, - Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped); + Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped, + /*IsFrameworkFound=*/nullptr); if (File) { - SourceRange Range(FilenameTok.getLocation(), CharEnd); - Diag(FilenameTok, diag::err_pp_file_not_found_angled_include_not_fatal) << - Filename << - FixItHint::CreateReplacement(Range, "\"" + Filename.str() + "\""); + Diag(FilenameTok, + diag::err_pp_file_not_found_angled_include_not_fatal) + << Filename << IsImportDecl + << FixItHint::CreateReplacement(FilenameRange, + "\"" + Filename.str() + "\""); } } @@ -1826,14 +1828,15 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, : TypoCorrectionName, isAngled, LookupFrom, LookupFromFile, CurDir, Callbacks ? &SearchPath : nullptr, - Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped); + Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped, + /*IsFrameworkFound=*/nullptr); if (File) { - SourceRange Range(FilenameTok.getLocation(), CharEnd); - auto Hint = isAngled - ? FixItHint::CreateReplacement( - Range, "<" + TypoCorrectionName.str() + ">") - : FixItHint::CreateReplacement( - Range, "\"" + TypoCorrectionName.str() + "\""); + auto Hint = + isAngled + ? 
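[Editor's note: the two diagnostics above now also state whether the offending directive was an import declaration (IsImportDecl). The situation they reject, sketched with the assume-nonnull pragma; diagnostic wording paraphrased:]

  #pragma clang assume_nonnull begin
  #include "other.h"           // error: includes/imports are not allowed inside an assume-nonnull region
  #pragma clang assume_nonnull end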
FixItHint::CreateReplacement( + FilenameRange, "<" + TypoCorrectionName.str() + ">") + : FixItHint::CreateReplacement( + FilenameRange, "\"" + TypoCorrectionName.str() + "\""); Diag(FilenameTok, diag::err_pp_file_not_found_typo_not_fatal) << OriginalFilename << TypoCorrectionName << Hint; // We found the file, so set the Filename to the name after typo @@ -1843,38 +1846,63 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, } // If the file is still not found, just go with the vanilla diagnostic - if (!File) + if (!File) { Diag(FilenameTok, diag::err_pp_file_not_found) << OriginalFilename << FilenameRange; + if (IsFrameworkFound) { + size_t SlashPos = OriginalFilename.find('/'); + assert(SlashPos != StringRef::npos && + "Include with framework name should have '/' in the filename"); + StringRef FrameworkName = OriginalFilename.substr(0, SlashPos); + FrameworkCacheEntry &CacheEntry = + HeaderInfo.LookupFrameworkCache(FrameworkName); + assert(CacheEntry.Directory && "Found framework should be in cache"); + Diag(FilenameTok, diag::note_pp_framework_without_header) + << OriginalFilename.substr(SlashPos + 1) << FrameworkName + << CacheEntry.Directory->getName(); + } + } } } if (usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) { if (isPCHThroughHeader(File)) SkippingUntilPCHThroughHeader = false; - return; + return {ImportAction::None}; + } + + // Check for circular inclusion of the main file. + // We can't generate a consistent preamble with regard to the conditional + // stack if the main file is included again as due to the preamble bounds + // some directives (e.g. #endif of a header guard) will never be seen. + // Since this will lead to confusing errors, avoid the inclusion. + if (File && PreambleConditionalStack.isRecording() && + SourceMgr.translateFile(File) == SourceMgr.getMainFileID()) { + Diag(FilenameTok.getLocation(), + diag::err_pp_including_mainfile_in_preamble); + return {ImportAction::None}; } - // Should we enter the source file? Set to false if either the source file is + // Should we enter the source file? Set to Skip if either the source file is // known to have no effect beyond its effect on module visibility -- that is, - // if it's got an include guard that is already defined or is a modular header - // we've imported or already built. - bool ShouldEnter = true; + // if it's got an include guard that is already defined, set to Import if it + // is a modular header we've already built and should import. + enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter; if (PPOpts->SingleFileParseMode) - ShouldEnter = false; + Action = IncludeLimitReached; // If we've reached the max allowed include depth, it is usually due to an // include cycle. Don't enter already processed files again as it can lead to // reaching the max allowed include depth again. - if (ShouldEnter && HasReachedMaxIncludeDepth && File && + if (Action == Enter && HasReachedMaxIncludeDepth && File && HeaderInfo.getFileInfo(File).NumIncludes) - ShouldEnter = false; + Action = IncludeLimitReached; // Determine whether we should try to import the module for this #include, if // there is one. Don't do so if precompiled module support is disabled or we // are processing this module textually (because we're building the module). 
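[Editor's note: with the new IsFrameworkFound plumbing, a failed framework-style include now gets an extra note pointing at the framework that was found. Roughly, with hypothetical framework and header names and paraphrased diagnostic text:]

  #include <CoreThing/Missing.h>
  // error: 'CoreThing/Missing.h' file not found
  // note: did not find header 'Missing.h' in framework 'CoreThing'
  //       (loaded from /path/to/CoreThing.framework)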
- if (ShouldEnter && File && SuggestedModule && getLangOpts().Modules && + if (Action == Enter && File && SuggestedModule && getLangOpts().Modules && !isForModuleBuilding(SuggestedModule.getModule(), getLangOpts().CurrentModule, getLangOpts().ModuleName)) { @@ -1887,7 +1915,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, Diag(FilenameTok.getLocation(), diag::note_implicit_top_level_module_import_here) << SuggestedModule.getModule()->getTopLevelModuleName(); - return; + return {ImportAction::None}; } // Compute the module access path corresponding to this module. @@ -1900,9 +1928,8 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, std::reverse(Path.begin(), Path.end()); // Warn that we're replacing the include/import with a module import. - // We only do this in Objective-C, where we have a module-import syntax. - if (getLangOpts().ObjC) - diagnoseAutoModuleImport(*this, HashLoc, IncludeTok, Path, CharEnd); + if (!IsImportDecl) + diagnoseAutoModuleImport(*this, StartLoc, IncludeTok, Path, CharEnd); // Load the module to import its macros. We'll make the declarations // visible when the parser gets here. @@ -1910,13 +1937,13 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, // and making the module loader convert it back again. ModuleLoadResult Imported = TheModuleLoader.loadModule( IncludeTok.getLocation(), Path, Module::Hidden, - /*IsIncludeDirective=*/true); + /*IsInclusionDirective=*/true); assert((Imported == nullptr || Imported == SuggestedModule.getModule()) && "the imported module is different than the suggested one"); - if (Imported) - ShouldEnter = false; - else if (Imported.isMissingExpected()) { + if (Imported) { + Action = Import; + } else if (Imported.isMissingExpected()) { // We failed to find a submodule that we assumed would exist (because it // was in the directory of an umbrella header, for instance), but no // actual module containing it exists (because the umbrella header is @@ -1935,7 +1962,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof); CurLexer->cutOffLexing(); } - return; + return {ImportAction::None}; } } @@ -1947,33 +1974,54 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, if (File) FileCharacter = std::max(HeaderInfo.getFileDirFlavor(File), FileCharacter); + // If this is a '#import' or an import-declaration, don't re-enter the file. + // + // FIXME: If we have a suggested module for a '#include', and we've already + // visited this file, don't bother entering it again. We know it has no + // further effect. + bool EnterOnce = + IsImportDecl || + IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp_import; + // Ask HeaderInfo if we should enter this #include file. If not, #including // this file will have no effect. - bool SkipHeader = false; - if (ShouldEnter && File && - !HeaderInfo.ShouldEnterIncludeFile(*this, File, isImport, + if (Action == Enter && File && + !HeaderInfo.ShouldEnterIncludeFile(*this, File, EnterOnce, getLangOpts().Modules, SuggestedModule.getModule())) { - ShouldEnter = false; - SkipHeader = true; + // Even if we've already preprocessed this header once and know that we + // don't need to see its contents again, we still need to import it if it's + // modular because we might not have imported it from this submodule before. + // + // FIXME: We don't do this when compiling a PCH because the AST + // serialization layer can't cope with it. 
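[Editor's note: the EnterOnce flag above encodes the long-standing '#import' semantics and extends them to import declarations: the named file is entered at most once even without an include guard. For example:]

  #import "config.h"   // entered and preprocessed
  #import "config.h"   // skipped: #import never re-enters a file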
This means we get local + // submodule visibility semantics wrong in that case. + Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip; } - if (Callbacks) { + if (Callbacks && !IsImportDecl) { // Notify the callback object that we've seen an inclusion directive. + // FIXME: Use a different callback for a pp-import? Callbacks->InclusionDirective( HashLoc, IncludeTok, LangOpts.MSVCCompat ? NormalizedPath.c_str() : Filename, isAngled, FilenameRange, File, SearchPath, RelativePath, - ShouldEnter ? nullptr : SuggestedModule.getModule(), FileCharacter); - if (SkipHeader && !SuggestedModule.getModule()) + Action == Import ? SuggestedModule.getModule() : nullptr, + FileCharacter); + if (Action == Skip) Callbacks->FileSkipped(*File, FilenameTok, FileCharacter); } if (!File) - return; + return {ImportAction::None}; - // FIXME: If we have a suggested module, and we've already visited this file, - // don't bother entering it again. We know it has no further effect. + // If this is a C++20 pp-import declaration, diagnose if we didn't find any + // module corresponding to the named header. + if (IsImportDecl && !SuggestedModule) { + Diag(FilenameTok, diag::err_header_import_not_header_unit) + << OriginalFilename << File->getName(); + return {ImportAction::None}; + } // Issue a diagnostic if the name of the file on disk has a different case // than the one we're about to open. @@ -2005,37 +2053,50 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, // For other system headers, we don't. They can be controlled separately. auto DiagId = (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name)) ? diag::pp_nonportable_path : diag::pp_nonportable_system_path; - SourceRange Range(FilenameTok.getLocation(), CharEnd); Diag(FilenameTok, DiagId) << Path << - FixItHint::CreateReplacement(Range, Path); + FixItHint::CreateReplacement(FilenameRange, Path); } } - // If we don't need to enter the file, stop now. - if (!ShouldEnter) { + switch (Action) { + case Skip: + // If we don't need to enter the file, stop now. + if (Module *M = SuggestedModule.getModule()) + return {ImportAction::SkippedModuleImport, M}; + return {ImportAction::None}; + + case IncludeLimitReached: + // If we reached our include limit and don't want to enter any more files, + // don't go any further. + return {ImportAction::None}; + + case Import: { // If this is a module import, make it visible if needed. - if (auto *M = SuggestedModule.getModule()) { - // When building a pch, -fmodule-name tells the compiler to textually - // include headers in the specified module. But it is possible that - // ShouldEnter is false because we are skipping the header. In that - // case, We are not importing the specified module. - if (SkipHeader && getLangOpts().CompilingPCH && - isForModuleBuilding(M, getLangOpts().CurrentModule, - getLangOpts().ModuleName)) - return; + Module *M = SuggestedModule.getModule(); + assert(M && "no module to import"); - makeModuleVisible(M, HashLoc); + makeModuleVisible(M, EndLoc); - if (IncludeTok.getIdentifierInfo()->getPPKeywordID() != - tok::pp___include_macros) - EnterAnnotationToken(SourceRange(HashLoc, End), - tok::annot_module_include, M); - } - return; + if (IncludeTok.getIdentifierInfo()->getPPKeywordID() == + tok::pp___include_macros) + return {ImportAction::None}; + + return {ImportAction::ModuleImport, M}; + } + + case Enter: + break; + } + + // Check that we don't have infinite #include recursion. 
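[Editor's note: the IsImportDecl check above implements the C++20 rule that an import declaration naming a header must resolve to a header unit or module. A sketch of the two outcomes; whether the first line succeeds depends on whether a header unit for <vector> is available in the build, and the diagnostic text is paraphrased:]

  import <vector>;     // OK when a header unit / module exists for the named header
  import "local.h";    // error: the header cannot be imported because it is not known to be a header unit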
+ if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) { + Diag(FilenameTok, diag::err_pp_include_too_deep); + HasReachedMaxIncludeDepth = true; + return {ImportAction::None}; } // Look up the file, create a File ID for it. - SourceLocation IncludePos = End; + SourceLocation IncludePos = FilenameTok.getLocation(); // If the filename string was the result of macro expansions, set the include // position on the file where it will be included and after the expansions. if (IncludePos.isMacroID()) @@ -2045,7 +2106,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, // If all is good, enter the new file! if (EnterSourceFile(FID, CurDir, FilenameTok.getLocation())) - return; + return {ImportAction::None}; // Determine if we're switching to building a new submodule, and which one. if (auto *M = SuggestedModule.getModule()) { @@ -2056,29 +2117,37 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, << M->getFullModuleName(); Diag(M->getTopLevelModule()->ShadowingModule->DefinitionLoc, diag::note_previous_definition); - return; + return {ImportAction::None}; } // When building a pch, -fmodule-name tells the compiler to textually // include headers in the specified module. We are not building the // specified module. + // + // FIXME: This is the wrong way to handle this. We should produce a PCH + // that behaves the same as the header would behave in a compilation using + // that PCH, which means we should enter the submodule. We need to teach + // the AST serialization layer to deal with the resulting AST. if (getLangOpts().CompilingPCH && isForModuleBuilding(M, getLangOpts().CurrentModule, getLangOpts().ModuleName)) - return; + return {ImportAction::None}; assert(!CurLexerSubmodule && "should not have marked this as a module yet"); CurLexerSubmodule = M; // Let the macro handling code know that any future macros are within // the new submodule. - EnterSubmodule(M, HashLoc, /*ForPragma*/false); + EnterSubmodule(M, EndLoc, /*ForPragma*/false); // Let the parser know that any future declarations are within the new // submodule. // FIXME: There's no point doing this if we're handling a #__include_macros // directive. - EnterAnnotationToken(SourceRange(HashLoc, End), tok::annot_module_begin, M); + return {ImportAction::ModuleBegin, M}; } + + assert(!IsImportDecl && "failed to diagnose missing module for import decl"); + return {ImportAction::None}; } /// HandleIncludeNextDirective - Implements \#include_next. @@ -2106,6 +2175,10 @@ void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc, LookupFromFile = CurPPLexer->getFileEntry(); Lookup = nullptr; } else if (!Lookup) { + // The current file was not found by walking the include path. Either it + // is the primary file (handled above), or it was found by absolute path, + // or it was found relative to such a file. + // FIXME: Track enough information so we know which case we're in. Diag(IncludeNextTok, diag::pp_include_next_absolute_path); } else { // Start looking up in the next directory. 
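[Editor's note, for context on the HandleIncludeNextDirective comment above: #include_next continues the header search in the directories after the one where the current file was found, and the pp_include_next_absolute_path warning fires when there is no such lookup slot. Typical use in a wrapper header:]

  // stdlib.h -- a wrapper header that interposes on the real system header
  #include_next <stdlib.h>   // resume the search after the directory containing this wrapper
  // ... additional declarations ...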
@@ -2139,7 +2212,7 @@ void Preprocessor::HandleImportDirective(SourceLocation HashLoc, return HandleMicrosoftImportDirective(ImportTok); Diag(ImportTok, diag::ext_pp_import_directive); } - return HandleIncludeDirective(HashLoc, ImportTok, nullptr, nullptr, true); + return HandleIncludeDirective(HashLoc, ImportTok); } /// HandleIncludeMacrosDirective - The -imacros command line option turns into a @@ -2198,8 +2271,7 @@ bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) { // OpenCL v1.2 s6.9.e: variadic macros are not supported. if (LangOpts.OpenCL) { - Diag(Tok, diag::err_pp_opencl_variadic_macros); - return true; + Diag(Tok, diag::ext_pp_opencl_variadic_macros); } // Lex the token after the identifier. @@ -2228,8 +2300,7 @@ bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) { // If this is already used as a parameter, it is used multiple times (e.g. // #define X(A,A. - if (std::find(Parameters.begin(), Parameters.end(), II) != - Parameters.end()) { // C99 6.10.3p6 + if (llvm::find(Parameters, II) != Parameters.end()) { // C99 6.10.3p6 Diag(Tok, diag::err_pp_duplicate_name_in_arg_list) << II; return true; } @@ -2791,10 +2862,8 @@ void Preprocessor::HandleIfDirective(Token &IfToken, // Parse and evaluate the conditional expression. IdentifierInfo *IfNDefMacro = nullptr; - const SourceLocation ConditionalBegin = CurPPLexer->getSourceLocation(); const DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro); const bool ConditionalTrue = DER.Conditional; - const SourceLocation ConditionalEnd = CurPPLexer->getSourceLocation(); // If this condition is equivalent to #ifndef X, and if this is the first // directive seen, handle it for the multiple-include optimization. @@ -2807,9 +2876,9 @@ void Preprocessor::HandleIfDirective(Token &IfToken, } if (Callbacks) - Callbacks->If(IfToken.getLocation(), - SourceRange(ConditionalBegin, ConditionalEnd), - (ConditionalTrue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False)); + Callbacks->If( + IfToken.getLocation(), DER.ExprRange, + (ConditionalTrue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False)); // Should we include the stuff contained by this directive? if (PPOpts->SingleFileParseMode && DER.IncludedUndefinedIds) { @@ -2902,9 +2971,7 @@ void Preprocessor::HandleElifDirective(Token &ElifToken, // #elif directive in a non-skipping conditional... start skipping. // We don't care what the condition is, because we will always skip it (since // the block immediately before it was included). 
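[Editor's note: two effects of the ReadMacroParameterList hunk above: in OpenCL, a variadic macro is now reported via ext_pp_opencl_variadic_macros (an extension diagnostic) instead of being a hard error that aborts the definition, and duplicate parameter names are still rejected. For example:]

  #define LOG(fmt, ...) printf(fmt, __VA_ARGS__)   // OpenCL: now accepted with an extension warning
  #define PAIR(A, A) (A)                           // still an error: duplicate macro parameter name 'A'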
- const SourceLocation ConditionalBegin = CurPPLexer->getSourceLocation(); - DiscardUntilEndOfDirective(); - const SourceLocation ConditionalEnd = CurPPLexer->getSourceLocation(); + SourceRange ConditionRange = DiscardUntilEndOfDirective(); PPConditionalInfo CI; if (CurPPLexer->popConditionalLevel(CI)) { @@ -2920,8 +2987,7 @@ void Preprocessor::HandleElifDirective(Token &ElifToken, if (CI.FoundElse) Diag(ElifToken, diag::pp_err_elif_after_else); if (Callbacks) - Callbacks->Elif(ElifToken.getLocation(), - SourceRange(ConditionalBegin, ConditionalEnd), + Callbacks->Elif(ElifToken.getLocation(), ConditionRange, PPCallbacks::CVK_NotEvaluated, CI.IfLoc); if (PPOpts->SingleFileParseMode && !CI.FoundNonSkip) { diff --git a/lib/Lex/PPExpressions.cpp b/lib/Lex/PPExpressions.cpp index ac01efad9bf69..e5ec2b99f5074 100644 --- a/lib/Lex/PPExpressions.cpp +++ b/lib/Lex/PPExpressions.cpp @@ -1,9 +1,8 @@ //===--- PPExpressions.cpp - Preprocessor Expression Evaluation -----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -152,8 +151,8 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT, return true; } // Consume the ). - Result.setEnd(PeekTok.getLocation()); PP.LexNonComment(PeekTok); + Result.setEnd(PeekTok.getLocation()); } else { // Consume identifier. Result.setEnd(PeekTok.getLocation()); @@ -842,14 +841,22 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { PPValue ResVal(BitWidth); DefinedTracker DT; + SourceLocation ExprStartLoc = SourceMgr.getExpansionLoc(Tok.getLocation()); if (EvaluateValue(ResVal, Tok, DT, true, *this)) { // Parse error, skip the rest of the macro line. + SourceRange ConditionRange = ExprStartLoc; if (Tok.isNot(tok::eod)) - DiscardUntilEndOfDirective(); + ConditionRange = DiscardUntilEndOfDirective(); // Restore 'DisableMacroExpansion'. DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective; - return {false, DT.IncludedUndefinedIds}; + + // We cannot trust the source range from the value because there was a + // parse error. Track the range manually -- the end of the directive is the + // end of the condition range. + return {false, + DT.IncludedUndefinedIds, + {ExprStartLoc, ConditionRange.getEnd()}}; } // If we are at the end of the expression after just parsing a value, there @@ -863,7 +870,7 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { // Restore 'DisableMacroExpansion'. DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective; - return {ResVal.Val != 0, DT.IncludedUndefinedIds}; + return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()}; } // Otherwise, we must have a binary operator (e.g. "#if 1 < 2"), so parse the @@ -876,7 +883,7 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { // Restore 'DisableMacroExpansion'. 
DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective; - return {false, DT.IncludedUndefinedIds}; + return {false, DT.IncludedUndefinedIds, ResVal.getRange()}; } // If we aren't at the tok::eod token, something bad happened, like an extra @@ -888,5 +895,5 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { // Restore 'DisableMacroExpansion'. DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective; - return {ResVal.Val != 0, DT.IncludedUndefinedIds}; + return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()}; } diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp index e321dd38fed6d..7cce5f9c9fe48 100644 --- a/lib/Lex/PPLexerChange.cpp +++ b/lib/Lex/PPLexerChange.cpp @@ -1,9 +1,8 @@ //===--- PPLexerChange.cpp - Handle changing lexers in the preprocessor ---===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -155,10 +154,11 @@ void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd, /// must be freed. /// void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks, - bool DisableMacroExpansion, - bool OwnsTokens) { + bool DisableMacroExpansion, bool OwnsTokens, + bool IsReinject) { if (CurLexerKind == CLK_CachingLexer) { if (CachedLexPos < CachedTokens.size()) { + assert(IsReinject && "new tokens in the middle of cached stream"); // We're entering tokens into the middle of our cached token stream. We // can't represent that, so just insert the tokens into the buffer. CachedTokens.insert(CachedTokens.begin() + CachedLexPos, @@ -171,7 +171,8 @@ void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks, // New tokens are at the end of the cached token sequnece; insert the // token stream underneath the caching lexer. ExitCachingLexMode(); - EnterTokenStream(Toks, NumToks, DisableMacroExpansion, OwnsTokens); + EnterTokenStream(Toks, NumToks, DisableMacroExpansion, OwnsTokens, + IsReinject); EnterCachingLexMode(); return; } @@ -180,10 +181,11 @@ void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks, std::unique_ptr<TokenLexer> TokLexer; if (NumCachedTokenLexers == 0) { TokLexer = llvm::make_unique<TokenLexer>( - Toks, NumToks, DisableMacroExpansion, OwnsTokens, *this); + Toks, NumToks, DisableMacroExpansion, OwnsTokens, IsReinject, *this); } else { TokLexer = std::move(TokenLexerCache[--NumCachedTokenLexers]); - TokLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens); + TokLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens, + IsReinject); } // Save our current state. 
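[Editor's note: the IsReinject parameter threaded through EnterTokenStream above distinguishes tokens that are being put back after having already been handed out from tokens the preprocessor fabricates. A summary of how the call sites in this patch use it; this is a reading of the code above, not additional API:]

  // Tokens previously returned to a consumer and now being replayed:
  //   EnterTokenStream(std::move(Toks), N, /*DisableMacroExpansion=*/true, /*IsReinject=*/true);
  // Newly created tokens (annotations, saved EOF/EOD, stringified pragmas):
  //   EnterTokenStream(std::move(Toks), N, /*DisableMacroExpansion=*/true, /*IsReinject=*/false);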
@@ -271,7 +273,7 @@ void Preprocessor::diagnoseMissingHeaderInUmbrellaDir(const Module &Mod) { ModuleMap &ModMap = getHeaderSearchInfo().getModuleMap(); const DirectoryEntry *Dir = Mod.getUmbrellaDir().Entry; - llvm::vfs::FileSystem &FS = *FileMgr.getVirtualFileSystem(); + llvm::vfs::FileSystem &FS = FileMgr.getVirtualFileSystem(); std::error_code EC; for (llvm::vfs::recursive_directory_iterator Entry(FS, Dir->getName(), EC), End; @@ -645,6 +647,8 @@ void Preprocessor::EnterSubmodule(Module *M, SourceLocation ImportLoc, BuildingSubmoduleStack.push_back( BuildingSubmoduleInfo(M, ImportLoc, ForPragma, CurSubmoduleState, PendingModuleMacroNames.size())); + if (Callbacks) + Callbacks->EnteredSubmodule(M, ImportLoc, ForPragma); return; } @@ -689,6 +693,9 @@ void Preprocessor::EnterSubmodule(Module *M, SourceLocation ImportLoc, BuildingSubmoduleInfo(M, ImportLoc, ForPragma, CurSubmoduleState, PendingModuleMacroNames.size())); + if (Callbacks) + Callbacks->EnteredSubmodule(M, ImportLoc, ForPragma); + // Switch to this submodule as the current submodule. CurSubmoduleState = &State; @@ -729,6 +736,10 @@ Module *Preprocessor::LeaveSubmodule(bool ForPragma) { // are tracking macro visibility, don't build any, and preserve the list // of pending names for the surrounding submodule. BuildingSubmoduleStack.pop_back(); + + if (Callbacks) + Callbacks->LeftSubmodule(LeavingMod, ImportLoc, ForPragma); + makeModuleVisible(LeavingMod, ImportLoc); return LeavingMod; } @@ -813,6 +824,9 @@ Module *Preprocessor::LeaveSubmodule(bool ForPragma) { BuildingSubmoduleStack.pop_back(); + if (Callbacks) + Callbacks->LeftSubmodule(LeavingMod, ImportLoc, ForPragma); + // A nested #include makes the included submodule visible. makeModuleVisible(LeavingMod, ImportLoc); return LeavingMod; diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp index c70ff46ec9049..687b9a9d3b7bd 100644 --- a/lib/Lex/PPMacroExpansion.cpp +++ b/lib/Lex/PPMacroExpansion.cpp @@ -1,9 +1,8 @@ -//===--- MacroExpansion.cpp - Top level Macro Expansion -------------------===// +//===--- PPMacroExpansion.cpp - Top level Macro Expansion -----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -44,6 +43,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" +#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> @@ -364,6 +364,7 @@ void Preprocessor::RegisterBuiltinMacros() { } // Clang Extensions. + Ident__FILE_NAME__ = RegisterBuiltinMacro(*this, "__FILE_NAME__"); Ident__has_feature = RegisterBuiltinMacro(*this, "__has_feature"); Ident__has_extension = RegisterBuiltinMacro(*this, "__has_extension"); Ident__has_builtin = RegisterBuiltinMacro(*this, "__has_builtin"); @@ -493,10 +494,13 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, // Preprocessor directives used inside macro arguments are not portable, and // this enables the warning. InMacroArgs = true; + ArgMacro = &Identifier; + Args = ReadMacroCallArgumentList(Identifier, MI, ExpansionEnd); // Finished parsing args. 
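[Editor's note: EnterSubmodule and LeaveSubmodule above now notify PPCallbacks. A minimal sketch of a client observing submodule transitions, assuming the virtual functions mirror the call sites above; the tracer type name is hypothetical.]

  #include "clang/Basic/Module.h"
  #include "clang/Lex/PPCallbacks.h"
  #include "llvm/Support/raw_ostream.h"

  struct SubmoduleTracer : clang::PPCallbacks {
    void EnteredSubmodule(clang::Module *M, clang::SourceLocation ImportLoc,
                          bool ForPragma) override {
      llvm::errs() << "entering " << M->getFullModuleName() << "\n";
    }
    void LeftSubmodule(clang::Module *M, clang::SourceLocation ImportLoc,
                       bool ForPragma) override {
      llvm::errs() << "leaving " << M->getFullModuleName() << "\n";
    }
  };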
InMacroArgs = false; + ArgMacro = nullptr; // If there was an error parsing the arguments, bail out. if (!Args) return true; @@ -802,7 +806,7 @@ MacroArgs *Preprocessor::ReadMacroCallArgumentList(Token &MacroName, // Do not lose the EOF/EOD. auto Toks = llvm::make_unique<Token[]>(1); Toks[0] = Tok; - EnterTokenStream(std::move(Toks), 1, true); + EnterTokenStream(std::move(Toks), 1, true, /*IsReinject*/ false); break; } else if (Tok.is(tok::r_paren)) { // If we found the ) token, the macro arg list is done. @@ -1151,8 +1155,11 @@ static bool EvaluateHasIncludeCommon(Token &Tok, return false; } - // Get '('. - PP.LexNonComment(Tok); + // Get '('. If we don't have a '(', try to form a header-name token. + do { + if (PP.LexHeaderName(Tok)) + return false; + } while (Tok.getKind() == tok::comment); // Ensure we have a '('. if (Tok.isNot(tok::l_paren)) { @@ -1161,58 +1168,27 @@ static bool EvaluateHasIncludeCommon(Token &Tok, PP.Diag(LParenLoc, diag::err_pp_expected_after) << II << tok::l_paren; // If the next token looks like a filename or the start of one, // assume it is and process it as such. - if (!Tok.is(tok::angle_string_literal) && !Tok.is(tok::string_literal) && - !Tok.is(tok::less)) + if (Tok.isNot(tok::header_name)) return false; } else { // Save '(' location for possible missing ')' message. LParenLoc = Tok.getLocation(); - - if (PP.getCurrentLexer()) { - // Get the file name. - PP.getCurrentLexer()->LexIncludeFilename(Tok); - } else { - // We're in a macro, so we can't use LexIncludeFilename; just - // grab the next token. - PP.Lex(Tok); - } - } - - // Reserve a buffer to get the spelling. - SmallString<128> FilenameBuffer; - StringRef Filename; - SourceLocation EndLoc; - - switch (Tok.getKind()) { - case tok::eod: - // If the token kind is EOD, the error has already been diagnosed. - return false; - - case tok::angle_string_literal: - case tok::string_literal: { - bool Invalid = false; - Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid); - if (Invalid) + if (PP.LexHeaderName(Tok)) return false; - break; } - case tok::less: - // This could be a <foo/bar.h> file coming from a macro expansion. In this - // case, glue the tokens together into FilenameBuffer and interpret those. - FilenameBuffer.push_back('<'); - if (PP.ConcatenateIncludeName(FilenameBuffer, EndLoc)) { - // Let the caller know a <eod> was found by changing the Token kind. - Tok.setKind(tok::eod); - return false; // Found <eod> but no ">"? Diagnostic already emitted. - } - Filename = FilenameBuffer; - break; - default: + if (Tok.isNot(tok::header_name)) { PP.Diag(Tok.getLocation(), diag::err_pp_expects_filename); return false; } + // Reserve a buffer to get the spelling. + SmallString<128> FilenameBuffer; + bool Invalid = false; + StringRef Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid); + if (Invalid) + return false; + SourceLocation FilenameLoc = Tok.getLocation(); // Get ')'. @@ -1236,7 +1212,7 @@ static bool EvaluateHasIncludeCommon(Token &Tok, const DirectoryLookup *CurDir; const FileEntry *File = PP.LookupFile(FilenameLoc, Filename, isAngled, LookupFrom, LookupFromFile, - CurDir, nullptr, nullptr, nullptr, nullptr); + CurDir, nullptr, nullptr, nullptr, nullptr, nullptr); if (PPCallbacks *Callbacks = PP.getPPCallbacks()) { SrcMgr::CharacteristicKind FileType = SrcMgr::C_User; @@ -1354,9 +1330,13 @@ already_lexed: // The last ')' has been reached; return the value if one found or // a diagnostic and a dummy value. 
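[Editor's note: EvaluateHasIncludeCommon above now asks the lexer to form a proper header-name token (LexHeaderName) instead of hand-gluing tokens, which covers both literal and macro-expanded operands of __has_include. For example, where INCFILE is a hypothetical macro:]

  #define INCFILE <sys/stat.h>
  #if __has_include(INCFILE)      // operand produced by macro expansion
  #  include INCFILE
  #endif
  #if __has_include("config.h")   // literal operand
  #  include "config.h"
  #endif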
- if (Result.hasValue()) + if (Result.hasValue()) { OS << Result.getValue(); - else { + // For strict conformance to __has_cpp_attribute rules, use 'L' + // suffix for dated literals. + if (Result.getValue() > 1) + OS << 'L'; + } else { OS << 0; if (!SuppressDiagnostic) PP.Diag(Tok.getLocation(), diag::err_too_few_args_in_macro_invoc); @@ -1478,6 +1458,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { // Set up the return result. Tok.setIdentifierInfo(nullptr); Tok.clearFlag(Token::NeedsCleaning); + bool IsAtStartOfLine = Tok.isAtStartOfLine(); + bool HasLeadingSpace = Tok.hasLeadingSpace(); if (II == Ident__LINE__) { // C99 6.10.8: "__LINE__: The presumed line number (within the current @@ -1500,7 +1482,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { // __LINE__ expands to a simple numeric value. OS << (PLoc.isValid()? PLoc.getLine() : 1); Tok.setKind(tok::numeric_constant); - } else if (II == Ident__FILE__ || II == Ident__BASE_FILE__) { + } else if (II == Ident__FILE__ || II == Ident__BASE_FILE__ || + II == Ident__FILE_NAME__) { // C99 6.10.8: "__FILE__: The presumed name of the current source file (a // character string literal)". This can be affected by #line. PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation()); @@ -1521,7 +1504,19 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { // Escape this filename. Turn '\' -> '\\' '"' -> '\"' SmallString<128> FN; if (PLoc.isValid()) { - FN += PLoc.getFilename(); + // __FILE_NAME__ is a Clang-specific extension that expands to the + // the last part of __FILE__. + if (II == Ident__FILE_NAME__) { + // Try to get the last path component, failing that return the original + // presumed location. + StringRef PLFileName = llvm::sys::path::filename(PLoc.getFilename()); + if (PLFileName != "") + FN += PLFileName; + else + FN += PLoc.getFilename(); + } else { + FN += PLoc.getFilename(); + } Lexer::Stringify(FN); OS << '"' << FN << '"'; } @@ -1631,6 +1626,11 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { .Case("__is_target_vendor", true) .Case("__is_target_os", true) .Case("__is_target_environment", true) + .Case("__builtin_LINE", true) + .Case("__builtin_FILE", true) + .Case("__builtin_FUNCTION", true) + .Case("__builtin_COLUMN", true) + .Case("__builtin_bit_cast", true) .Default(false); } }); @@ -1707,7 +1707,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { HasLexedNextToken = Tok.is(tok::string_literal); if (!FinishLexStringLiteral(Tok, WarningName, "'__has_warning'", - /*MacroExpansion=*/false)) + /*AllowMacroExpansion=*/false)) return false; // FIXME: Should we accept "-R..." flags here, or should that be @@ -1814,6 +1814,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { llvm_unreachable("Unknown identifier!"); } CreateString(OS.str(), Tok, Tok.getLocation(), Tok.getLocation()); + Tok.setFlagValue(Token::StartOfLine, IsAtStartOfLine); + Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace); } void Preprocessor::markMacroAsUsed(MacroInfo *MI) { diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp index 575935119f6f1..4e4db668551f8 100644 --- a/lib/Lex/Pragma.cpp +++ b/lib/Lex/Pragma.cpp @@ -1,9 +1,8 @@ //===- Pragma.cpp - Pragma registration and handling ----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
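[Editor's note: two user-visible pieces in the hunk above: __has_cpp_attribute now prints its dated value with an 'L' suffix, and the new __FILE_NAME__ builtin expands to just the last component of the presumed file name. Small illustrations; the path and the NODISCARD macro name are hypothetical:]

  #if defined(__has_cpp_attribute) && __has_cpp_attribute(nodiscard) >= 201603L
  #  define NODISCARD [[nodiscard]]
  #else
  #  define NODISCARD
  #endif

  // Compiled as /path/to/widget.cpp:
  const char *full = __FILE__;       // "/path/to/widget.cpp" (as given to the compiler, or as set by #line)
  const char *base = __FILE_NAME__;  // "widget.cpp" -- Clang extension, last path component only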
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -64,7 +63,7 @@ PragmaHandler::~PragmaHandler() = default; EmptyPragmaHandler::EmptyPragmaHandler(StringRef Name) : PragmaHandler(Name) {} void EmptyPragmaHandler::HandlePragma(Preprocessor &PP, - PragmaIntroducerKind Introducer, + PragmaIntroducer Introducer, Token &FirstToken) {} //===----------------------------------------------------------------------===// @@ -99,8 +98,7 @@ void PragmaNamespace::RemovePragmaHandler(PragmaHandler *Handler) { } void PragmaNamespace::HandlePragma(Preprocessor &PP, - PragmaIntroducerKind Introducer, - Token &Tok) { + PragmaIntroducer Introducer, Token &Tok) { // Read the 'namespace' that the directive is in, e.g. STDC. Do not macro // expand it, the user can have a STDC #define, that should not affect this. PP.LexUnexpandedToken(Tok); @@ -125,10 +123,9 @@ void PragmaNamespace::HandlePragma(Preprocessor &PP, /// HandlePragmaDirective - The "\#pragma" directive has been parsed. Lex the /// rest of the pragma, passing it to the registered pragma handlers. -void Preprocessor::HandlePragmaDirective(SourceLocation IntroducerLoc, - PragmaIntroducerKind Introducer) { +void Preprocessor::HandlePragmaDirective(PragmaIntroducer Introducer) { if (Callbacks) - Callbacks->PragmaDirective(IntroducerLoc, Introducer); + Callbacks->PragmaDirective(Introducer.Loc, Introducer.Kind); if (!PragmasEnabled) return; @@ -145,84 +142,73 @@ void Preprocessor::HandlePragmaDirective(SourceLocation IntroducerLoc, DiscardUntilEndOfDirective(); } -namespace { - -/// Helper class for \see Preprocessor::Handle_Pragma. -class LexingFor_PragmaRAII { - Preprocessor &PP; - bool InMacroArgPreExpansion; - bool Failed = false; - Token &OutTok; - Token PragmaTok; - -public: - LexingFor_PragmaRAII(Preprocessor &PP, bool InMacroArgPreExpansion, - Token &Tok) - : PP(PP), InMacroArgPreExpansion(InMacroArgPreExpansion), OutTok(Tok) { - if (InMacroArgPreExpansion) { - PragmaTok = OutTok; - PP.EnableBacktrackAtThisPos(); - } - } - - ~LexingFor_PragmaRAII() { - if (InMacroArgPreExpansion) { - // When committing/backtracking the cached pragma tokens in a macro - // argument pre-expansion we want to ensure that either the tokens which - // have been committed will be removed from the cache or that the tokens - // over which we just backtracked won't remain in the cache after they're - // consumed and that the caching will stop after consuming them. - // Otherwise the caching will interfere with the way macro expansion - // works, because we will continue to cache tokens after consuming the - // backtracked tokens, which shouldn't happen when we're dealing with - // macro argument pre-expansion. - auto CachedTokenRange = PP.LastCachedTokenRange(); - if (Failed) { - PP.CommitBacktrackedTokens(); - } else { - PP.Backtrack(); - OutTok = PragmaTok; - } - PP.EraseCachedTokens(CachedTokenRange); - } - } - - void failed() { - Failed = true; - } -}; - -} // namespace - /// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then /// return the first token after the directive. The _Pragma token has just /// been read into 'Tok'. void Preprocessor::Handle_Pragma(Token &Tok) { - // This works differently if we are pre-expanding a macro argument. 
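[Editor's note: handlers now receive a PragmaIntroducer value carrying both Kind and Loc instead of a bare PragmaIntroducerKind, as the EmptyPragmaHandler and PragmaNamespace overrides above show. A minimal sketch of a custom handler against the new signature; the handler and pragma names are hypothetical.]

  #include "clang/Lex/Pragma.h"
  #include "clang/Lex/Preprocessor.h"

  struct MyPragmaHandler : clang::PragmaHandler {
    MyPragmaHandler() : PragmaHandler("my_pragma") {}
    void HandlePragma(clang::Preprocessor &PP, clang::PragmaIntroducer Introducer,
                      clang::Token &FirstToken) override {
      // Introducer.Kind says whether this came from #pragma, _Pragma, or __pragma;
      // Introducer.Loc is the location of the introducing token.
    }
  };
  // Registered with something like PP.AddPragmaHandler(new MyPragmaHandler());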
- // In that case we don't actually "activate" the pragma now, we only lex it - // until we are sure it is lexically correct and then we backtrack so that - // we activate the pragma whenever we encounter the tokens again in the token - // stream. This ensures that we will activate it in the correct location - // or that we will ignore it if it never enters the token stream, e.g: + // C11 6.10.3.4/3: + // all pragma unary operator expressions within [a completely + // macro-replaced preprocessing token sequence] are [...] processed [after + // rescanning is complete] + // + // This means that we execute _Pragma operators in two cases: + // + // 1) on token sequences that would otherwise be produced as the output of + // phase 4 of preprocessing, and + // 2) on token sequences formed as the macro-replaced token sequence of a + // macro argument // - // #define EMPTY(x) - // #define INACTIVE(x) EMPTY(x) - // INACTIVE(_Pragma("clang diagnostic ignored \"-Wconversion\"")) + // Case #2 appears to be a wording bug: only _Pragmas that would survive to + // the end of phase 4 should actually be executed. Discussion on the WG14 + // mailing list suggests that a _Pragma operator is notionally checked early, + // but only pragmas that survive to the end of phase 4 should be executed. + // + // In Case #2, we check the syntax now, but then put the tokens back into the + // token stream for later consumption. + + struct TokenCollector { + Preprocessor &Self; + bool Collect; + SmallVector<Token, 3> Tokens; + Token &Tok; + + void lex() { + if (Collect) + Tokens.push_back(Tok); + Self.Lex(Tok); + } - LexingFor_PragmaRAII _PragmaLexing(*this, InMacroArgPreExpansion, Tok); + void revert() { + assert(Collect && "did not collect tokens"); + assert(!Tokens.empty() && "collected unexpected number of tokens"); + + // Push the ( "string" ) tokens into the token stream. + auto Toks = llvm::make_unique<Token[]>(Tokens.size()); + std::copy(Tokens.begin() + 1, Tokens.end(), Toks.get()); + Toks[Tokens.size() - 1] = Tok; + Self.EnterTokenStream(std::move(Toks), Tokens.size(), + /*DisableMacroExpansion*/ true, + /*IsReinject*/ true); + + // ... and return the _Pragma token unchanged. + Tok = *Tokens.begin(); + } + }; + + TokenCollector Toks = {*this, InMacroArgPreExpansion, {}, Tok}; // Remember the pragma token location. SourceLocation PragmaLoc = Tok.getLocation(); // Read the '('. - Lex(Tok); + Toks.lex(); if (Tok.isNot(tok::l_paren)) { Diag(PragmaLoc, diag::err__Pragma_malformed); - return _PragmaLexing.failed(); + return; } // Read the '"..."'. - Lex(Tok); + Toks.lex(); if (!tok::isStringLiteral(Tok.getKind())) { Diag(PragmaLoc, diag::err__Pragma_malformed); // Skip bad tokens, and the ')', if present. @@ -234,7 +220,7 @@ void Preprocessor::Handle_Pragma(Token &Tok) { Lex(Tok); if (Tok.is(tok::r_paren)) Lex(Tok); - return _PragmaLexing.failed(); + return; } if (Tok.hasUDSuffix()) { @@ -243,21 +229,24 @@ void Preprocessor::Handle_Pragma(Token &Tok) { Lex(Tok); if (Tok.is(tok::r_paren)) Lex(Tok); - return _PragmaLexing.failed(); + return; } // Remember the string. Token StrTok = Tok; // Read the ')'. - Lex(Tok); + Toks.lex(); if (Tok.isNot(tok::r_paren)) { Diag(PragmaLoc, diag::err__Pragma_malformed); - return _PragmaLexing.failed(); + return; } - if (InMacroArgPreExpansion) + // If we're expanding a macro argument, put the tokens back. 
+ if (InMacroArgPreExpansion) { + Toks.revert(); return; + } SourceLocation RParenLoc = Tok.getLocation(); std::string StrVal = getSpelling(StrTok); @@ -330,7 +319,7 @@ void Preprocessor::Handle_Pragma(Token &Tok) { EnterSourceFileWithLexer(TL, nullptr); // With everything set up, lex this as a #pragma directive. - HandlePragmaDirective(PragmaLoc, PIK__Pragma); + HandlePragmaDirective({PIK__Pragma, PragmaLoc}); // Finally, return whatever came after the pragma directive. return Lex(Tok); @@ -376,10 +365,11 @@ void Preprocessor::HandleMicrosoft__pragma(Token &Tok) { std::copy(PragmaToks.begin(), PragmaToks.end(), TokArray); // Push the tokens onto the stack. - EnterTokenStream(TokArray, PragmaToks.size(), true, true); + EnterTokenStream(TokArray, PragmaToks.size(), true, true, + /*IsReinject*/ false); // With everything set up, lex this as a #pragma directive. - HandlePragmaDirective(PragmaLoc, PIK___pragma); + HandlePragmaDirective({PIK___pragma, PragmaLoc}); // Finally, return whatever came after the pragma directive. return Lex(Tok); @@ -483,11 +473,14 @@ void Preprocessor::HandlePragmaSystemHeader(Token &SysHeaderTok) { /// HandlePragmaDependency - Handle \#pragma GCC dependency "foo" blah. void Preprocessor::HandlePragmaDependency(Token &DependencyTok) { Token FilenameTok; - CurPPLexer->LexIncludeFilename(FilenameTok); + if (LexHeaderName(FilenameTok, /*AllowConcatenation*/false)) + return; - // If the token kind is EOD, the error has already been diagnosed. - if (FilenameTok.is(tok::eod)) + // If the next token wasn't a header-name, diagnose the error. + if (FilenameTok.isNot(tok::header_name)) { + Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); return; + } // Reserve a buffer to get the spelling. SmallString<128> FilenameBuffer; @@ -507,7 +500,7 @@ void Preprocessor::HandlePragmaDependency(Token &DependencyTok) { const DirectoryLookup *CurDir; const FileEntry *File = LookupFile(FilenameTok.getLocation(), Filename, isAngled, nullptr, - nullptr, CurDir, nullptr, nullptr, nullptr, nullptr); + nullptr, CurDir, nullptr, nullptr, nullptr, nullptr, nullptr); if (!File) { if (!SuppressIncludeNotFoundError) Diag(FilenameTok, diag::err_pp_file_not_found) << Filename; @@ -663,24 +656,13 @@ void Preprocessor::HandlePragmaIncludeAlias(Token &Tok) { // We expect either a quoted string literal, or a bracketed name Token SourceFilenameTok; - CurPPLexer->LexIncludeFilename(SourceFilenameTok); - if (SourceFilenameTok.is(tok::eod)) { - // The diagnostic has already been handled + if (LexHeaderName(SourceFilenameTok)) return; - } StringRef SourceFileName; SmallString<128> FileNameBuffer; - if (SourceFilenameTok.is(tok::string_literal) || - SourceFilenameTok.is(tok::angle_string_literal)) { + if (SourceFilenameTok.is(tok::header_name)) { SourceFileName = getSpelling(SourceFilenameTok, FileNameBuffer); - } else if (SourceFilenameTok.is(tok::less)) { - // This could be a path instead of just a name - FileNameBuffer.push_back('<'); - SourceLocation End; - if (ConcatenateIncludeName(FileNameBuffer, End)) - return; // Diagnostic already emitted - SourceFileName = FileNameBuffer; } else { Diag(Tok, diag::warn_pragma_include_alias_expected_filename); return; @@ -695,23 +677,12 @@ void Preprocessor::HandlePragmaIncludeAlias(Token &Tok) { } Token ReplaceFilenameTok; - CurPPLexer->LexIncludeFilename(ReplaceFilenameTok); - if (ReplaceFilenameTok.is(tok::eod)) { - // The diagnostic has already been handled + if (LexHeaderName(ReplaceFilenameTok)) return; - } StringRef ReplaceFileName; - if 
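[Editor's note: the TokenCollector above replaces the backtracking RAII helper: during macro-argument pre-expansion the _Pragma is only syntax-checked and its tokens are pushed back (IsReinject=true), so it is executed only if it survives to the end of phase 4. The example from the removed helper's comment still describes the interesting case:]

  #define EMPTY(x)
  #define INACTIVE(x) EMPTY(x)
  // The _Pragma is macro-replaced as an argument, but its tokens never reach
  // the end of translation phase 4, so the pragma is never executed.
  INACTIVE(_Pragma("clang diagnostic ignored \"-Wconversion\""))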
(ReplaceFilenameTok.is(tok::string_literal) || - ReplaceFilenameTok.is(tok::angle_string_literal)) { + if (ReplaceFilenameTok.is(tok::header_name)) { ReplaceFileName = getSpelling(ReplaceFilenameTok, FileNameBuffer); - } else if (ReplaceFilenameTok.is(tok::less)) { - // This could be a path instead of just a name - FileNameBuffer.push_back('<'); - SourceLocation End; - if (ConcatenateIncludeName(FileNameBuffer, End)) - return; // Diagnostic already emitted - ReplaceFileName = FileNameBuffer; } else { Diag(Tok, diag::warn_pragma_include_alias_expected_filename); return; @@ -986,7 +957,7 @@ namespace { struct PragmaOnceHandler : public PragmaHandler { PragmaOnceHandler() : PragmaHandler("once") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &OnceTok) override { PP.CheckEndOfDirective("pragma once"); PP.HandlePragmaOnce(OnceTok); @@ -998,7 +969,7 @@ struct PragmaOnceHandler : public PragmaHandler { struct PragmaMarkHandler : public PragmaHandler { PragmaMarkHandler() : PragmaHandler("mark") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &MarkTok) override { PP.HandlePragmaMark(); } @@ -1008,7 +979,7 @@ struct PragmaMarkHandler : public PragmaHandler { struct PragmaPoisonHandler : public PragmaHandler { PragmaPoisonHandler() : PragmaHandler("poison") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &PoisonTok) override { PP.HandlePragmaPoison(); } @@ -1019,7 +990,7 @@ struct PragmaPoisonHandler : public PragmaHandler { struct PragmaSystemHeaderHandler : public PragmaHandler { PragmaSystemHeaderHandler() : PragmaHandler("system_header") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &SHToken) override { PP.HandlePragmaSystemHeader(SHToken); PP.CheckEndOfDirective("pragma"); @@ -1029,7 +1000,7 @@ struct PragmaSystemHeaderHandler : public PragmaHandler { struct PragmaDependencyHandler : public PragmaHandler { PragmaDependencyHandler() : PragmaHandler("dependency") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &DepToken) override { PP.HandlePragmaDependency(DepToken); } @@ -1038,8 +1009,8 @@ struct PragmaDependencyHandler : public PragmaHandler { struct PragmaDebugHandler : public PragmaHandler { PragmaDebugHandler() : PragmaHandler("__debug") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, - Token &DepToken) override { + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, + Token &DebugToken) override { Token Tok; PP.LexUnexpandedToken(Tok); if (Tok.isNot(tok::identifier)) { @@ -1057,7 +1028,7 @@ struct PragmaDebugHandler : public PragmaHandler { Crasher.startToken(); Crasher.setKind(tok::annot_pragma_parser_crash); Crasher.setAnnotationRange(SourceRange(Tok.getLocation())); - PP.EnterToken(Crasher); + PP.EnterToken(Crasher, /*IsReinject*/false); } else if (II->isStr("dump")) { Token Identifier; PP.LexUnexpandedToken(Identifier); @@ -1069,7 +1040,7 @@ struct PragmaDebugHandler : public PragmaHandler { SourceRange(Tok.getLocation(), Identifier.getLocation())); DumpAnnot.setAnnotationValue(DumpII); PP.DiscardUntilEndOfDirective(); - PP.EnterToken(DumpAnnot); + 
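[Editor's note: HandlePragmaIncludeAlias above now lexes both operands as header-name tokens. The MS-compatible pragma it implements, for reference; header names hypothetical:]

  #pragma include_alias(<oldname.h>, <newname.h>)
  #include <oldname.h>   // searched as if it were #include <newname.h>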
PP.EnterToken(DumpAnnot, /*IsReinject*/false); } else { PP.Diag(Identifier, diag::warn_pragma_debug_missing_argument) << II->getName(); @@ -1101,6 +1072,22 @@ struct PragmaDebugHandler : public PragmaHandler { else PP.Diag(MacroName, diag::warn_pragma_debug_missing_argument) << II->getName(); + } else if (II->isStr("module_map")) { + llvm::SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 8> + ModuleName; + if (LexModuleName(PP, Tok, ModuleName)) + return; + ModuleMap &MM = PP.getHeaderSearchInfo().getModuleMap(); + Module *M = nullptr; + for (auto IIAndLoc : ModuleName) { + M = MM.lookupModuleQualified(IIAndLoc.first->getName(), M); + if (!M) { + PP.Diag(IIAndLoc.second, diag::warn_pragma_debug_unknown_module) + << IIAndLoc.first; + return; + } + } + M->dump(); } else if (II->isStr("overflow_stack")) { DebugOverflowStack(); } else if (II->isStr("handle_crash")) { @@ -1136,7 +1123,8 @@ struct PragmaDebugHandler : public PragmaHandler { Toks[0].setKind(tok::annot_pragma_captured); Toks[0].setLocation(NameLoc); - PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true); + PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true, + /*IsReinject=*/false); } // Disable MSVC warning about runtime stack overflow. @@ -1161,7 +1149,7 @@ public: explicit PragmaDiagnosticHandler(const char *NS) : PragmaHandler("diagnostic"), Namespace(NS) {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &DiagToken) override { SourceLocation DiagLoc = DiagToken.getLocation(); Token Tok; @@ -1203,7 +1191,7 @@ public: std::string WarningName; if (!PP.FinishLexStringLiteral(Tok, WarningName, "pragma diagnostic", - /*MacroExpansion=*/false)) + /*AllowMacroExpansion=*/false)) return; if (Tok.isNot(tok::eod)) { @@ -1240,7 +1228,7 @@ public: /// "\#pragma hdrstop [<header-name-string>]" struct PragmaHdrstopHandler : public PragmaHandler { PragmaHdrstopHandler() : PragmaHandler("hdrstop") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &DepToken) override { PP.HandlePragmaHdrstop(DepToken); } @@ -1252,7 +1240,7 @@ struct PragmaHdrstopHandler : public PragmaHandler { struct PragmaWarningHandler : public PragmaHandler { PragmaWarningHandler() : PragmaHandler("warning") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &Tok) override { // Parse things like: // warning(push, 1) @@ -1369,11 +1357,75 @@ struct PragmaWarningHandler : public PragmaHandler { } }; +/// "\#pragma execution_character_set(...)". MSVC supports this pragma only +/// for "UTF-8". We parse it and ignore it if UTF-8 is provided and warn +/// otherwise to avoid -Wunknown-pragma warnings. 
+struct PragmaExecCharsetHandler : public PragmaHandler { + PragmaExecCharsetHandler() : PragmaHandler("execution_character_set") {} + + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, + Token &Tok) override { + // Parse things like: + // execution_character_set(push, "UTF-8") + // execution_character_set(pop) + SourceLocation DiagLoc = Tok.getLocation(); + PPCallbacks *Callbacks = PP.getPPCallbacks(); + + PP.Lex(Tok); + if (Tok.isNot(tok::l_paren)) { + PP.Diag(Tok, diag::warn_pragma_exec_charset_expected) << "("; + return; + } + + PP.Lex(Tok); + IdentifierInfo *II = Tok.getIdentifierInfo(); + + if (II && II->isStr("push")) { + // #pragma execution_character_set( push[ , string ] ) + PP.Lex(Tok); + if (Tok.is(tok::comma)) { + PP.Lex(Tok); + + std::string ExecCharset; + if (!PP.FinishLexStringLiteral(Tok, ExecCharset, + "pragma execution_character_set", + /*AllowMacroExpansion=*/false)) + return; + + // MSVC supports either of these, but nothing else. + if (ExecCharset != "UTF-8" && ExecCharset != "utf-8") { + PP.Diag(Tok, diag::warn_pragma_exec_charset_push_invalid) << ExecCharset; + return; + } + } + if (Callbacks) + Callbacks->PragmaExecCharsetPush(DiagLoc, "UTF-8"); + } else if (II && II->isStr("pop")) { + // #pragma execution_character_set( pop ) + PP.Lex(Tok); + if (Callbacks) + Callbacks->PragmaExecCharsetPop(DiagLoc); + } else { + PP.Diag(Tok, diag::warn_pragma_exec_charset_spec_invalid); + return; + } + + if (Tok.isNot(tok::r_paren)) { + PP.Diag(Tok, diag::warn_pragma_exec_charset_expected) << ")"; + return; + } + + PP.Lex(Tok); + if (Tok.isNot(tok::eod)) + PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma execution_character_set"; + } +}; + /// PragmaIncludeAliasHandler - "\#pragma include_alias("...")". struct PragmaIncludeAliasHandler : public PragmaHandler { PragmaIncludeAliasHandler() : PragmaHandler("include_alias") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &IncludeAliasTok) override { PP.HandlePragmaIncludeAlias(IncludeAliasTok); } @@ -1416,7 +1468,7 @@ public: : PragmaHandler(PragmaKind(Kind, true)), Kind(Kind), Namespace(Namespace) {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &Tok) override { SourceLocation MessageLoc = Tok.getLocation(); PP.Lex(Tok); @@ -1438,7 +1490,7 @@ public: std::string MessageString; if (!PP.FinishLexStringLiteral(Tok, MessageString, PragmaKind(Kind), - /*MacroExpansion=*/true)) + /*AllowMacroExpansion=*/true)) return; if (ExpectClosingParen) { @@ -1472,7 +1524,7 @@ public: struct PragmaModuleImportHandler : public PragmaHandler { PragmaModuleImportHandler() : PragmaHandler("import") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &Tok) override { SourceLocation ImportLoc = Tok.getLocation(); @@ -1488,7 +1540,7 @@ struct PragmaModuleImportHandler : public PragmaHandler { // If we have a non-empty module path, load the named module. 
Module *Imported = PP.getModuleLoader().loadModule(ImportLoc, ModuleName, Module::Hidden, - /*IsIncludeDirective=*/false); + /*IsInclusionDirective=*/false); if (!Imported) return; @@ -1509,7 +1561,7 @@ struct PragmaModuleImportHandler : public PragmaHandler { struct PragmaModuleBeginHandler : public PragmaHandler { PragmaModuleBeginHandler() : PragmaHandler("begin") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &Tok) override { SourceLocation BeginLoc = Tok.getLocation(); @@ -1533,16 +1585,15 @@ struct PragmaModuleBeginHandler : public PragmaHandler { // Find the module we're entering. We require that a module map for it // be loaded or implicitly loadable. - // FIXME: We could create the submodule here. We'd need to know whether - // it's supposed to be explicit, but not much else. - Module *M = PP.getHeaderSearchInfo().lookupModule(Current); + auto &HSI = PP.getHeaderSearchInfo(); + Module *M = HSI.lookupModule(Current); if (!M) { PP.Diag(ModuleName.front().second, diag::err_pp_module_begin_no_module_map) << Current; return; } for (unsigned I = 1; I != ModuleName.size(); ++I) { - auto *NewM = M->findSubmodule(ModuleName[I].first->getName()); + auto *NewM = M->findOrInferSubmodule(ModuleName[I].first->getName()); if (!NewM) { PP.Diag(ModuleName[I].second, diag::err_pp_module_begin_no_submodule) << M->getFullModuleName() << ModuleName[I].first; @@ -1570,7 +1621,7 @@ struct PragmaModuleBeginHandler : public PragmaHandler { struct PragmaModuleEndHandler : public PragmaHandler { PragmaModuleEndHandler() : PragmaHandler("end") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &Tok) override { SourceLocation Loc = Tok.getLocation(); @@ -1590,7 +1641,7 @@ struct PragmaModuleEndHandler : public PragmaHandler { struct PragmaModuleBuildHandler : public PragmaHandler { PragmaModuleBuildHandler() : PragmaHandler("build") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &Tok) override { PP.HandlePragmaModuleBuild(Tok); } @@ -1600,7 +1651,7 @@ struct PragmaModuleBuildHandler : public PragmaHandler { struct PragmaModuleLoadHandler : public PragmaHandler { PragmaModuleLoadHandler() : PragmaHandler("load") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &Tok) override { SourceLocation Loc = Tok.getLocation(); @@ -1615,7 +1666,7 @@ struct PragmaModuleLoadHandler : public PragmaHandler { // Load the module, don't make it visible. 
PP.getModuleLoader().loadModule(Loc, ModuleName, Module::Hidden, - /*IsIncludeDirective=*/false); + /*IsInclusionDirective=*/false); } }; @@ -1624,7 +1675,7 @@ struct PragmaModuleLoadHandler : public PragmaHandler { struct PragmaPushMacroHandler : public PragmaHandler { PragmaPushMacroHandler() : PragmaHandler("push_macro") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &PushMacroTok) override { PP.HandlePragmaPushMacro(PushMacroTok); } @@ -1635,7 +1686,7 @@ struct PragmaPushMacroHandler : public PragmaHandler { struct PragmaPopMacroHandler : public PragmaHandler { PragmaPopMacroHandler() : PragmaHandler("pop_macro") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &PopMacroTok) override { PP.HandlePragmaPopMacro(PopMacroTok); } @@ -1646,7 +1697,7 @@ struct PragmaPopMacroHandler : public PragmaHandler { struct PragmaARCCFCodeAuditedHandler : public PragmaHandler { PragmaARCCFCodeAuditedHandler() : PragmaHandler("arc_cf_code_audited") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &NameTok) override { SourceLocation Loc = NameTok.getLocation(); bool IsBegin; @@ -1701,7 +1752,7 @@ struct PragmaARCCFCodeAuditedHandler : public PragmaHandler { struct PragmaAssumeNonNullHandler : public PragmaHandler { PragmaAssumeNonNullHandler() : PragmaHandler("assume_nonnull") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &NameTok) override { SourceLocation Loc = NameTok.getLocation(); bool IsBegin; @@ -1770,7 +1821,7 @@ struct PragmaAssumeNonNullHandler : public PragmaHandler { struct PragmaRegionHandler : public PragmaHandler { PragmaRegionHandler(const char *pragma) : PragmaHandler(pragma) {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &NameTok) override { // #pragma region: endregion matches can be verified // __pragma(region): no sense, but ignored by msvc @@ -1824,6 +1875,7 @@ void Preprocessor::RegisterBuiltinPragmas() { // MS extensions. if (LangOpts.MicrosoftExt) { AddPragmaHandler(new PragmaWarningHandler()); + AddPragmaHandler(new PragmaExecCharsetHandler()); AddPragmaHandler(new PragmaIncludeAliasHandler()); AddPragmaHandler(new PragmaHdrstopHandler()); } diff --git a/lib/Lex/PreprocessingRecord.cpp b/lib/Lex/PreprocessingRecord.cpp index b37a8cf1ced49..115256db48095 100644 --- a/lib/Lex/PreprocessingRecord.cpp +++ b/lib/Lex/PreprocessingRecord.cpp @@ -1,9 +1,8 @@ //===- PreprocessingRecord.cpp - Record of Preprocessing ------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -239,16 +238,13 @@ unsigned PreprocessingRecord::findBeginLocalPreprocessedEntity( return First - PreprocessedEntities.begin(); } -unsigned PreprocessingRecord::findEndLocalPreprocessedEntity( - SourceLocation Loc) const { +unsigned +PreprocessingRecord::findEndLocalPreprocessedEntity(SourceLocation Loc) const { if (SourceMgr.isLoadedSourceLocation(Loc)) return 0; - std::vector<PreprocessedEntity *>::const_iterator - I = std::upper_bound(PreprocessedEntities.begin(), - PreprocessedEntities.end(), - Loc, - PPEntityComp<&SourceRange::getBegin>(SourceMgr)); + auto I = llvm::upper_bound(PreprocessedEntities, Loc, + PPEntityComp<&SourceRange::getBegin>(SourceMgr)); return I - PreprocessedEntities.begin(); } @@ -306,10 +302,9 @@ PreprocessingRecord::addPreprocessedEntity(PreprocessedEntity *Entity) { } // Linear search unsuccessful. Do a binary search. - pp_iter I = std::upper_bound(PreprocessedEntities.begin(), - PreprocessedEntities.end(), - BeginLoc, - PPEntityComp<&SourceRange::getBegin>(SourceMgr)); + pp_iter I = + llvm::upper_bound(PreprocessedEntities, BeginLoc, + PPEntityComp<&SourceRange::getBegin>(SourceMgr)); pp_iter insertI = PreprocessedEntities.insert(I, Entity); return getPPEntityID(insertI - PreprocessedEntities.begin(), /*isLoaded=*/false); diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp index 047a4caaca73f..bdc5fbcd2beab 100644 --- a/lib/Lex/Preprocessor.cpp +++ b/lib/Lex/Preprocessor.cpp @@ -1,9 +1,8 @@ -//===- Preprocess.cpp - C Language Family Preprocessor Implementation -----===// +//===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -78,12 +77,12 @@ ExternalPreprocessorSource::~ExternalPreprocessorSource() = default; Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts, DiagnosticsEngine &diags, LangOptions &opts, - SourceManager &SM, MemoryBufferCache &PCMCache, - HeaderSearch &Headers, ModuleLoader &TheModuleLoader, + SourceManager &SM, HeaderSearch &Headers, + ModuleLoader &TheModuleLoader, IdentifierInfoLookup *IILookup, bool OwnsHeaders, TranslationUnitKind TUKind) : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), - FileMgr(Headers.getFileMgr()), SourceMgr(SM), PCMCache(PCMCache), + FileMgr(Headers.getFileMgr()), SourceMgr(SM), ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers), TheModuleLoader(TheModuleLoader), ExternalSource(nullptr), // As the language options may have not been loaded yet (when @@ -103,6 +102,7 @@ Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts, DisableMacroExpansion = false; MacroExpansionInDirectivesOverride = false; InMacroArgs = false; + ArgMacro = nullptr; InMacroArgPreExpansion = false; NumCachedTokenLexers = 0; PragmasEnabled = true; @@ -567,7 +567,8 @@ void Preprocessor::EnterMainSourceFile() { SourceLocation(), PPOpts->PCHThroughHeader, /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr, CurDir, /*SearchPath=*/nullptr, /*RelativePath=*/nullptr, - /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr); + /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr, + /*IsFrameworkFound=*/nullptr); if (!File) { Diag(SourceLocation(), diag::err_pp_through_header_not_found) << PPOpts->PCHThroughHeader; @@ -624,8 +625,22 @@ void Preprocessor::SkipTokensWhileUsingPCH() { bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop; Token Tok; while (true) { - bool InPredefines = (CurLexer->getFileID() == getPredefinesFileID()); - CurLexer->Lex(Tok); + bool InPredefines = + (CurLexer && CurLexer->getFileID() == getPredefinesFileID()); + switch (CurLexerKind) { + case CLK_Lexer: + CurLexer->Lex(Tok); + break; + case CLK_TokenLexer: + CurTokenLexer->Lex(Tok); + break; + case CLK_CachingLexer: + CachingLex(Tok); + break; + case CLK_LexAfterModuleImport: + LexAfterModuleImport(Tok); + break; + } if (Tok.is(tok::eof) && !InPredefines) { ReachedMainFileEOF = true; break; @@ -861,6 +876,8 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { } void Preprocessor::Lex(Token &Result) { + ++LexLevel; + // We loop here until a lex function returns a token; this avoids recursion. bool ReturnedToken; do { @@ -876,8 +893,7 @@ void Preprocessor::Lex(Token &Result) { ReturnedToken = true; break; case CLK_LexAfterModuleImport: - LexAfterModuleImport(Result); - ReturnedToken = true; + ReturnedToken = LexAfterModuleImport(Result); break; } } while (!ReturnedToken); @@ -891,17 +907,296 @@ void Preprocessor::Lex(Token &Result) { Result.setIdentifierInfo(nullptr); } + // Update ImportSeqState to track our position within a C++20 import-seq + // if this token is being produced as a result of phase 4 of translation. 
+ if (getLangOpts().CPlusPlusModules && LexLevel == 1 && + !Result.getFlag(Token::IsReinjected)) { + switch (Result.getKind()) { + case tok::l_paren: case tok::l_square: case tok::l_brace: + ImportSeqState.handleOpenBracket(); + break; + case tok::r_paren: case tok::r_square: + ImportSeqState.handleCloseBracket(); + break; + case tok::r_brace: + ImportSeqState.handleCloseBrace(); + break; + case tok::semi: + ImportSeqState.handleSemi(); + break; + case tok::header_name: + case tok::annot_header_unit: + ImportSeqState.handleHeaderName(); + break; + case tok::kw_export: + ImportSeqState.handleExport(); + break; + case tok::identifier: + if (Result.getIdentifierInfo()->isModulesImport()) { + ImportSeqState.handleImport(); + if (ImportSeqState.afterImportSeq()) { + ModuleImportLoc = Result.getLocation(); + ModuleImportPath.clear(); + ModuleImportExpectsIdentifier = true; + CurLexerKind = CLK_LexAfterModuleImport; + } + break; + } + LLVM_FALLTHROUGH; + default: + ImportSeqState.handleMisc(); + break; + } + } + LastTokenWasAt = Result.is(tok::at); + --LexLevel; + if (OnToken && LexLevel == 0 && !Result.getFlag(Token::IsReinjected)) + OnToken(Result); +} + +/// Lex a header-name token (including one formed from header-name-tokens if +/// \p AllowConcatenation is \c true). +/// +/// \param FilenameTok Filled in with the next token. On success, this will +/// be either a header_name token. On failure, it will be whatever other +/// token was found instead. +/// \param AllowMacroExpansion If \c true, allow the header name to be formed +/// by macro expansion (concatenating tokens as necessary if the first +/// token is a '<'). +/// \return \c true if we reached EOD or EOF while looking for a > token in +/// a concatenated header name and diagnosed it. \c false otherwise. +bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) { + // Lex using header-name tokenization rules if tokens are being lexed from + // a file. Just grab a token normally if we're in a macro expansion. + if (CurPPLexer) + CurPPLexer->LexIncludeFilename(FilenameTok); + else + Lex(FilenameTok); + + // This could be a <foo/bar.h> file coming from a macro expansion. In this + // case, glue the tokens together into an angle_string_literal token. + SmallString<128> FilenameBuffer; + if (FilenameTok.is(tok::less) && AllowMacroExpansion) { + bool StartOfLine = FilenameTok.isAtStartOfLine(); + bool LeadingSpace = FilenameTok.hasLeadingSpace(); + bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro(); + + SourceLocation Start = FilenameTok.getLocation(); + SourceLocation End; + FilenameBuffer.push_back('<'); + + // Consume tokens until we find a '>'. + // FIXME: A header-name could be formed starting or ending with an + // alternative token. It's not clear whether that's ill-formed in all + // cases. + while (FilenameTok.isNot(tok::greater)) { + Lex(FilenameTok); + if (FilenameTok.isOneOf(tok::eod, tok::eof)) { + Diag(FilenameTok.getLocation(), diag::err_expected) << tok::greater; + Diag(Start, diag::note_matching) << tok::less; + return true; + } + + End = FilenameTok.getLocation(); + + // FIXME: Provide code completion for #includes. + if (FilenameTok.is(tok::code_completion)) { + setCodeCompletionReached(); + Lex(FilenameTok); + continue; + } + + // Append the spelling of this token to the buffer. If there was a space + // before it, add it now. + if (FilenameTok.hasLeadingSpace()) + FilenameBuffer.push_back(' '); + + // Get the spelling of the token, directly into FilenameBuffer if + // possible. 
+ size_t PreAppendSize = FilenameBuffer.size(); + FilenameBuffer.resize(PreAppendSize + FilenameTok.getLength()); + + const char *BufPtr = &FilenameBuffer[PreAppendSize]; + unsigned ActualLen = getSpelling(FilenameTok, BufPtr); + + // If the token was spelled somewhere else, copy it into FilenameBuffer. + if (BufPtr != &FilenameBuffer[PreAppendSize]) + memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen); + + // Resize FilenameBuffer to the correct size. + if (FilenameTok.getLength() != ActualLen) + FilenameBuffer.resize(PreAppendSize + ActualLen); + } + + FilenameTok.startToken(); + FilenameTok.setKind(tok::header_name); + FilenameTok.setFlagValue(Token::StartOfLine, StartOfLine); + FilenameTok.setFlagValue(Token::LeadingSpace, LeadingSpace); + FilenameTok.setFlagValue(Token::LeadingEmptyMacro, LeadingEmptyMacro); + CreateString(FilenameBuffer, FilenameTok, Start, End); + } else if (FilenameTok.is(tok::string_literal) && AllowMacroExpansion) { + // Convert a string-literal token of the form " h-char-sequence " + // (produced by macro expansion) into a header-name token. + // + // The rules for header-names don't quite match the rules for + // string-literals, but all the places where they differ result in + // undefined behavior, so we can and do treat them the same. + // + // A string-literal with a prefix or suffix is not translated into a + // header-name. This could theoretically be observable via the C++20 + // context-sensitive header-name formation rules. + StringRef Str = getSpelling(FilenameTok, FilenameBuffer); + if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"') + FilenameTok.setKind(tok::header_name); + } + + return false; +} + +/// Collect the tokens of a C++20 pp-import-suffix. +void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) { + // FIXME: For error recovery, consider recognizing attribute syntax here + // and terminating / diagnosing a missing semicolon if we find anything + // else? (Can we leave that to the parser?) + unsigned BracketDepth = 0; + while (true) { + Toks.emplace_back(); + Lex(Toks.back()); + + switch (Toks.back().getKind()) { + case tok::l_paren: case tok::l_square: case tok::l_brace: + ++BracketDepth; + break; + + case tok::r_paren: case tok::r_square: case tok::r_brace: + if (BracketDepth == 0) + return; + --BracketDepth; + break; + + case tok::semi: + if (BracketDepth == 0) + return; + break; + + case tok::eof: + return; + + default: + break; + } + } } + /// Lex a token following the 'import' contextual keyword. /// -void Preprocessor::LexAfterModuleImport(Token &Result) { +/// pp-import: [C++20] +/// import header-name pp-import-suffix[opt] ; +/// import header-name-tokens pp-import-suffix[opt] ; +/// [ObjC] @ import module-name ; +/// [Clang] import module-name ; +/// +/// header-name-tokens: +/// string-literal +/// < [any sequence of preprocessing-tokens other than >] > +/// +/// module-name: +/// module-name-qualifier[opt] identifier +/// +/// module-name-qualifier +/// module-name-qualifier[opt] identifier . +/// +/// We respond to a pp-import by importing macros from the named module. +bool Preprocessor::LexAfterModuleImport(Token &Result) { // Figure out what kind of lexer we actually have. recomputeCurLexerKind(); - // Lex the next token. - Lex(Result); + // Lex the next token. The header-name lexing rules are used at the start of + // a pp-import. + // + // For now, we only support header-name imports in C++20 mode. 
+ // FIXME: Should we allow this in all language modes that support an import + // declaration as an extension? + if (ModuleImportPath.empty() && getLangOpts().CPlusPlusModules) { + if (LexHeaderName(Result)) + return true; + } else { + Lex(Result); + } + + // Allocate a holding buffer for a sequence of tokens and introduce it into + // the token stream. + auto EnterTokens = [this](ArrayRef<Token> Toks) { + auto ToksCopy = llvm::make_unique<Token[]>(Toks.size()); + std::copy(Toks.begin(), Toks.end(), ToksCopy.get()); + EnterTokenStream(std::move(ToksCopy), Toks.size(), + /*DisableMacroExpansion*/ true, /*IsReinject*/ false); + }; + + // Check for a header-name. + SmallVector<Token, 32> Suffix; + if (Result.is(tok::header_name)) { + // Enter the header-name token into the token stream; a Lex action cannot + // both return a token and cache tokens (doing so would corrupt the token + // cache if the call to Lex comes from CachingLex / PeekAhead). + Suffix.push_back(Result); + + // Consume the pp-import-suffix and expand any macros in it now. We'll add + // it back into the token stream later. + CollectPpImportSuffix(Suffix); + if (Suffix.back().isNot(tok::semi)) { + // This is not a pp-import after all. + EnterTokens(Suffix); + return false; + } + + // C++2a [cpp.module]p1: + // The ';' preprocessing-token terminating a pp-import shall not have + // been produced by macro replacement. + SourceLocation SemiLoc = Suffix.back().getLocation(); + if (SemiLoc.isMacroID()) + Diag(SemiLoc, diag::err_header_import_semi_in_macro); + + // Reconstitute the import token. + Token ImportTok; + ImportTok.startToken(); + ImportTok.setKind(tok::kw_import); + ImportTok.setLocation(ModuleImportLoc); + ImportTok.setIdentifierInfo(getIdentifierInfo("import")); + ImportTok.setLength(6); + + auto Action = HandleHeaderIncludeOrImport( + /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc); + switch (Action.Kind) { + case ImportAction::None: + break; + + case ImportAction::ModuleBegin: + // Let the parser know we're textually entering the module. + Suffix.emplace_back(); + Suffix.back().startToken(); + Suffix.back().setKind(tok::annot_module_begin); + Suffix.back().setLocation(SemiLoc); + Suffix.back().setAnnotationEndLoc(SemiLoc); + Suffix.back().setAnnotationValue(Action.ModuleForHeader); + LLVM_FALLTHROUGH; + + case ImportAction::ModuleImport: + case ImportAction::SkippedModuleImport: + // We chose to import (or textually enter) the file. Convert the + // header-name token into a header unit annotation token. + Suffix[0].setKind(tok::annot_header_unit); + Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation()); + Suffix[0].setAnnotationValue(Action.ModuleForHeader); + // FIXME: Call the moduleImport callback? + break; + } + + EnterTokens(Suffix); + return false; + } // The token sequence // @@ -916,7 +1211,7 @@ void Preprocessor::LexAfterModuleImport(Token &Result) { Result.getLocation())); ModuleImportExpectsIdentifier = false; CurLexerKind = CLK_LexAfterModuleImport; - return; + return true; } // If we're expecting a '.' or a ';', and we got a '.', then wait until we @@ -925,40 +1220,61 @@ void Preprocessor::LexAfterModuleImport(Token &Result) { if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { ModuleImportExpectsIdentifier = true; CurLexerKind = CLK_LexAfterModuleImport; - return; + return true; } - // If we have a non-empty module path, load the named module. 
- if (!ModuleImportPath.empty()) { - // Under the Modules TS, the dot is just part of the module name, and not - // a real hierarchy separator. Flatten such module names now. - // - // FIXME: Is this the right level to be performing this transformation? - std::string FlatModuleName; - if (getLangOpts().ModulesTS) { - for (auto &Piece : ModuleImportPath) { - if (!FlatModuleName.empty()) - FlatModuleName += "."; - FlatModuleName += Piece.first->getName(); - } - SourceLocation FirstPathLoc = ModuleImportPath[0].second; - ModuleImportPath.clear(); - ModuleImportPath.push_back( - std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc)); + // If we didn't recognize a module name at all, this is not a (valid) import. + if (ModuleImportPath.empty() || Result.is(tok::eof)) + return true; + + // Consume the pp-import-suffix and expand any macros in it now, if we're not + // at the semicolon already. + SourceLocation SemiLoc = Result.getLocation(); + if (Result.isNot(tok::semi)) { + Suffix.push_back(Result); + CollectPpImportSuffix(Suffix); + if (Suffix.back().isNot(tok::semi)) { + // This is not an import after all. + EnterTokens(Suffix); + return false; } + SemiLoc = Suffix.back().getLocation(); + } - Module *Imported = nullptr; - if (getLangOpts().Modules) { - Imported = TheModuleLoader.loadModule(ModuleImportLoc, - ModuleImportPath, - Module::Hidden, - /*IsIncludeDirective=*/false); - if (Imported) - makeModuleVisible(Imported, ModuleImportLoc); + // Under the Modules TS, the dot is just part of the module name, and not + // a real hierarchy separator. Flatten such module names now. + // + // FIXME: Is this the right level to be performing this transformation? + std::string FlatModuleName; + if (getLangOpts().ModulesTS || getLangOpts().CPlusPlusModules) { + for (auto &Piece : ModuleImportPath) { + if (!FlatModuleName.empty()) + FlatModuleName += "."; + FlatModuleName += Piece.first->getName(); } - if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport)) - Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported); + SourceLocation FirstPathLoc = ModuleImportPath[0].second; + ModuleImportPath.clear(); + ModuleImportPath.push_back( + std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc)); + } + + Module *Imported = nullptr; + if (getLangOpts().Modules) { + Imported = TheModuleLoader.loadModule(ModuleImportLoc, + ModuleImportPath, + Module::Hidden, + /*IsInclusionDirective=*/false); + if (Imported) + makeModuleVisible(Imported, SemiLoc); } + if (Callbacks) + Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported); + + if (!Suffix.empty()) { + EnterTokens(Suffix); + return false; + } + return true; } void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) { @@ -1039,14 +1355,14 @@ bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) { void Preprocessor::addCommentHandler(CommentHandler *Handler) { assert(Handler && "NULL comment handler"); - assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) == - CommentHandlers.end() && "Comment handler already registered"); + assert(llvm::find(CommentHandlers, Handler) == CommentHandlers.end() && + "Comment handler already registered"); CommentHandlers.push_back(Handler); } void Preprocessor::removeCommentHandler(CommentHandler *Handler) { std::vector<CommentHandler *>::iterator Pos = - std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler); + llvm::find(CommentHandlers, Handler); assert(Pos != CommentHandlers.end() && "Comment handler not 
registered"); CommentHandlers.erase(Pos); } diff --git a/lib/Lex/PreprocessorLexer.cpp b/lib/Lex/PreprocessorLexer.cpp index 9f930c3a3c6a0..5f6f4a13419be 100644 --- a/lib/Lex/PreprocessorLexer.cpp +++ b/lib/Lex/PreprocessorLexer.cpp @@ -1,9 +1,8 @@ //===- PreprocessorLexer.cpp - C Language Family Lexer --------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -31,9 +30,7 @@ PreprocessorLexer::PreprocessorLexer(Preprocessor *pp, FileID fid) /// After the preprocessor has parsed a \#include, lex and /// (potentially) macro expand the filename. void PreprocessorLexer::LexIncludeFilename(Token &FilenameTok) { - assert(ParsingPreprocessorDirective && - ParsingFilename == false && - "Must be in a preprocessing directive!"); + assert(ParsingFilename == false && "reentered LexIncludeFilename"); // We are now parsing a filename! ParsingFilename = true; @@ -46,10 +43,6 @@ void PreprocessorLexer::LexIncludeFilename(Token &FilenameTok) { // We should have obtained the filename now. ParsingFilename = false; - - // No filename? - if (FilenameTok.is(tok::eod)) - PP->Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); } /// getFileEntry - Return the FileEntry corresponding to this FileID. Like diff --git a/lib/Lex/ScratchBuffer.cpp b/lib/Lex/ScratchBuffer.cpp index dc03e16daa8b6..19ab93ec54b47 100644 --- a/lib/Lex/ScratchBuffer.cpp +++ b/lib/Lex/ScratchBuffer.cpp @@ -1,9 +1,8 @@ //===--- ScratchBuffer.cpp - Scratch space for forming tokens -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Lex/TokenConcatenation.cpp b/lib/Lex/TokenConcatenation.cpp index f810c28ccdf1d..e626cfcc927f5 100644 --- a/lib/Lex/TokenConcatenation.cpp +++ b/lib/Lex/TokenConcatenation.cpp @@ -1,9 +1,8 @@ //===--- TokenConcatenation.cpp - Token Concatenation Avoidance -----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -161,6 +160,11 @@ static char GetFirstChar(const Preprocessor &PP, const Token &Tok) { bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok, const Token &Tok) const { + // Conservatively assume that every annotation token that has a printable + // form requires whitespace. + if (PrevTok.isAnnotation()) + return true; + // First, check to see if the tokens were directly adjacent in the original // source. 
If they were, it must be okay to stick them together: if there // were an issue, the tokens would have been lexed differently. diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp index 608e0dedebb7e..a7957e82e4955 100644 --- a/lib/Lex/TokenLexer.cpp +++ b/lib/Lex/TokenLexer.cpp @@ -1,9 +1,8 @@ //===- TokenLexer.cpp - Lex from a token stream ---------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -54,6 +53,7 @@ void TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroInfo *MI, Tokens = &*Macro->tokens_begin(); OwnsTokens = false; DisableMacroExpansion = false; + IsReinject = false; NumTokens = Macro->tokens_end()-Macro->tokens_begin(); MacroExpansionStart = SourceLocation(); @@ -92,7 +92,9 @@ void TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroInfo *MI, /// Create a TokenLexer for the specified token stream. This does not /// take ownership of the specified token vector. void TokenLexer::Init(const Token *TokArray, unsigned NumToks, - bool disableMacroExpansion, bool ownsTokens) { + bool disableMacroExpansion, bool ownsTokens, + bool isReinject) { + assert(!isReinject || disableMacroExpansion); // If the client is reusing a TokenLexer, make sure to free any memory // associated with it. destroy(); @@ -102,6 +104,7 @@ void TokenLexer::Init(const Token *TokArray, unsigned NumToks, Tokens = TokArray; OwnsTokens = ownsTokens; DisableMacroExpansion = disableMacroExpansion; + IsReinject = isReinject; NumTokens = NumToks; CurTokenIdx = 0; ExpandLocStart = ExpandLocEnd = SourceLocation(); @@ -244,8 +247,7 @@ void TokenLexer::ExpandFunctionArguments() { // we install the newly expanded sequence as the new 'Tokens' list. bool MadeChange = false; - const bool CalledWithVariadicArguments = - ActualArgs->invokedWithVariadicArgument(Macro); + Optional<bool> CalledWithVariadicArguments; VAOptExpansionContext VCtx(PP); @@ -292,7 +294,12 @@ void TokenLexer::ExpandFunctionArguments() { // this token. Note sawClosingParen() returns true only if the r_paren matches // the closing r_paren of the __VA_OPT__. if (!Tokens[I].is(tok::r_paren) || !VCtx.sawClosingParen()) { - if (!CalledWithVariadicArguments) { + // Lazily expand __VA_ARGS__ when we see the first __VA_OPT__. + if (!CalledWithVariadicArguments.hasValue()) { + CalledWithVariadicArguments = + ActualArgs->invokedWithVariadicArgument(Macro, PP); + } + if (!*CalledWithVariadicArguments) { // Skip this token. continue; } @@ -315,8 +322,8 @@ void TokenLexer::ExpandFunctionArguments() { stringifyVAOPTContents(ResultToks, VCtx, /*ClosingParenLoc*/ Tokens[I].getLocation()); - } else if (/*No tokens within VAOPT*/ !( - ResultToks.size() - VCtx.getNumberOfTokensPriorToVAOpt())) { + } else if (/*No tokens within VAOPT*/ + ResultToks.size() == VCtx.getNumberOfTokensPriorToVAOpt()) { // Treat VAOPT as a placemarker token. 
Eat either the '##' before the // RHS/VAOPT (if one exists, suggesting that the LHS (if any) to that // hashhash was not a placemarker) or the '##' @@ -327,6 +334,26 @@ void TokenLexer::ExpandFunctionArguments() { } else if ((I + 1 != E) && Tokens[I + 1].is(tok::hashhash)) { ++I; // Skip the following hashhash. } + } else { + // If there's a ## before the __VA_OPT__, we might have discovered + // that the __VA_OPT__ begins with a placeholder. We delay action on + // that to now to avoid messing up our stashed count of tokens before + // __VA_OPT__. + if (VCtx.beginsWithPlaceholder()) { + assert(VCtx.getNumberOfTokensPriorToVAOpt() > 0 && + ResultToks.size() >= VCtx.getNumberOfTokensPriorToVAOpt() && + ResultToks[VCtx.getNumberOfTokensPriorToVAOpt() - 1].is( + tok::hashhash) && + "no token paste before __VA_OPT__"); + ResultToks.erase(ResultToks.begin() + + VCtx.getNumberOfTokensPriorToVAOpt() - 1); + } + // If the expansion of __VA_OPT__ ends with a placeholder, eat any + // following '##' token. + if (VCtx.endsWithPlaceholder() && I + 1 != E && + Tokens[I + 1].is(tok::hashhash)) { + ++I; + } } VCtx.reset(); // We processed __VA_OPT__'s closing paren (and the exit out of @@ -387,6 +414,7 @@ void TokenLexer::ExpandFunctionArguments() { !ResultToks.empty() && ResultToks.back().is(tok::hashhash); bool PasteBefore = I != 0 && Tokens[I-1].is(tok::hashhash); bool PasteAfter = I+1 != E && Tokens[I+1].is(tok::hashhash); + bool RParenAfter = I+1 != E && Tokens[I+1].is(tok::r_paren); assert((!NonEmptyPasteBefore || PasteBefore || VCtx.isInVAOpt()) && "unexpected ## in ResultToks"); @@ -471,6 +499,18 @@ void TokenLexer::ExpandFunctionArguments() { NextTokGetsSpace); ResultToks[FirstResult].setFlagValue(Token::StartOfLine, false); NextTokGetsSpace = false; + } else { + // We're creating a placeholder token. Usually this doesn't matter, + // but it can affect paste behavior when at the start or end of a + // __VA_OPT__. + if (NonEmptyPasteBefore) { + // We're imagining a placeholder token is inserted here. If this is + // the first token in a __VA_OPT__ after a ##, delete the ##. + assert(VCtx.isInVAOpt() && "should only happen inside a __VA_OPT__"); + VCtx.hasPlaceholderAfterHashhashAtStart(); + } + if (RParenAfter) + VCtx.hasPlaceholderBeforeRParen(); } continue; } @@ -535,6 +575,9 @@ void TokenLexer::ExpandFunctionArguments() { continue; } + if (RParenAfter) + VCtx.hasPlaceholderBeforeRParen(); + // If this is on the RHS of a paste operator, we've already copied the // paste operator to the ResultToks list, unless the LHS was empty too. // Remove it. @@ -548,6 +591,8 @@ void TokenLexer::ExpandFunctionArguments() { if (!VCtx.isInVAOpt() || ResultToks.size() > VCtx.getNumberOfTokensPriorToVAOpt()) ResultToks.pop_back(); + else + VCtx.hasPlaceholderAfterHashhashAtStart(); } // If this is the __VA_ARGS__ token, and if the argument wasn't provided, @@ -606,6 +651,8 @@ bool TokenLexer::Lex(Token &Tok) { // Get the next token to return. Tok = Tokens[CurTokenIdx++]; + if (IsReinject) + Tok.setFlag(Token::IsReinjected); bool TokenIsFromPaste = false; diff --git a/lib/Lex/UnicodeCharSets.h b/lib/Lex/UnicodeCharSets.h index 116d553d20404..d56bc8ef6721e 100644 --- a/lib/Lex/UnicodeCharSets.h +++ b/lib/Lex/UnicodeCharSets.h @@ -1,9 +1,8 @@ //===--- UnicodeCharSets.h - Contains important sets of characters --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #ifndef LLVM_CLANG_LIB_LEX_UNICODECHARSETS_H
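A note on the PragmaHandler interface change that appears throughout the Pragma.cpp hunks above: handlers now take a PragmaIntroducer value rather than a bare PragmaIntroducerKind. The brace-initialized calls such as HandlePragmaDirective({PIK__Pragma, PragmaLoc}) indicate that the struct bundles the introducer kind with its source location. Below is a minimal sketch of how an out-of-tree handler would be updated for the new signature; the handler name and pragma spelling are hypothetical and not part of this patch.

#include "clang/Lex/Pragma.h"
#include "clang/Lex/Preprocessor.h"

namespace {
// Hypothetical out-of-tree handler, updated from the old
// HandlePragma(Preprocessor &, PragmaIntroducerKind, Token &) signature.
struct PragmaExampleHandler : public clang::PragmaHandler {
  PragmaExampleHandler() : clang::PragmaHandler("example") {}

  void HandlePragma(clang::Preprocessor &PP,
                    clang::PragmaIntroducer Introducer,
                    clang::Token &FirstTok) override {
    // Introducer.Kind identifies the spelling that introduced the pragma
    // (#pragma, _Pragma, or Microsoft __pragma); Introducer.Loc is its location.
    if (Introducer.Kind != clang::PIK_HashPragma)
      return;                        // only react to the '#pragma' spelling here
    PP.DiscardUntilEndOfDirective(); // consume and ignore the rest of the line
  }
};
} // namespace

Registration is unchanged from the old interface: PP.AddPragmaHandler(new PragmaExampleHandler());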

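For reference, the pp-import lexing added to Preprocessor.cpp above (LexHeaderName, CollectPpImportSuffix, and the reworked LexAfterModuleImport) corresponds to C++20 source of roughly the following shape. This is illustrative only; the header and module names are hypothetical, and the header-name forms are only recognized in a C++20 modules-enabled compilation.

// Illustrative C++20 translation unit exercising the new pp-import handling
// (the header and module names below are hypothetical).
import <vector>;      // header-name form, lexed with header-name rules
import "config.h";    // string-literal form, re-tagged as a header_name token
import lib.util;      // named-module form; the dotted path is flattened into
                      // "lib.util" before ModuleLoader::loadModule is called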