summary refs log tree commit diff
path: root/lib/Lex/Preprocessor.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Lex/Preprocessor.cpp')
-rw-r--r-- lib/Lex/Preprocessor.cpp 412
1 files changed, 364 insertions, 48 deletions
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index 047a4caaca73..bdc5fbcd2bea 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -1,9 +1,8 @@
-//===- Preprocess.cpp - C Language Family Preprocessor Implementation -----===//
+//===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -78,12 +77,12 @@ ExternalPreprocessorSource::~ExternalPreprocessorSource() = default;
Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
DiagnosticsEngine &diags, LangOptions &opts,
- SourceManager &SM, MemoryBufferCache &PCMCache,
- HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
+ SourceManager &SM, HeaderSearch &Headers,
+ ModuleLoader &TheModuleLoader,
IdentifierInfoLookup *IILookup, bool OwnsHeaders,
TranslationUnitKind TUKind)
: PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts),
- FileMgr(Headers.getFileMgr()), SourceMgr(SM), PCMCache(PCMCache),
+ FileMgr(Headers.getFileMgr()), SourceMgr(SM),
ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
// As the language options may have not been loaded yet (when
@@ -103,6 +102,7 @@ Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
DisableMacroExpansion = false;
MacroExpansionInDirectivesOverride = false;
InMacroArgs = false;
+ ArgMacro = nullptr;
InMacroArgPreExpansion = false;
NumCachedTokenLexers = 0;
PragmasEnabled = true;
@@ -567,7 +567,8 @@ void Preprocessor::EnterMainSourceFile() {
SourceLocation(), PPOpts->PCHThroughHeader,
/*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr, CurDir,
/*SearchPath=*/nullptr, /*RelativePath=*/nullptr,
- /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr);
+ /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr,
+ /*IsFrameworkFound=*/nullptr);
if (!File) {
Diag(SourceLocation(), diag::err_pp_through_header_not_found)
<< PPOpts->PCHThroughHeader;
@@ -624,8 +625,22 @@ void Preprocessor::SkipTokensWhileUsingPCH() {
bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop;
Token Tok;
while (true) {
- bool InPredefines = (CurLexer->getFileID() == getPredefinesFileID());
- CurLexer->Lex(Tok);
+ bool InPredefines =
+ (CurLexer && CurLexer->getFileID() == getPredefinesFileID());
+ switch (CurLexerKind) {
+ case CLK_Lexer:
+ CurLexer->Lex(Tok);
+ break;
+ case CLK_TokenLexer:
+ CurTokenLexer->Lex(Tok);
+ break;
+ case CLK_CachingLexer:
+ CachingLex(Tok);
+ break;
+ case CLK_LexAfterModuleImport:
+ LexAfterModuleImport(Tok);
+ break;
+ }
if (Tok.is(tok::eof) && !InPredefines) {
ReachedMainFileEOF = true;
break;
@@ -861,6 +876,8 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) {
}
void Preprocessor::Lex(Token &Result) {
+ ++LexLevel;
+
// We loop here until a lex function returns a token; this avoids recursion.
bool ReturnedToken;
do {
@@ -876,8 +893,7 @@ void Preprocessor::Lex(Token &Result) {
ReturnedToken = true;
break;
case CLK_LexAfterModuleImport:
- LexAfterModuleImport(Result);
- ReturnedToken = true;
+ ReturnedToken = LexAfterModuleImport(Result);
break;
}
} while (!ReturnedToken);
@@ -891,17 +907,296 @@ void Preprocessor::Lex(Token &Result) {
Result.setIdentifierInfo(nullptr);
}
+ // Update ImportSeqState to track our position within a C++20 import-seq
+ // if this token is being produced as a result of phase 4 of translation.
+ if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
+ !Result.getFlag(Token::IsReinjected)) {
+ switch (Result.getKind()) {
+ case tok::l_paren: case tok::l_square: case tok::l_brace:
+ ImportSeqState.handleOpenBracket();
+ break;
+ case tok::r_paren: case tok::r_square:
+ ImportSeqState.handleCloseBracket();
+ break;
+ case tok::r_brace:
+ ImportSeqState.handleCloseBrace();
+ break;
+ case tok::semi:
+ ImportSeqState.handleSemi();
+ break;
+ case tok::header_name:
+ case tok::annot_header_unit:
+ ImportSeqState.handleHeaderName();
+ break;
+ case tok::kw_export:
+ ImportSeqState.handleExport();
+ break;
+ case tok::identifier:
+ if (Result.getIdentifierInfo()->isModulesImport()) {
+ ImportSeqState.handleImport();
+ if (ImportSeqState.afterImportSeq()) {
+ ModuleImportLoc = Result.getLocation();
+ ModuleImportPath.clear();
+ ModuleImportExpectsIdentifier = true;
+ CurLexerKind = CLK_LexAfterModuleImport;
+ }
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ default:
+ ImportSeqState.handleMisc();
+ break;
+ }
+ }
+
LastTokenWasAt = Result.is(tok::at);
+ --LexLevel;
+ if (OnToken && LexLevel == 0 && !Result.getFlag(Token::IsReinjected))
+ OnToken(Result);
+}
+
+/// Lex a header-name token (including one formed from header-name-tokens if
+/// \p AllowMacroExpansion is \c true).
+///
+/// \param FilenameTok Filled in with the next token. On success, this will
+/// be a header_name token. On failure, it will be whatever other
+/// token was found instead.
+/// \param AllowMacroExpansion If \c true, allow the header name to be formed
+/// by macro expansion (concatenating tokens as necessary if the first
+/// token is a '<').
+/// \return \c true if we reached EOD or EOF while looking for a > token in
+/// a concatenated header name and diagnosed it. \c false otherwise.
+bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
+ // Lex using header-name tokenization rules if tokens are being lexed from
+ // a file. Just grab a token normally if we're in a macro expansion.
+ if (CurPPLexer)
+ CurPPLexer->LexIncludeFilename(FilenameTok);
+ else
+ Lex(FilenameTok);
+
+ // This could be a <foo/bar.h> file coming from a macro expansion. In this
+ // case, glue the tokens together into a single header_name token.
+ SmallString<128> FilenameBuffer;
+ if (FilenameTok.is(tok::less) && AllowMacroExpansion) {
+ bool StartOfLine = FilenameTok.isAtStartOfLine();
+ bool LeadingSpace = FilenameTok.hasLeadingSpace();
+ bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro();
+
+ SourceLocation Start = FilenameTok.getLocation();
+ SourceLocation End;
+ FilenameBuffer.push_back('<');
+
+ // Consume tokens until we find a '>'.
+ // FIXME: A header-name could be formed starting or ending with an
+ // alternative token. It's not clear whether that's ill-formed in all
+ // cases.
+ while (FilenameTok.isNot(tok::greater)) {
+ Lex(FilenameTok);
+ if (FilenameTok.isOneOf(tok::eod, tok::eof)) {
+ Diag(FilenameTok.getLocation(), diag::err_expected) << tok::greater;
+ Diag(Start, diag::note_matching) << tok::less;
+ return true;
+ }
+
+ End = FilenameTok.getLocation();
+
+ // FIXME: Provide code completion for #includes.
+ if (FilenameTok.is(tok::code_completion)) {
+ setCodeCompletionReached();
+ Lex(FilenameTok);
+ continue;
+ }
+
+ // Append the spelling of this token to the buffer. If there was a space
+ // before it, add it now.
+ if (FilenameTok.hasLeadingSpace())
+ FilenameBuffer.push_back(' ');
+
+ // Get the spelling of the token, directly into FilenameBuffer if
+ // possible.
+ size_t PreAppendSize = FilenameBuffer.size();
+ FilenameBuffer.resize(PreAppendSize + FilenameTok.getLength());
+
+ const char *BufPtr = &FilenameBuffer[PreAppendSize];
+ unsigned ActualLen = getSpelling(FilenameTok, BufPtr);
+
+ // If the token was spelled somewhere else, copy it into FilenameBuffer.
+ if (BufPtr != &FilenameBuffer[PreAppendSize])
+ memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen);
+
+ // Resize FilenameBuffer to the correct size.
+ if (FilenameTok.getLength() != ActualLen)
+ FilenameBuffer.resize(PreAppendSize + ActualLen);
+ }
+
+ FilenameTok.startToken();
+ FilenameTok.setKind(tok::header_name);
+ FilenameTok.setFlagValue(Token::StartOfLine, StartOfLine);
+ FilenameTok.setFlagValue(Token::LeadingSpace, LeadingSpace);
+ FilenameTok.setFlagValue(Token::LeadingEmptyMacro, LeadingEmptyMacro);
+ CreateString(FilenameBuffer, FilenameTok, Start, End);
+ } else if (FilenameTok.is(tok::string_literal) && AllowMacroExpansion) {
+ // Convert a string-literal token of the form " h-char-sequence "
+ // (produced by macro expansion) into a header-name token.
+ //
+ // The rules for header-names don't quite match the rules for
+ // string-literals, but all the places where they differ result in
+ // undefined behavior, so we can and do treat them the same.
+ //
+ // A string-literal with a prefix or suffix is not translated into a
+ // header-name. This could theoretically be observable via the C++20
+ // context-sensitive header-name formation rules.
+ StringRef Str = getSpelling(FilenameTok, FilenameBuffer);
+ if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"')
+ FilenameTok.setKind(tok::header_name);
+ }
+
+ return false;
+}
+
+/// Lex the tokens of a C++20 pp-import-suffix, appending each to \p Toks.
+void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) {
+ // FIXME: For error recovery, consider recognizing attribute syntax here
+ // and terminating / diagnosing a missing semicolon if we find anything
+ // else? (Can we leave that to the parser?)
+ unsigned BracketDepth = 0; // Number of currently open (, [ or { tokens.
+ while (true) {
+ Toks.emplace_back();
+ Lex(Toks.back());
+
+ switch (Toks.back().getKind()) {
+ case tok::l_paren: case tok::l_square: case tok::l_brace:
+ ++BracketDepth;
+ break;
+
+ case tok::r_paren: case tok::r_square: case tok::r_brace:
+ if (BracketDepth == 0) // Unmatched closer: stop, leaving it in Toks.
+ return;
+ --BracketDepth;
+ break;
+
+ case tok::semi:
+ if (BracketDepth == 0) // Top-level ';': stop, leaving it in Toks.
+ return;
+ break;
+
+ case tok::eof: // End of input: stop, leaving the eof token in Toks.
+ return;
+
+ default:
+ break;
+ }
+ }
}
+
/// Lex a token following the 'import' contextual keyword.
///
-void Preprocessor::LexAfterModuleImport(Token &Result) {
+/// pp-import: [C++20]
+/// import header-name pp-import-suffix[opt] ;
+/// import header-name-tokens pp-import-suffix[opt] ;
+/// [ObjC] @ import module-name ;
+/// [Clang] import module-name ;
+///
+/// header-name-tokens:
+/// string-literal
+/// < [any sequence of preprocessing-tokens other than >] >
+///
+/// module-name:
+/// module-name-qualifier[opt] identifier
+///
+/// module-name-qualifier
+/// module-name-qualifier[opt] identifier .
+///
+/// We respond to a pp-import by importing macros from the named module.
+bool Preprocessor::LexAfterModuleImport(Token &Result) {
// Figure out what kind of lexer we actually have.
recomputeCurLexerKind();
- // Lex the next token.
- Lex(Result);
+ // Lex the next token. The header-name lexing rules are used at the start of
+ // a pp-import.
+ //
+ // For now, we only support header-name imports in C++20 mode.
+ // FIXME: Should we allow this in all language modes that support an import
+ // declaration as an extension?
+ if (ModuleImportPath.empty() && getLangOpts().CPlusPlusModules) {
+ if (LexHeaderName(Result))
+ return true;
+ } else {
+ Lex(Result);
+ }
+
+ // Allocate a holding buffer for a sequence of tokens and introduce it into
+ // the token stream.
+ auto EnterTokens = [this](ArrayRef<Token> Toks) {
+ auto ToksCopy = llvm::make_unique<Token[]>(Toks.size());
+ std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
+ EnterTokenStream(std::move(ToksCopy), Toks.size(),
+ /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
+ };
+
+ // Check for a header-name.
+ SmallVector<Token, 32> Suffix;
+ if (Result.is(tok::header_name)) {
+ // Enter the header-name token into the token stream; a Lex action cannot
+ // both return a token and cache tokens (doing so would corrupt the token
+ // cache if the call to Lex comes from CachingLex / PeekAhead).
+ Suffix.push_back(Result);
+
+ // Consume the pp-import-suffix and expand any macros in it now. We'll add
+ // it back into the token stream later.
+ CollectPpImportSuffix(Suffix);
+ if (Suffix.back().isNot(tok::semi)) {
+ // This is not a pp-import after all.
+ EnterTokens(Suffix);
+ return false;
+ }
+
+ // C++2a [cpp.module]p1:
+ // The ';' preprocessing-token terminating a pp-import shall not have
+ // been produced by macro replacement.
+ SourceLocation SemiLoc = Suffix.back().getLocation();
+ if (SemiLoc.isMacroID())
+ Diag(SemiLoc, diag::err_header_import_semi_in_macro);
+
+ // Reconstitute the import token.
+ Token ImportTok;
+ ImportTok.startToken();
+ ImportTok.setKind(tok::kw_import);
+ ImportTok.setLocation(ModuleImportLoc);
+ ImportTok.setIdentifierInfo(getIdentifierInfo("import"));
+ ImportTok.setLength(6);
+
+ auto Action = HandleHeaderIncludeOrImport(
+ /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc);
+ switch (Action.Kind) {
+ case ImportAction::None:
+ break;
+
+ case ImportAction::ModuleBegin:
+ // Let the parser know we're textually entering the module.
+ Suffix.emplace_back();
+ Suffix.back().startToken();
+ Suffix.back().setKind(tok::annot_module_begin);
+ Suffix.back().setLocation(SemiLoc);
+ Suffix.back().setAnnotationEndLoc(SemiLoc);
+ Suffix.back().setAnnotationValue(Action.ModuleForHeader);
+ LLVM_FALLTHROUGH;
+
+ case ImportAction::ModuleImport:
+ case ImportAction::SkippedModuleImport:
+ // We chose to import (or textually enter) the file. Convert the
+ // header-name token into a header unit annotation token.
+ Suffix[0].setKind(tok::annot_header_unit);
+ Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation());
+ Suffix[0].setAnnotationValue(Action.ModuleForHeader);
+ // FIXME: Call the moduleImport callback?
+ break;
+ }
+
+ EnterTokens(Suffix);
+ return false;
+ }
// The token sequence
//
@@ -916,7 +1211,7 @@ void Preprocessor::LexAfterModuleImport(Token &Result) {
Result.getLocation()));
ModuleImportExpectsIdentifier = false;
CurLexerKind = CLK_LexAfterModuleImport;
- return;
+ return true;
}
// If we're expecting a '.' or a ';', and we got a '.', then wait until we
@@ -925,40 +1220,61 @@ void Preprocessor::LexAfterModuleImport(Token &Result) {
if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
ModuleImportExpectsIdentifier = true;
CurLexerKind = CLK_LexAfterModuleImport;
- return;
+ return true;
}
- // If we have a non-empty module path, load the named module.
- if (!ModuleImportPath.empty()) {
- // Under the Modules TS, the dot is just part of the module name, and not
- // a real hierarchy separator. Flatten such module names now.
- //
- // FIXME: Is this the right level to be performing this transformation?
- std::string FlatModuleName;
- if (getLangOpts().ModulesTS) {
- for (auto &Piece : ModuleImportPath) {
- if (!FlatModuleName.empty())
- FlatModuleName += ".";
- FlatModuleName += Piece.first->getName();
- }
- SourceLocation FirstPathLoc = ModuleImportPath[0].second;
- ModuleImportPath.clear();
- ModuleImportPath.push_back(
- std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
+ // If we didn't recognize a module name at all, this is not a (valid) import.
+ if (ModuleImportPath.empty() || Result.is(tok::eof))
+ return true;
+
+ // Consume the pp-import-suffix and expand any macros in it now, if we're not
+ // at the semicolon already.
+ SourceLocation SemiLoc = Result.getLocation();
+ if (Result.isNot(tok::semi)) {
+ Suffix.push_back(Result);
+ CollectPpImportSuffix(Suffix);
+ if (Suffix.back().isNot(tok::semi)) {
+ // This is not an import after all.
+ EnterTokens(Suffix);
+ return false;
}
+ SemiLoc = Suffix.back().getLocation();
+ }
- Module *Imported = nullptr;
- if (getLangOpts().Modules) {
- Imported = TheModuleLoader.loadModule(ModuleImportLoc,
- ModuleImportPath,
- Module::Hidden,
- /*IsIncludeDirective=*/false);
- if (Imported)
- makeModuleVisible(Imported, ModuleImportLoc);
+ // Under the Modules TS, the dot is just part of the module name, and not
+ // a real hierarchy separator. Flatten such module names now.
+ //
+ // FIXME: Is this the right level to be performing this transformation?
+ std::string FlatModuleName;
+ if (getLangOpts().ModulesTS || getLangOpts().CPlusPlusModules) {
+ for (auto &Piece : ModuleImportPath) {
+ if (!FlatModuleName.empty())
+ FlatModuleName += ".";
+ FlatModuleName += Piece.first->getName();
}
- if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
- Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
+ SourceLocation FirstPathLoc = ModuleImportPath[0].second;
+ ModuleImportPath.clear();
+ ModuleImportPath.push_back(
+ std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
+ }
+
+ Module *Imported = nullptr;
+ if (getLangOpts().Modules) {
+ Imported = TheModuleLoader.loadModule(ModuleImportLoc,
+ ModuleImportPath,
+ Module::Hidden,
+ /*IsInclusionDirective=*/false);
+ if (Imported)
+ makeModuleVisible(Imported, SemiLoc);
}
+ if (Callbacks)
+ Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
+
+ if (!Suffix.empty()) {
+ EnterTokens(Suffix);
+ return false;
+ }
+ return true;
}
void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
@@ -1039,14 +1355,14 @@ bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
void Preprocessor::addCommentHandler(CommentHandler *Handler) {
assert(Handler && "NULL comment handler");
- assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
- CommentHandlers.end() && "Comment handler already registered");
+ assert(llvm::find(CommentHandlers, Handler) == CommentHandlers.end() &&
+ "Comment handler already registered");
CommentHandlers.push_back(Handler);
}
void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
std::vector<CommentHandler *>::iterator Pos =
- std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
+ llvm::find(CommentHandlers, Handler);
assert(Pos != CommentHandlers.end() && "Comment handler not registered");
CommentHandlers.erase(Pos);
}