diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:11:37 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:11:37 +0000 |
commit | 461a67fa15370a9ec88f8f8a240bf7c123bb2029 (patch) | |
tree | 6942083d7d56bba40ec790a453ca58ad3baf6832 /lib/Lex | |
parent | 75c3240472ba6ac2669ee72ca67eb72d4e2851fc (diff) |
Notes
Diffstat (limited to 'lib/Lex')
-rw-r--r-- | lib/Lex/HeaderSearch.cpp | 91 | ||||
-rw-r--r-- | lib/Lex/Lexer.cpp | 465 | ||||
-rw-r--r-- | lib/Lex/LiteralSupport.cpp | 57 | ||||
-rw-r--r-- | lib/Lex/MacroArgs.cpp | 51 | ||||
-rw-r--r-- | lib/Lex/MacroInfo.cpp | 36 | ||||
-rw-r--r-- | lib/Lex/ModuleMap.cpp | 148 | ||||
-rw-r--r-- | lib/Lex/PPDirectives.cpp | 206 | ||||
-rw-r--r-- | lib/Lex/PPLexerChange.cpp | 7 | ||||
-rw-r--r-- | lib/Lex/PPMacroExpansion.cpp | 144 | ||||
-rw-r--r-- | lib/Lex/PTHLexer.cpp | 89 | ||||
-rw-r--r-- | lib/Lex/Pragma.cpp | 54 | ||||
-rw-r--r-- | lib/Lex/PreprocessingRecord.cpp | 49 | ||||
-rw-r--r-- | lib/Lex/Preprocessor.cpp | 95 | ||||
-rw-r--r-- | lib/Lex/PreprocessorLexer.cpp | 11 | ||||
-rw-r--r-- | lib/Lex/TokenConcatenation.cpp | 10 | ||||
-rw-r--r-- | lib/Lex/TokenLexer.cpp | 296 |
16 files changed, 1225 insertions, 584 deletions
diff --git a/lib/Lex/HeaderSearch.cpp b/lib/Lex/HeaderSearch.cpp index 1ebcc0a1c657c..aa2588659ddfe 100644 --- a/lib/Lex/HeaderSearch.cpp +++ b/lib/Lex/HeaderSearch.cpp @@ -1,4 +1,4 @@ -//===--- HeaderSearch.cpp - Resolve Header File Locations ---===// +//===- HeaderSearch.cpp - Resolve Header File Locations -------------------===// // // The LLVM Compiler Infrastructure // @@ -12,25 +12,38 @@ //===----------------------------------------------------------------------===// #include "clang/Lex/HeaderSearch.h" +#include "clang/Basic/Diagnostic.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/Module.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/VirtualFileSystem.h" +#include "clang/Lex/DirectoryLookup.h" #include "clang/Lex/ExternalPreprocessorSource.h" #include "clang/Lex/HeaderMap.h" #include "clang/Lex/HeaderSearchOptions.h" #include "clang/Lex/LexDiagnostic.h" -#include "clang/Lex/Lexer.h" +#include "clang/Lex/ModuleMap.h" #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/Capacity.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" +#include <algorithm> +#include <cassert> +#include <cstddef> #include <cstdio> +#include <cstring> +#include <string> +#include <system_error> #include <utility> -#if defined(LLVM_ON_UNIX) -#include <limits.h> -#endif + using namespace clang; const IdentifierInfo * @@ -52,7 +65,7 @@ HeaderFileInfo::getControllingMacro(ExternalPreprocessorSource *External) { return ControllingMacro; } -ExternalHeaderFileInfoSource::~ExternalHeaderFileInfoSource() {} +ExternalHeaderFileInfoSource::~ExternalHeaderFileInfoSource() = default; HeaderSearch::HeaderSearch(std::shared_ptr<HeaderSearchOptions> HSOpts, 
SourceManager &SourceMgr, DiagnosticsEngine &Diags, @@ -60,17 +73,7 @@ HeaderSearch::HeaderSearch(std::shared_ptr<HeaderSearchOptions> HSOpts, const TargetInfo *Target) : HSOpts(std::move(HSOpts)), Diags(Diags), FileMgr(SourceMgr.getFileManager()), FrameworkMap(64), - ModMap(SourceMgr, Diags, LangOpts, Target, *this) { - AngledDirIdx = 0; - SystemDirIdx = 0; - NoCurDirSearch = false; - - ExternalLookup = nullptr; - ExternalSource = nullptr; - NumIncluded = 0; - NumMultiIncludeFileOptzn = 0; - NumFrameworkLookups = NumSubFrameworkLookups = 0; -} + ModMap(SourceMgr, Diags, LangOpts, Target, *this) {} HeaderSearch::~HeaderSearch() { // Delete headermaps. @@ -128,36 +131,40 @@ void HeaderSearch::getHeaderMapFileNames( Names.push_back(HM.first->getName()); } -std::string HeaderSearch::getModuleFileName(Module *Module) { +std::string HeaderSearch::getCachedModuleFileName(Module *Module) { const FileEntry *ModuleMap = getModuleMap().getModuleMapFileForUniquing(Module); - return getModuleFileName(Module->Name, ModuleMap->getName(), - /*UsePrebuiltPath*/false); + return getCachedModuleFileName(Module->Name, ModuleMap->getName()); } -std::string HeaderSearch::getModuleFileName(StringRef ModuleName, - StringRef ModuleMapPath, - bool UsePrebuiltPath) { - if (UsePrebuiltPath) { - if (HSOpts->PrebuiltModulePaths.empty()) - return std::string(); - - // Go though each prebuilt module path and try to find the pcm file. - for (const std::string &Dir : HSOpts->PrebuiltModulePaths) { - SmallString<256> Result(Dir); - llvm::sys::fs::make_absolute(Result); - - llvm::sys::path::append(Result, ModuleName + ".pcm"); - if (getFileMgr().getFile(Result.str())) - return Result.str().str(); - } - return std::string(); +std::string HeaderSearch::getPrebuiltModuleFileName(StringRef ModuleName, + bool FileMapOnly) { + // First check the module name to pcm file map. 
+ auto i (HSOpts->PrebuiltModuleFiles.find(ModuleName)); + if (i != HSOpts->PrebuiltModuleFiles.end()) + return i->second; + + if (FileMapOnly || HSOpts->PrebuiltModulePaths.empty()) + return {}; + + // Then go through each prebuilt module directory and try to find the pcm + // file. + for (const std::string &Dir : HSOpts->PrebuiltModulePaths) { + SmallString<256> Result(Dir); + llvm::sys::fs::make_absolute(Result); + llvm::sys::path::append(Result, ModuleName + ".pcm"); + if (getFileMgr().getFile(Result.str())) + return Result.str().str(); } + return {}; +} +std::string HeaderSearch::getCachedModuleFileName(StringRef ModuleName, + StringRef ModuleMapPath) { // If we don't have a module cache path or aren't supposed to use one, we // can't do anything. if (getModuleCachePath().empty()) - return std::string(); + return {}; SmallString<256> Result(getModuleCachePath()); llvm::sys::fs::make_absolute(Result); @@ -177,7 +184,7 @@ std::string HeaderSearch::getModuleFileName(StringRef ModuleName, Parent = "."; auto *Dir = FileMgr.getDirectory(Parent); if (!Dir) - return std::string(); + return {}; auto DirName = FileMgr.getCanonicalName(Dir); auto FileName = llvm::sys::path::filename(ModuleMapPath); @@ -376,7 +383,6 @@ const FileEntry *DirectoryLookup::LookupFile( Filename = StringRef(MappedName.begin(), MappedName.size()); HasBeenMapped = true; Result = HM->LookupFile(Filename, HS.getFileMgr()); - } else { Result = HS.getFileMgr().getFile(Dest); } @@ -587,7 +593,6 @@ void HeaderSearch::setTarget(const TargetInfo &Target) { ModMap.setTarget(Target); } - //===----------------------------------------------------------------------===// // Header File Location. 
//===----------------------------------------------------------------------===// @@ -954,7 +959,6 @@ LookupSubframeworkHeader(StringRef Filename, HeadersFilename.append(Filename.begin()+SlashPos+1, Filename.end()); if (!(FE = FileMgr.getFile(HeadersFilename, /*openFile=*/true))) { - // Check ".../Frameworks/HIToolbox.framework/PrivateHeaders/HIToolbox.h" HeadersFilename = FrameworkName; HeadersFilename += "PrivateHeaders/"; @@ -1111,7 +1115,7 @@ bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP, // FIXME: this is a workaround for the lack of proper modules-aware support // for #import / #pragma once - auto TryEnterImported = [&](void) -> bool { + auto TryEnterImported = [&]() -> bool { if (!ModulesEnabled) return false; // Ensure FileInfo bits are up to date. @@ -1444,7 +1448,6 @@ Module *HeaderSearch::loadFrameworkModule(StringRef Name, return ModMap.findModule(Name); } - HeaderSearch::LoadModuleMapResult HeaderSearch::loadModuleMapFile(StringRef DirName, bool IsSystem, bool IsFramework) { diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index 61bcef8cb760e..830354ab23f0d 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -1,4 +1,4 @@ -//===--- Lexer.cpp - C Language Family Lexer ------------------------------===// +//===- Lexer.cpp - C Language Family Lexer --------------------------------===// // // The LLVM Compiler Infrastructure // @@ -15,17 +15,29 @@ #include "UnicodeCharSets.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" +#include "clang/Basic/TokenKinds.h" #include "clang/Lex/LexDiagnostic.h" #include "clang/Lex/LiteralSupport.h" +#include "clang/Lex/MultipleIncludeOpt.h" #include "clang/Lex/Preprocessor.h" #include "clang/Lex/PreprocessorOptions.h" +#include "clang/Lex/Token.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/TokenKinds.h" +#include 
"llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/NativeFormatting.h" #include "llvm/Support/UnicodeCharRanges.h" #include <algorithm> #include <cassert> @@ -63,7 +75,7 @@ tok::ObjCKeywordKind Token::getObjCKeywordID() const { // Lexer Class Implementation //===----------------------------------------------------------------------===// -void Lexer::anchor() { } +void Lexer::anchor() {} void Lexer::InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd) { @@ -120,31 +132,21 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr, /// assumes that the associated file buffer and Preprocessor objects will /// outlive it, so it doesn't take ownership of either of them. Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP) - : PreprocessorLexer(&PP, FID), - FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)), - LangOpts(PP.getLangOpts()) { - + : PreprocessorLexer(&PP, FID), + FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)), + LangOpts(PP.getLangOpts()) { InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(), InputFile->getBufferEnd()); resetExtendedTokenMode(); } -void Lexer::resetExtendedTokenMode() { - assert(PP && "Cannot reset token mode without a preprocessor"); - if (LangOpts.TraditionalCPP) - SetKeepWhitespaceMode(true); - else - SetCommentRetentionState(PP->getCommentRetentionState()); -} - /// Lexer constructor - Create a new raw lexer object. This object is only /// suitable for calls to 'LexFromRawLexer'. This lexer assumes that the text /// range will outlive it, so it doesn't take ownership of it. 
Lexer::Lexer(SourceLocation fileloc, const LangOptions &langOpts, const char *BufStart, const char *BufPtr, const char *BufEnd) - : FileLoc(fileloc), LangOpts(langOpts) { - + : FileLoc(fileloc), LangOpts(langOpts) { InitLexer(BufStart, BufPtr, BufEnd); // We *are* in raw mode. @@ -159,6 +161,14 @@ Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *FromFile, : Lexer(SM.getLocForStartOfFile(FID), langOpts, FromFile->getBufferStart(), FromFile->getBufferStart(), FromFile->getBufferEnd()) {} +void Lexer::resetExtendedTokenMode() { + assert(PP && "Cannot reset token mode without a preprocessor"); + if (LangOpts.TraditionalCPP) + SetKeepWhitespaceMode(true); + else + SetCommentRetentionState(PP->getCommentRetentionState()); +} + /// Create_PragmaLexer: Lexer constructor - Create a new lexer object for /// _Pragma expansion. This has a variety of magic semantics that this method /// sets up. It returns a new'd Lexer that must be delete'd when done. @@ -209,30 +219,39 @@ Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc, return L; } -/// Stringify - Convert the specified string into a C string, with surrounding -/// ""'s, and with escaped \ and " characters. +template <typename T> static void StringifyImpl(T &Str, char Quote) { + typename T::size_type i = 0, e = Str.size(); + while (i < e) { + if (Str[i] == '\\' || Str[i] == Quote) { + Str.insert(Str.begin() + i, '\\'); + i += 2; + ++e; + } else if (Str[i] == '\n' || Str[i] == '\r') { + // Replace '\r\n' and '\n\r' to '\\' followed by 'n'. + if ((i < e - 1) && (Str[i + 1] == '\n' || Str[i + 1] == '\r') && + Str[i] != Str[i + 1]) { + Str[i] = '\\'; + Str[i + 1] = 'n'; + } else { + // Replace '\n' and '\r' to '\\' followed by 'n'. + Str[i] = '\\'; + Str.insert(Str.begin() + i + 1, 'n'); + ++e; + } + i += 2; + } else + ++i; + } +} + std::string Lexer::Stringify(StringRef Str, bool Charify) { std::string Result = Str; char Quote = Charify ? 
'\'' : '"'; - for (unsigned i = 0, e = Result.size(); i != e; ++i) { - if (Result[i] == '\\' || Result[i] == Quote) { - Result.insert(Result.begin()+i, '\\'); - ++i; ++e; - } - } + StringifyImpl(Result, Quote); return Result; } -/// Stringify - Convert the specified string into a C string by escaping '\' -/// and " characters. This does not add surrounding ""'s to the string. -void Lexer::Stringify(SmallVectorImpl<char> &Str) { - for (unsigned i = 0, e = Str.size(); i != e; ++i) { - if (Str[i] == '\\' || Str[i] == '"') { - Str.insert(Str.begin()+i, '\\'); - ++i; ++e; - } - } -} +void Lexer::Stringify(SmallVectorImpl<char> &Str) { StringifyImpl(Str, '"'); } //===----------------------------------------------------------------------===// // Token Spelling @@ -307,7 +326,7 @@ StringRef Lexer::getSpelling(SourceLocation loc, StringRef file = SM.getBufferData(locInfo.first, &invalidTemp); if (invalidTemp) { if (invalid) *invalid = true; - return StringRef(); + return {}; } const char *tokenBegin = file.data() + locInfo.second; @@ -345,7 +364,7 @@ std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr, if (Invalid) *Invalid = CharDataInvalid; if (CharDataInvalid) - return std::string(); + return {}; // If this token contains nothing interesting, return it directly. if (!Tok.needsCleaning()) @@ -367,7 +386,7 @@ std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr, /// to point to a constant buffer with the data already in it (avoiding a /// copy). The caller is not allowed to modify the returned buffer pointer /// if an internal buffer is returned. 
-unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer, +unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid) { assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); @@ -463,19 +482,15 @@ static const char *findBeginningOfLine(StringRef Buffer, unsigned Offset) { const char *BufStart = Buffer.data(); if (Offset >= Buffer.size()) return nullptr; - const char *StrData = BufStart + Offset; - - if (StrData[0] == '\n' || StrData[0] == '\r') - return StrData; - const char *LexStart = StrData; - while (LexStart != BufStart) { - if (LexStart[0] == '\n' || LexStart[0] == '\r') { + const char *LexStart = BufStart + Offset; + for (; LexStart != BufStart; --LexStart) { + if (isVerticalWhitespace(LexStart[0]) && + !Lexer::isNewLineEscaped(BufStart, LexStart)) { + // LexStart should point at first character of logical line. ++LexStart; break; } - - --LexStart; } return LexStart; } @@ -487,7 +502,7 @@ static SourceLocation getBeginningOfFileToken(SourceLocation Loc, std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc); if (LocInfo.first.isInvalid()) return Loc; - + bool Invalid = false; StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); if (Invalid) @@ -499,31 +514,31 @@ static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const char *LexStart = findBeginningOfLine(Buffer, LocInfo.second); if (!LexStart || LexStart == StrData) return Loc; - + // Create a lexer starting at the beginning of this token. SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second); Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart, Buffer.end()); TheLexer.SetCommentRetentionState(true); - + // Lex tokens until we find the token that contains the source location. 
Token TheTok; do { TheLexer.LexFromRawLexer(TheTok); - + if (TheLexer.getBufferLocation() > StrData) { // Lexing this token has taken the lexer past the source location we're // looking for. If the current token encompasses our source location, // return the beginning of that token. if (TheLexer.getBufferLocation() - TheTok.getLength() <= StrData) return TheTok.getLocation(); - + // We ended up skipping over the source location entirely, which means // that it points into whitespace. We're done here. break; } } while (TheTok.getKind() != tok::eof); - + // We've passed our source location; just return the original source location. return Loc; } @@ -531,34 +546,34 @@ static SourceLocation getBeginningOfFileToken(SourceLocation Loc, SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) { - if (Loc.isFileID()) - return getBeginningOfFileToken(Loc, SM, LangOpts); - - if (!SM.isMacroArgExpansion(Loc)) - return Loc; - - SourceLocation FileLoc = SM.getSpellingLoc(Loc); - SourceLocation BeginFileLoc = getBeginningOfFileToken(FileLoc, SM, LangOpts); - std::pair<FileID, unsigned> FileLocInfo = SM.getDecomposedLoc(FileLoc); - std::pair<FileID, unsigned> BeginFileLocInfo - = SM.getDecomposedLoc(BeginFileLoc); - assert(FileLocInfo.first == BeginFileLocInfo.first && - FileLocInfo.second >= BeginFileLocInfo.second); - return Loc.getLocWithOffset(BeginFileLocInfo.second - FileLocInfo.second); + if (Loc.isFileID()) + return getBeginningOfFileToken(Loc, SM, LangOpts); + + if (!SM.isMacroArgExpansion(Loc)) + return Loc; + + SourceLocation FileLoc = SM.getSpellingLoc(Loc); + SourceLocation BeginFileLoc = getBeginningOfFileToken(FileLoc, SM, LangOpts); + std::pair<FileID, unsigned> FileLocInfo = SM.getDecomposedLoc(FileLoc); + std::pair<FileID, unsigned> BeginFileLocInfo = + SM.getDecomposedLoc(BeginFileLoc); + assert(FileLocInfo.first == BeginFileLocInfo.first && + FileLocInfo.second >= BeginFileLocInfo.second); + return 
Loc.getLocWithOffset(BeginFileLocInfo.second - FileLocInfo.second); } namespace { - enum PreambleDirectiveKind { - PDK_Skipped, - PDK_Unknown - }; +enum PreambleDirectiveKind { + PDK_Skipped, + PDK_Unknown +}; -} // end anonymous namespace +} // namespace -std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer, - const LangOptions &LangOpts, - unsigned MaxLines) { +PreambleBounds Lexer::ComputePreamble(StringRef Buffer, + const LangOptions &LangOpts, + unsigned MaxLines) { // Create a lexer starting at the beginning of the file. Note that we use a // "fake" file source location at offset 1 so that the lexer will track our // position within the file. @@ -568,9 +583,6 @@ std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer, Buffer.end()); TheLexer.SetCommentRetentionState(true); - // StartLoc will differ from FileLoc if there is a BOM that was skipped. - SourceLocation StartLoc = TheLexer.getSourceLocation(); - bool InPreprocessorDirective = false; Token TheTok; SourceLocation ActiveCommentLoc; @@ -599,17 +611,17 @@ std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer, if (TheTok.getKind() == tok::eof) { break; } - + // If we haven't hit the end of the preprocessor directive, skip this // token. if (!TheTok.isAtStartOfLine()) continue; - + // We've passed the end of the preprocessor directive, and will look // at this token again below. InPreprocessorDirective = false; } - + // Keep track of the # of lines in the preamble. if (TheTok.isAtStartOfLine()) { unsigned TokOffset = TheTok.getLocation().getRawEncoding() - StartOffset; @@ -626,13 +638,13 @@ std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer, ActiveCommentLoc = TheTok.getLocation(); continue; } - + if (TheTok.isAtStartOfLine() && TheTok.getKind() == tok::hash) { - // This is the start of a preprocessor directive. + // This is the start of a preprocessor directive. 
Token HashTok = TheTok; InPreprocessorDirective = true; ActiveCommentLoc = SourceLocation(); - + // Figure out which directive this is. Since we're lexing raw tokens, // we don't have an identifier table available. Instead, just look at // the raw identifier to recognize and categorize preprocessor directives. @@ -672,7 +684,7 @@ std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer, break; } } - + // We only end up here if we didn't recognize the preprocessor // directive or it was one that can't occur in the preamble at this // point. Roll back the current token to the location of the '#'. @@ -685,14 +697,14 @@ std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer, // the preamble. break; } while (true); - + SourceLocation End; if (ActiveCommentLoc.isValid()) End = ActiveCommentLoc; // don't truncate a decl comment. else End = TheTok.getLocation(); - return std::make_pair(End.getRawEncoding() - StartLoc.getRawEncoding(), + return PreambleBounds(End.getRawEncoding() - FileLoc.getRawEncoding(), TheTok.isAtStartOfLine()); } @@ -707,13 +719,13 @@ SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart, // trigraphs. bool Invalid = false; const char *TokPtr = SM.getCharacterData(TokStart, &Invalid); - + // If they request the first char of the token, we're trivially done. if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr))) return TokStart; - + unsigned PhysOffset = 0; - + // The usual case is that tokens don't contain anything interesting. Skip // over the uninteresting characters. If a token only consists of simple // chars, this method is extremely fast. @@ -724,7 +736,7 @@ SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart, --CharNo; ++PhysOffset; } - + // If we have a character that may be a trigraph or escaped newline, use a // lexer to parse it correctly. 
for (; CharNo; --CharNo) { @@ -733,14 +745,14 @@ SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart, TokPtr += Size; PhysOffset += Size; } - + // Final detail: if we end up on an escaped newline, we want to return the // location of the actual byte of the token. For example foo\<newline>bar // advanced by 3 should return the location of b, not of \\. One compounding // detail of this is that the escape may be made by a trigraph. if (!Lexer::isObviouslySimpleCharacter(*TokPtr)) PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr; - + return TokStart.getLocWithOffset(PhysOffset); } @@ -763,11 +775,11 @@ SourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts) { if (Loc.isInvalid()) - return SourceLocation(); + return {}; if (Loc.isMacroID()) { if (Offset > 0 || !isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc)) - return SourceLocation(); // Points inside the macro expansion. + return {}; // Points inside the macro expansion. } unsigned Len = Lexer::MeasureTokenLength(Loc, SM, LangOpts); @@ -775,7 +787,7 @@ SourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset, Len = Len - Offset; else return Loc; - + return Loc.getLocWithOffset(Len); } @@ -838,7 +850,7 @@ static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, if (Range.isTokenRange()) { End = Lexer::getLocForEndOfToken(End, 0, SM,LangOpts); if (End.isInvalid()) - return CharSourceRange(); + return {}; } // Break down the source locations. 
@@ -846,12 +858,12 @@ static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, unsigned BeginOffs; std::tie(FID, BeginOffs) = SM.getDecomposedLoc(Begin); if (FID.isInvalid()) - return CharSourceRange(); + return {}; unsigned EndOffs; if (!SM.isInFileID(End, FID, &EndOffs) || BeginOffs > EndOffs) - return CharSourceRange(); + return {}; return CharSourceRange::getCharRange(Begin, End); } @@ -862,14 +874,14 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range, SourceLocation Begin = Range.getBegin(); SourceLocation End = Range.getEnd(); if (Begin.isInvalid() || End.isInvalid()) - return CharSourceRange(); + return {}; if (Begin.isFileID() && End.isFileID()) return makeRangeFromFileLocs(Range, SM, LangOpts); if (Begin.isMacroID() && End.isFileID()) { if (!isAtStartOfMacroExpansion(Begin, SM, LangOpts, &Begin)) - return CharSourceRange(); + return {}; Range.setBegin(Begin); return makeRangeFromFileLocs(Range, SM, LangOpts); } @@ -879,7 +891,7 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range, &End)) || (Range.isCharRange() && !isAtStartOfMacroExpansion(End, SM, LangOpts, &End))) - return CharSourceRange(); + return {}; Range.setEnd(End); return makeRangeFromFileLocs(Range, SM, LangOpts); } @@ -900,13 +912,13 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range, const SrcMgr::SLocEntry &BeginEntry = SM.getSLocEntry(SM.getFileID(Begin), &Invalid); if (Invalid) - return CharSourceRange(); + return {}; if (BeginEntry.getExpansion().isMacroArgExpansion()) { const SrcMgr::SLocEntry &EndEntry = SM.getSLocEntry(SM.getFileID(End), &Invalid); if (Invalid) - return CharSourceRange(); + return {}; if (EndEntry.getExpansion().isMacroArgExpansion() && BeginEntry.getExpansion().getExpansionLocStart() == @@ -917,7 +929,7 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range, } } - return CharSourceRange(); + return {}; } StringRef Lexer::getSourceText(CharSourceRange Range, @@ -927,21 +939,21 @@ StringRef 
Lexer::getSourceText(CharSourceRange Range, Range = makeFileCharRange(Range, SM, LangOpts); if (Range.isInvalid()) { if (Invalid) *Invalid = true; - return StringRef(); + return {}; } // Break down the source location. std::pair<FileID, unsigned> beginInfo = SM.getDecomposedLoc(Range.getBegin()); if (beginInfo.first.isInvalid()) { if (Invalid) *Invalid = true; - return StringRef(); + return {}; } unsigned EndOffs; if (!SM.isInFileID(Range.getEnd(), beginInfo.first, &EndOffs) || beginInfo.second > EndOffs) { if (Invalid) *Invalid = true; - return StringRef(); + return {}; } // Try to the load the file buffer. @@ -949,7 +961,7 @@ StringRef Lexer::getSourceText(CharSourceRange Range, StringRef file = SM.getBufferData(beginInfo.first, &invalidTemp); if (invalidTemp) { if (Invalid) *Invalid = true; - return StringRef(); + return {}; } if (Invalid) *Invalid = false; @@ -972,7 +984,7 @@ StringRef Lexer::getImmediateMacroName(SourceLocation Loc, // For macro arguments we need to check that the argument did not come // from an inner macro, e.g: "MAC1( MAC2(foo) )" - + // Loc points to the argument id of the macro definition, move to the // macro expansion. Loc = SM.getImmediateExpansionRange(Loc).first; @@ -1013,7 +1025,7 @@ StringRef Lexer::getImmediateMacroNameForDiagnostics( // If the macro's spelling has no FileID, then it's actually a token paste // or stringization (or similar) and not a macro at all. if (!SM.getFileEntryForID(SM.getFileID(SM.getSpellingLoc(Loc)))) - return StringRef(); + return {}; // Find the spelling location of the start of the non-argument expansion // range. 
This is where the macro name was spelled in order to begin @@ -1032,20 +1044,40 @@ bool Lexer::isIdentifierBodyChar(char c, const LangOptions &LangOpts) { return isIdentifierBody(c, LangOpts.DollarIdents); } +bool Lexer::isNewLineEscaped(const char *BufferStart, const char *Str) { + assert(isVerticalWhitespace(Str[0])); + if (Str - 1 < BufferStart) + return false; + + if ((Str[0] == '\n' && Str[-1] == '\r') || + (Str[0] == '\r' && Str[-1] == '\n')) { + if (Str - 2 < BufferStart) + return false; + --Str; + } + --Str; + + // Rewind to first non-space character: + while (Str > BufferStart && isHorizontalWhitespace(*Str)) + --Str; + + return *Str == '\\'; +} + StringRef Lexer::getIndentationForLine(SourceLocation Loc, const SourceManager &SM) { if (Loc.isInvalid() || Loc.isMacroID()) - return ""; + return {}; std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc); if (LocInfo.first.isInvalid()) - return ""; + return {}; bool Invalid = false; StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); if (Invalid) - return ""; + return {}; const char *Line = findBeginningOfLine(Buffer, LocInfo.second); if (!Line) - return ""; + return {}; StringRef Rest = Buffer.substr(Line - Buffer.data()); size_t NumWhitespaceChars = Rest.find_first_not_of(" \t"); return NumWhitespaceChars == StringRef::npos @@ -1199,18 +1231,12 @@ const char *Lexer::SkipEscapedNewLines(const char *P) { } } -/// \brief Checks that the given token is the first token that occurs after the -/// given location (this excludes comments and whitespace). Returns the location -/// immediately after the specified token. If the token is not found or the -/// location is inside a macro, the returned source location will be invalid. 
-SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc, - tok::TokenKind TKind, - const SourceManager &SM, - const LangOptions &LangOpts, - bool SkipTrailingWhitespaceAndNewLine) { +Optional<Token> Lexer::findNextToken(SourceLocation Loc, + const SourceManager &SM, + const LangOptions &LangOpts) { if (Loc.isMacroID()) { if (!Lexer::isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc)) - return SourceLocation(); + return None; } Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts); @@ -1221,7 +1247,7 @@ SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc, bool InvalidTemp = false; StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp); if (InvalidTemp) - return SourceLocation(); + return None; const char *TokenBegin = File.data() + LocInfo.second; @@ -1231,15 +1257,25 @@ SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc, // Find the token. Token Tok; lexer.LexFromRawLexer(Tok); - if (Tok.isNot(TKind)) - return SourceLocation(); - SourceLocation TokenLoc = Tok.getLocation(); + return Tok; +} + +/// \brief Checks that the given token is the first token that occurs after the +/// given location (this excludes comments and whitespace). Returns the location +/// immediately after the specified token. If the token is not found or the +/// location is inside a macro, the returned source location will be invalid. +SourceLocation Lexer::findLocationAfterToken( + SourceLocation Loc, tok::TokenKind TKind, const SourceManager &SM, + const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine) { + Optional<Token> Tok = findNextToken(Loc, SM, LangOpts); + if (!Tok || Tok->isNot(TKind)) + return {}; + SourceLocation TokenLoc = Tok->getLocation(); // Calculate how much whitespace needs to be skipped if any. 
unsigned NumWhitespaceChars = 0; if (SkipTrailingWhitespaceAndNewLine) { - const char *TokenEnd = SM.getCharacterData(TokenLoc) + - Tok.getLength(); + const char *TokenEnd = SM.getCharacterData(TokenLoc) + Tok->getLength(); unsigned char C = *TokenEnd; while (isHorizontalWhitespace(C)) { C = *(++TokenEnd); @@ -1256,7 +1292,7 @@ SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc, } } - return TokenLoc.getLocWithOffset(Tok.getLength() + NumWhitespaceChars); + return TokenLoc.getLocWithOffset(Tok->getLength() + NumWhitespaceChars); } /// getCharAndSizeSlow - Peek a single 'character' from the specified buffer, @@ -1274,7 +1310,6 @@ SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc, /// /// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should /// be updated to match. -/// char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size, Token *Tok) { // If we have a slash, look for an escaped newline. @@ -1378,9 +1413,9 @@ Slash: // Helper methods for lexing. //===----------------------------------------------------------------------===// -/// \brief Routine that indiscriminately skips bytes in the source file. -void Lexer::SkipBytes(unsigned Bytes, bool StartOfLine) { - BufferPtr += Bytes; +/// \brief Routine that indiscriminately sets the offset into the source file. +void Lexer::SetByteOffset(unsigned Offset, bool StartOfLine) { + BufferPtr = BufferStart + Offset; if (BufferPtr > BufferEnd) BufferPtr = BufferEnd; // FIXME: What exactly does the StartOfLine bit mean? There are two @@ -1466,6 +1501,75 @@ static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, } } +/// After encountering UTF-8 character C and interpreting it as an identifier +/// character, check whether it's a homoglyph for a common non-identifier +/// source character that is unlikely to be an intentional identifier +/// character and warn if so. 
+static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, + CharSourceRange Range) { + // FIXME: Handle Unicode quotation marks (smart quotes, fullwidth quotes). + struct HomoglyphPair { + uint32_t Character; + char LooksLike; + bool operator<(HomoglyphPair R) const { return Character < R.Character; } + }; + static constexpr HomoglyphPair SortedHomoglyphs[] = { + {U'\u01c3', '!'}, // LATIN LETTER RETROFLEX CLICK + {U'\u037e', ';'}, // GREEK QUESTION MARK + {U'\u2212', '-'}, // MINUS SIGN + {U'\u2215', '/'}, // DIVISION SLASH + {U'\u2216', '\\'}, // SET MINUS + {U'\u2217', '*'}, // ASTERISK OPERATOR + {U'\u2223', '|'}, // DIVIDES + {U'\u2227', '^'}, // LOGICAL AND + {U'\u2236', ':'}, // RATIO + {U'\u223c', '~'}, // TILDE OPERATOR + {U'\ua789', ':'}, // MODIFIER LETTER COLON + {U'\uff01', '!'}, // FULLWIDTH EXCLAMATION MARK + {U'\uff03', '#'}, // FULLWIDTH NUMBER SIGN + {U'\uff04', '$'}, // FULLWIDTH DOLLAR SIGN + {U'\uff05', '%'}, // FULLWIDTH PERCENT SIGN + {U'\uff06', '&'}, // FULLWIDTH AMPERSAND + {U'\uff08', '('}, // FULLWIDTH LEFT PARENTHESIS + {U'\uff09', ')'}, // FULLWIDTH RIGHT PARENTHESIS + {U'\uff0a', '*'}, // FULLWIDTH ASTERISK + {U'\uff0b', '+'}, // FULLWIDTH ASTERISK + {U'\uff0c', ','}, // FULLWIDTH COMMA + {U'\uff0d', '-'}, // FULLWIDTH HYPHEN-MINUS + {U'\uff0e', '.'}, // FULLWIDTH FULL STOP + {U'\uff0f', '/'}, // FULLWIDTH SOLIDUS + {U'\uff1a', ':'}, // FULLWIDTH COLON + {U'\uff1b', ';'}, // FULLWIDTH SEMICOLON + {U'\uff1c', '<'}, // FULLWIDTH LESS-THAN SIGN + {U'\uff1d', '='}, // FULLWIDTH EQUALS SIGN + {U'\uff1e', '>'}, // FULLWIDTH GREATER-THAN SIGN + {U'\uff1f', '?'}, // FULLWIDTH QUESTION MARK + {U'\uff20', '@'}, // FULLWIDTH COMMERCIAL AT + {U'\uff3b', '['}, // FULLWIDTH LEFT SQUARE BRACKET + {U'\uff3c', '\\'}, // FULLWIDTH REVERSE SOLIDUS + {U'\uff3d', ']'}, // FULLWIDTH RIGHT SQUARE BRACKET + {U'\uff3e', '^'}, // FULLWIDTH CIRCUMFLEX ACCENT + {U'\uff5b', '{'}, // FULLWIDTH LEFT CURLY BRACKET + {U'\uff5c', '|'}, // FULLWIDTH 
VERTICAL LINE + {U'\uff5d', '}'}, // FULLWIDTH RIGHT CURLY BRACKET + {U'\uff5e', '~'}, // FULLWIDTH TILDE + {0, 0} + }; + auto Homoglyph = + std::lower_bound(std::begin(SortedHomoglyphs), + std::end(SortedHomoglyphs) - 1, HomoglyphPair{C, '\0'}); + if (Homoglyph->Character == C) { + llvm::SmallString<5> CharBuf; + { + llvm::raw_svector_ostream CharOS(CharBuf); + llvm::write_hex(CharOS, C, llvm::HexPrintStyle::Upper, 4); + } + const char LooksLikeStr[] = {Homoglyph->LooksLike, 0}; + Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph) + << Range << CharBuf << LooksLikeStr; + } +} + bool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size, Token &Result) { const char *UCNPtr = CurPtr + Size; @@ -1500,10 +1604,13 @@ bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) { !isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts)) return false; - if (!isLexingRawMode()) + if (!isLexingRawMode()) { maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint, makeCharRange(*this, CurPtr, UnicodePtr), /*IsFirst=*/false); + maybeDiagnoseUTF8Homoglyph(PP->getDiagnostics(), CodePoint, + makeCharRange(*this, CurPtr, UnicodePtr)); + } CurPtr = UnicodePtr; return true; @@ -1569,7 +1676,6 @@ FinishIdentifier: CurPtr = ConsumeChar(CurPtr, Size, Result); C = getCharAndSize(CurPtr, Size); continue; - } else if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) { C = getCharAndSize(CurPtr, Size); continue; @@ -1632,7 +1738,7 @@ bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { if (!LangOpts.C99) { if (!isHexaLiteral(BufferPtr, LangOpts)) IsHexFloat = false; - else if (!getLangOpts().CPlusPlus1z && + else if (!getLangOpts().CPlusPlus17 && std::find(BufferPtr, CurPtr, '_') != CurPtr) IsHexFloat = false; } @@ -1778,7 +1884,7 @@ bool Lexer::LexStringLiteral(Token &Result, const char *CurPtr, // getAndAdvanceChar. if (C == '\\') C = getAndAdvanceChar(CurPtr, Result); - + if (C == '\n' || C == '\r' || // Newline. 
(C == 0 && CurPtr-1 == BufferEnd)) { // End of file. if (!isLexingRawMode() && !LangOpts.AsmPreprocessor) @@ -1786,7 +1892,7 @@ bool Lexer::LexStringLiteral(Token &Result, const char *CurPtr, FormTokenWithChars(Result, CurPtr-1, tok::unknown); return true; } - + if (C == 0) { if (isCodeCompletionPoint(CurPtr-1)) { PP->CodeCompleteNaturalLanguage(); @@ -2000,7 +2106,6 @@ bool Lexer::LexCharConstant(Token &Result, const char *CurPtr, /// Update BufferPtr to point to the next non-whitespace character and return. /// /// This method forms a token and returns true if KeepWhitespaceMode is enabled. -/// bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr, bool &TokAtPhysicalStartOfLine) { // Whitespace - Skip it, then return the token after the whitespace. @@ -2131,7 +2236,8 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr, // If we read multiple characters, and one of those characters was a \r or // \n, then we had an escaped newline within the comment. Emit diagnostic // unless the next line is also a // comment. - if (CurPtr != OldPtr+1 && C != '/' && CurPtr[0] != '/') { + if (CurPtr != OldPtr + 1 && C != '/' && + (CurPtr == BufferEnd + 1 || CurPtr[0] != '/')) { for (; OldPtr != CurPtr; ++OldPtr) if (OldPtr[0] == '\n' || OldPtr[0] == '\r') { // Okay, we found a // comment that ends in a newline, if the next @@ -2214,7 +2320,7 @@ bool Lexer::SaveLineComment(Token &Result, const char *CurPtr) { std::string Spelling = PP->getSpelling(Result, &Invalid); if (Invalid) return true; - + assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not line comment?"); Spelling[1] = '*'; // Change prefix to "/*". Spelling += "*/"; // add suffix. @@ -2540,7 +2646,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { resetExtendedTokenMode(); return true; // Have a token. } - + // If we are in raw mode, return this event as an EOF token. Let the caller // that put us in raw mode handle the event. 
if (isLexingRawMode()) { @@ -2549,7 +2655,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { FormTokenWithChars(Result, BufferEnd, tok::eof); return true; } - + if (PP->isRecordingPreamble() && PP->isInPrimaryFile()) { PP->setRecordedPreambleConditionalStack(ConditionalStack); ConditionalStack.clear(); @@ -2661,7 +2767,7 @@ bool Lexer::IsStartOfConflictMarker(const char *CurPtr) { if (CurPtr != BufferStart && CurPtr[-1] != '\n' && CurPtr[-1] != '\r') return false; - + // Check to see if we have <<<<<<< or >>>>. if (!StringRef(CurPtr, BufferEnd - CurPtr).startswith("<<<<<<<") && !StringRef(CurPtr, BufferEnd - CurPtr).startswith(">>>> ")) @@ -2671,7 +2777,7 @@ bool Lexer::IsStartOfConflictMarker(const char *CurPtr) { // it. if (CurrentConflictMarkerState || isLexingRawMode()) return false; - + ConflictMarkerKind Kind = *CurPtr == '<' ? CMK_Normal : CMK_Perforce; // Check to see if there is an ending marker somewhere in the buffer at the @@ -2681,7 +2787,7 @@ bool Lexer::IsStartOfConflictMarker(const char *CurPtr) { // Diagnose this, and ignore to the end of line. Diag(CurPtr, diag::err_conflict_marker); CurrentConflictMarkerState = Kind; - + // Skip ahead to the end of line. We know this exists because the // end-of-conflict marker starts with \r or \n. while (*CurPtr != '\r' && *CurPtr != '\n') { @@ -2691,7 +2797,7 @@ bool Lexer::IsStartOfConflictMarker(const char *CurPtr) { BufferPtr = CurPtr; return true; } - + // No end of conflict marker found. return false; } @@ -2705,35 +2811,35 @@ bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) { if (CurPtr != BufferStart && CurPtr[-1] != '\n' && CurPtr[-1] != '\r') return false; - + // If we have a situation where we don't care about conflict markers, ignore // it. if (!CurrentConflictMarkerState || isLexingRawMode()) return false; - + // Check to see if we have the marker (4 characters in a row). 
for (unsigned i = 1; i != 4; ++i) if (CurPtr[i] != CurPtr[0]) return false; - + // If we do have it, search for the end of the conflict marker. This could // fail if it got skipped with a '#if 0' or something. Note that CurPtr might // be the end of conflict marker. if (const char *End = FindConflictEnd(CurPtr, BufferEnd, CurrentConflictMarkerState)) { CurPtr = End; - + // Skip ahead to the end of line. while (CurPtr != BufferEnd && *CurPtr != '\r' && *CurPtr != '\n') ++CurPtr; - + BufferPtr = CurPtr; - + // No longer in the conflict marker. CurrentConflictMarkerState = CMK_None; return true; } - + return false; } @@ -2872,7 +2978,6 @@ uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc, } return 0; - } else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) { // C++03 allows UCNs representing surrogate characters. C99 and C++11 don't. // We don't use isLexingRawMode() here because we need to diagnose bad @@ -3042,7 +3147,7 @@ LexNextToken: // We know the lexer hasn't changed, so just try again with this lexer. // (We manually eliminate the tail call to avoid recursion.) goto LexNextToken; - + case 26: // DOS & CP/M EOF: "^Z". // If we're in Microsoft extensions mode, treat this as end of file. if (LangOpts.MicrosoftExt) { @@ -3054,9 +3159,12 @@ LexNextToken: // If Microsoft extensions are disabled, this is just random garbage. Kind = tok::unknown; break; - - case '\n': + case '\r': + if (CurPtr[0] == '\n') + Char = getAndAdvanceChar(CurPtr, Result); + LLVM_FALLTHROUGH; + case '\n': // If we are inside a preprocessor directive and we see the end of line, // we know we are done with the directive, so return an EOD token. if (ParsingPreprocessorDirective) { @@ -3114,7 +3222,7 @@ LexNextToken: // We only saw whitespace, so just try again with this lexer. // (We manually eliminate the tail call to avoid recursion.) goto LexNextToken; - + // C99 6.4.4.1: Integer Constants. // C99 6.4.4.2: Floating Constants. 
case '0': case '1': case '2': case '3': case '4': @@ -3157,7 +3265,7 @@ LexNextToken: ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result), tok::utf8_string_literal); - if (Char2 == '\'' && LangOpts.CPlusPlus1z) + if (Char2 == '\'' && LangOpts.CPlusPlus17) return LexCharConstant( Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result), @@ -3501,6 +3609,24 @@ LexNextToken: Kind = tok::lessless; } } else if (Char == '=') { + char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2); + if (After == '>') { + if (getLangOpts().CPlusPlus2a) { + if (!isLexingRawMode()) + Diag(BufferPtr, diag::warn_cxx17_compat_spaceship); + CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result); + Kind = tok::spaceship; + break; + } + // Suggest adding a space between the '<=' and the '>' to avoid a + // change in semantics if this turns up in C++ <=17 mode. + if (getLangOpts().CPlusPlus && !isLexingRawMode()) { + Diag(BufferPtr, diag::warn_cxx2a_compat_spaceship) + << FixItHint::CreateInsertion( + getSourceLocation(CurPtr + SizeTmp, SizeTmp2), " "); + } + } CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Kind = tok::lessequal; } else if (LangOpts.Digraphs && Char == ':') { // '<:' -> '[' @@ -3526,7 +3652,8 @@ LexNextToken: } else if (LangOpts.Digraphs && Char == '%') { // '<%' -> '{' CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Kind = tok::l_brace; - } else if (Char == '#' && lexEditorPlaceholder(Result, CurPtr)) { + } else if (Char == '#' && /*Not a trigraph*/ SizeTmp == 1 && + lexEditorPlaceholder(Result, CurPtr)) { return true; } else { Kind = tok::less; @@ -3594,7 +3721,9 @@ LexNextToken: if (LangOpts.Digraphs && Char == '>') { Kind = tok::r_square; // ':>' -> ']' CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); - } else if (LangOpts.CPlusPlus && Char == ':') { + } else if ((LangOpts.CPlusPlus || + LangOpts.DoubleSquareBracketAttributes) && + Char == ':') { Kind = tok::coloncolon; CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); } 
else { @@ -3610,7 +3739,7 @@ LexNextToken: // If this is '====' and we're in a conflict marker, ignore it. if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1)) goto LexNextToken; - + Kind = tok::equalequal; CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); } else { @@ -3681,6 +3810,7 @@ LexNextToken: // We can't just reset CurPtr to BufferPtr because BufferPtr may point to // an escaped newline. --CurPtr; + const char *UTF8StartPtr = CurPtr; llvm::ConversionResult Status = llvm::convertUTF8Sequence((const llvm::UTF8 **)&CurPtr, (const llvm::UTF8 *)BufferEnd, @@ -3695,9 +3825,12 @@ LexNextToken: // (We manually eliminate the tail call to avoid recursion.) goto LexNextToken; } + if (!isLexingRawMode()) + maybeDiagnoseUTF8Homoglyph(PP->getDiagnostics(), CodePoint, + makeCharRange(*this, UTF8StartPtr, CurPtr)); return LexUnicode(Result, CodePoint, CurPtr); } - + if (isLexingRawMode() || ParsingPreprocessorDirective || PP->isPreprocessedOutput()) { ++CurPtr; diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp index a598a467816a8..cbec5e6b63856 100644 --- a/lib/Lex/LiteralSupport.cpp +++ b/lib/Lex/LiteralSupport.cpp @@ -544,6 +544,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, isHalf = false; isFloat = false; isImaginary = false; + isFloat16 = false; isFloat128 = false; MicrosoftInteger = 0; hadError = false; @@ -588,6 +589,13 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, if (!isFPConstant) break; // Error for integer constant. if (isHalf || isFloat || isLong || isFloat128) break; // HF, FF, LF, QF invalid. + + if (s + 2 < ThisTokEnd && s[1] == '1' && s[2] == '6') { + s += 2; // success, eat up 2 characters. + isFloat16 = true; + continue; + } + isFloat = true; continue; // Success. case 'q': // FP Suffix for "__float128" @@ -658,9 +666,6 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, break; } } - // "i", "if", and "il" are user-defined suffixes in C++1y. 
- if (*s == 'i' && PP.getLangOpts().CPlusPlus14) - break; // fall through. case 'j': case 'J': @@ -672,35 +677,35 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, break; } - if (s != ThisTokEnd) { + // "i", "if", and "il" are user-defined suffixes in C++1y. + if (s != ThisTokEnd || isImaginary) { // FIXME: Don't bother expanding UCNs if !tok.hasUCN(). expandUCNs(UDSuffixBuf, StringRef(SuffixBegin, ThisTokEnd - SuffixBegin)); if (isValidUDSuffix(PP.getLangOpts(), UDSuffixBuf)) { - // Any suffix pieces we might have parsed are actually part of the - // ud-suffix. - isLong = false; - isUnsigned = false; - isLongLong = false; - isFloat = false; - isHalf = false; - isImaginary = false; - MicrosoftInteger = 0; + if (!isImaginary) { + // Any suffix pieces we might have parsed are actually part of the + // ud-suffix. + isLong = false; + isUnsigned = false; + isLongLong = false; + isFloat = false; + isFloat16 = false; + isHalf = false; + isImaginary = false; + MicrosoftInteger = 0; + } saw_ud_suffix = true; return; } - // Report an error if there are any. - PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, SuffixBegin - ThisTokBegin), - diag::err_invalid_suffix_constant) - << StringRef(SuffixBegin, ThisTokEnd-SuffixBegin) << isFPConstant; - hadError = true; - return; - } - - if (isImaginary) { - PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, SuffixBegin - ThisTokBegin), - diag::ext_imaginary_constant); + if (s != ThisTokEnd) { + // Report an error if there are any. + PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, SuffixBegin - ThisTokBegin), + diag::err_invalid_suffix_constant) + << StringRef(SuffixBegin, ThisTokEnd - SuffixBegin) << isFPConstant; + hadError = true; + } } } @@ -850,8 +855,8 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { PP.Diag(TokLoc, PP.getLangOpts().CPlusPlus ? 
diag::ext_hex_literal_invalid : diag::ext_hex_constant_invalid); - else if (PP.getLangOpts().CPlusPlus1z) - PP.Diag(TokLoc, diag::warn_cxx1z_hex_literal); + else if (PP.getLangOpts().CPlusPlus17) + PP.Diag(TokLoc, diag::warn_cxx17_hex_literal); } else if (saw_period) { PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin), diag::err_hex_constant_requires) diff --git a/lib/Lex/MacroArgs.cpp b/lib/Lex/MacroArgs.cpp index f791d8d4bacc2..5c0f0623c3e17 100644 --- a/lib/Lex/MacroArgs.cpp +++ b/lib/Lex/MacroArgs.cpp @@ -33,7 +33,7 @@ MacroArgs *MacroArgs::create(const MacroInfo *MI, // See if we have an entry with a big enough argument list to reuse on the // free list. If so, reuse it. for (MacroArgs **Entry = &PP.MacroArgCache; *Entry; - Entry = &(*Entry)->ArgCache) + Entry = &(*Entry)->ArgCache) { if ((*Entry)->NumUnexpArgTokens >= UnexpArgTokens.size() && (*Entry)->NumUnexpArgTokens < ClosestMatch) { ResultEnt = Entry; @@ -44,14 +44,12 @@ MacroArgs *MacroArgs::create(const MacroInfo *MI, // Otherwise, use the best fit. ClosestMatch = (*Entry)->NumUnexpArgTokens; } - + } MacroArgs *Result; if (!ResultEnt) { - // Allocate memory for a MacroArgs object with the lexer tokens at the end. - Result = (MacroArgs *)malloc(sizeof(MacroArgs) + - UnexpArgTokens.size() * sizeof(Token)); - // Construct the MacroArgs object. - new (Result) + // Allocate memory for a MacroArgs object with the lexer tokens at the end, + // and construct the MacroArgs object. + Result = new (std::malloc(totalSizeToAlloc<Token>(UnexpArgTokens.size()))) MacroArgs(UnexpArgTokens.size(), VarargsElided, MI->getNumParams()); } else { Result = *ResultEnt; @@ -63,9 +61,14 @@ MacroArgs *MacroArgs::create(const MacroInfo *MI, } // Copy the actual unexpanded tokens to immediately after the result ptr. 
- if (!UnexpArgTokens.empty()) - std::copy(UnexpArgTokens.begin(), UnexpArgTokens.end(), - const_cast<Token*>(Result->getUnexpArgument(0))); + if (!UnexpArgTokens.empty()) { + static_assert(std::is_trivial<Token>::value, + "assume trivial copyability if copying into the " + "uninitialized array (as opposed to reusing a cached " + "MacroArgs)"); + std::copy(UnexpArgTokens.begin(), UnexpArgTokens.end(), + Result->getTrailingObjects<Token>()); + } return Result; } @@ -93,6 +96,8 @@ MacroArgs *MacroArgs::deallocate() { // Run the dtor to deallocate the vectors. this->~MacroArgs(); // Release the memory for the object. + static_assert(std::is_trivially_destructible<Token>::value, + "assume trivially destructible and forego destructors"); free(this); return Next; @@ -113,10 +118,13 @@ unsigned MacroArgs::getArgLength(const Token *ArgPtr) { /// getUnexpArgument - Return the unexpanded tokens for the specified formal. /// const Token *MacroArgs::getUnexpArgument(unsigned Arg) const { + + assert(Arg < getNumMacroArguments() && "Invalid arg #"); // The unexpanded argument tokens start immediately after the MacroArgs object // in memory. - const Token *Start = (const Token *)(this+1); + const Token *Start = getTrailingObjects<Token>(); const Token *Result = Start; + // Scan to find Arg. for (; Arg; ++Result) { assert(Result < Start+NumUnexpArgTokens && "Invalid arg #"); @@ -127,6 +135,16 @@ const Token *MacroArgs::getUnexpArgument(unsigned Arg) const { return Result; } +// This function assumes that the variadic arguments are the tokens +// corresponding to the last parameter (ellipsis) - and since tokens are +// separated by the 'eof' token, if that is the only token corresponding to that +// last parameter, we know no variadic arguments were supplied. 
+bool MacroArgs::invokedWithVariadicArgument(const MacroInfo *const MI) const { + if (!MI->isVariadic()) + return false; + const int VariadicArgIndex = getNumMacroArguments() - 1; + return getUnexpArgument(VariadicArgIndex)->isNot(tok::eof); +} /// ArgNeedsPreexpansion - If we can prove that the argument won't be affected /// by pre-expansion, return false. Otherwise, conservatively return true. @@ -145,14 +163,13 @@ bool MacroArgs::ArgNeedsPreexpansion(const Token *ArgTok, /// getPreExpArgument - Return the pre-expanded form of the specified /// argument. -const std::vector<Token> & -MacroArgs::getPreExpArgument(unsigned Arg, const MacroInfo *MI, - Preprocessor &PP) { - assert(Arg < MI->getNumParams() && "Invalid argument number!"); +const std::vector<Token> &MacroArgs::getPreExpArgument(unsigned Arg, + Preprocessor &PP) { + assert(Arg < getNumMacroArguments() && "Invalid argument number!"); // If we have already computed this, return it. - if (PreExpArgTokens.size() < MI->getNumParams()) - PreExpArgTokens.resize(MI->getNumParams()); + if (PreExpArgTokens.size() < getNumMacroArguments()) + PreExpArgTokens.resize(getNumMacroArguments()); std::vector<Token> &Result = PreExpArgTokens[Arg]; if (!Result.empty()) return Result; diff --git a/lib/Lex/MacroInfo.cpp b/lib/Lex/MacroInfo.cpp index 6dc7841bc160b..b13767aa1d673 100644 --- a/lib/Lex/MacroInfo.cpp +++ b/lib/Lex/MacroInfo.cpp @@ -1,4 +1,4 @@ -//===--- MacroInfo.cpp - Information about #defined identifiers -----------===// +//===- MacroInfo.cpp - Information about #defined identifiers -------------===// // // The LLVM Compiler Infrastructure // @@ -12,25 +12,29 @@ //===----------------------------------------------------------------------===// #include "clang/Lex/MacroInfo.h" +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TokenKinds.h" #include "clang/Lex/Preprocessor.h" +#include 
"clang/Lex/Token.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <utility> + using namespace clang; MacroInfo::MacroInfo(SourceLocation DefLoc) - : Location(DefLoc), - ParameterList(nullptr), - NumParameters(0), - IsDefinitionLengthCached(false), - IsFunctionLike(false), - IsC99Varargs(false), - IsGNUVarargs(false), - IsBuiltinMacro(false), - HasCommaPasting(false), - IsDisabled(false), - IsUsed(false), - IsAllowRedefinitionsWithoutWarning(false), - IsWarnIfUnused(false), - UsedForHeaderGuard(false) { -} + : Location(DefLoc), IsDefinitionLengthCached(false), IsFunctionLike(false), + IsC99Varargs(false), IsGNUVarargs(false), IsBuiltinMacro(false), + HasCommaPasting(false), IsDisabled(false), IsUsed(false), + IsAllowRedefinitionsWithoutWarning(false), IsWarnIfUnused(false), + UsedForHeaderGuard(false) {} unsigned MacroInfo::getDefinitionLengthSlow(const SourceManager &SM) const { assert(!IsDefinitionLengthCached); diff --git a/lib/Lex/ModuleMap.cpp b/lib/Lex/ModuleMap.cpp index 40f78ce25ceb3..fbbae7a095203 100644 --- a/lib/Lex/ModuleMap.cpp +++ b/lib/Lex/ModuleMap.cpp @@ -1,4 +1,4 @@ -//===--- ModuleMap.cpp - Describe the layout of modules ---------*- C++ -*-===// +//===- ModuleMap.cpp - Describe the layout of modules ---------------------===// // // The LLVM Compiler Infrastructure // @@ -11,29 +11,47 @@ // of a module as it relates to headers. 
// //===----------------------------------------------------------------------===// + #include "clang/Lex/ModuleMap.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/Diagnostic.h" -#include "clang/Basic/DiagnosticOptions.h" #include "clang/Basic/FileManager.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/Module.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" -#include "clang/Basic/TargetOptions.h" +#include "clang/Basic/VirtualFileSystem.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/HeaderSearchOptions.h" #include "clang/Lex/LexDiagnostic.h" #include "clang/Lex/Lexer.h" #include "clang/Lex/LiteralSupport.h" +#include "clang/Lex/Token.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Allocator.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Host.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" -#include <stdlib.h> -#if defined(LLVM_ON_UNIX) -#include <limits.h> -#endif +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstring> +#include <string> +#include <system_error> +#include <utility> + using namespace clang; Module::HeaderKind ModuleMap::headerRoleToKind(ModuleHeaderRole Role) { @@ -80,7 +98,7 @@ ModuleMap::resolveExport(Module *Mod, // Resolve the module-id. 
Module *Context = resolveModuleId(Unresolved.Id, Mod, Complain); if (!Context) - return Module::ExportDecl(); + return {}; return Module::ExportDecl(Context, Unresolved.Wildcard); } @@ -256,8 +274,7 @@ ModuleMap::ModuleMap(SourceManager &SourceMgr, DiagnosticsEngine &Diags, const LangOptions &LangOpts, const TargetInfo *Target, HeaderSearch &HeaderInfo) : SourceMgr(SourceMgr), Diags(Diags), LangOpts(LangOpts), Target(Target), - HeaderInfo(HeaderInfo), BuiltinIncludeDir(nullptr), - SourceModule(nullptr), NumCreatedModules(0) { + HeaderInfo(HeaderInfo) { MMapLangOpts.LineComment = true; } @@ -345,7 +362,7 @@ ModuleMap::KnownHeader ModuleMap::findHeaderInUmbrellaDirs(const FileEntry *File, SmallVectorImpl<const DirectoryEntry *> &IntermediateDirs) { if (UmbrellaDirs.empty()) - return KnownHeader(); + return {}; const DirectoryEntry *Dir = File->getDir(); assert(Dir && "file in no directory"); @@ -373,7 +390,7 @@ ModuleMap::findHeaderInUmbrellaDirs(const FileEntry *File, // Resolve the parent path to a directory entry. 
Dir = SourceMgr.getFileManager().getDirectory(DirName); } while (Dir); - return KnownHeader(); + return {}; } static bool violatesPrivateInclude(Module *RequestingModule, @@ -503,7 +520,7 @@ ModuleMap::KnownHeader ModuleMap::findModuleForHeader(const FileEntry *File, bool AllowTextual) { auto MakeResult = [&](ModuleMap::KnownHeader R) -> ModuleMap::KnownHeader { if (!AllowTextual && R.getRole() & ModuleMap::TextualHeader) - return ModuleMap::KnownHeader(); + return {}; return R; }; @@ -593,7 +610,7 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(const FileEntry *File) { return Header; } - return KnownHeader(); + return {}; } ArrayRef<ModuleMap::KnownHeader> @@ -746,8 +763,18 @@ std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name, return std::make_pair(Result, true); } +Module *ModuleMap::createGlobalModuleForInterfaceUnit(SourceLocation Loc) { + assert(!PendingGlobalModule && "created multiple global modules"); + PendingGlobalModule.reset( + new Module("<global>", Loc, nullptr, /*IsFramework*/ false, + /*IsExplicit*/ true, NumCreatedModules++)); + PendingGlobalModule->Kind = Module::GlobalModuleFragment; + return PendingGlobalModule.get(); +} + Module *ModuleMap::createModuleForInterfaceUnit(SourceLocation Loc, - StringRef Name) { + StringRef Name, + Module *GlobalModule) { assert(LangOpts.CurrentModule == Name && "module name mismatch"); assert(!Modules[Name] && "redefining existing module"); @@ -757,6 +784,12 @@ Module *ModuleMap::createModuleForInterfaceUnit(SourceLocation Loc, Result->Kind = Module::ModuleInterfaceUnit; Modules[Name] = SourceModule = Result; + // Reparent the current global module fragment as a submodule of this module. 
+ assert(GlobalModule == PendingGlobalModule.get() && + "unexpected global module"); + GlobalModule->setParent(Result); + PendingGlobalModule.release(); // now owned by parent + // Mark the main source file as being within the newly-created module so that // declarations and macros are properly visibility-restricted to it. auto *MainFile = SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()); @@ -1173,6 +1206,7 @@ bool ModuleMap::resolveConflicts(Module *Mod, bool Complain) { //----------------------------------------------------------------------------// namespace clang { + /// \brief A token in a module map file. struct MMToken { enum TokenKind { @@ -1186,6 +1220,7 @@ namespace clang { ExcludeKeyword, ExplicitKeyword, ExportKeyword, + ExportAsKeyword, ExternKeyword, FrameworkKeyword, LinkKeyword, @@ -1210,6 +1245,7 @@ namespace clang { union { // If Kind != IntegerLiteral. const char *StringData; + // If Kind == IntegerLiteral. uint64_t IntegerValue; }; @@ -1259,7 +1295,7 @@ namespace clang { bool IsSystem; /// \brief Whether an error occurred. - bool HadError; + bool HadError = false; /// \brief Stores string data for the various string literals referenced /// during parsing. @@ -1269,7 +1305,7 @@ namespace clang { MMToken Tok; /// \brief The active module. - Module *ActiveModule; + Module *ActiveModule = nullptr; /// \brief Whether a module uses the 'requires excluded' hack to mark its /// contents as 'textual'. @@ -1288,22 +1324,24 @@ namespace clang { /// (or the end of the file). 
void skipUntil(MMToken::TokenKind K); - typedef SmallVector<std::pair<std::string, SourceLocation>, 2> ModuleId; + using ModuleId = SmallVector<std::pair<std::string, SourceLocation>, 2>; + bool parseModuleId(ModuleId &Id); void parseModuleDecl(); void parseExternModuleDecl(); void parseRequiresDecl(); - void parseHeaderDecl(clang::MMToken::TokenKind, - SourceLocation LeadingLoc); + void parseHeaderDecl(MMToken::TokenKind, SourceLocation LeadingLoc); void parseUmbrellaDirDecl(SourceLocation UmbrellaLoc); void parseExportDecl(); + void parseExportAsDecl(); void parseUseDecl(); void parseLinkDecl(); void parseConfigMacros(); void parseConflict(); void parseInferredModuleDecl(bool Framework, bool Explicit); - typedef ModuleMap::Attributes Attributes; + using Attributes = ModuleMap::Attributes; + bool parseOptionalAttributes(Attributes &Attrs); public: @@ -1314,10 +1352,9 @@ namespace clang { const FileEntry *ModuleMapFile, const DirectoryEntry *Directory, bool IsSystem) - : L(L), SourceMgr(SourceMgr), Target(Target), Diags(Diags), Map(Map), - ModuleMapFile(ModuleMapFile), Directory(Directory), - IsSystem(IsSystem), HadError(false), ActiveModule(nullptr) - { + : L(L), SourceMgr(SourceMgr), Target(Target), Diags(Diags), Map(Map), + ModuleMapFile(ModuleMapFile), Directory(Directory), + IsSystem(IsSystem) { Tok.clear(); consumeToken(); } @@ -1327,7 +1364,8 @@ namespace clang { bool terminatedByDirective() { return false; } SourceLocation getLocation() { return Tok.getLocation(); } }; -} + +} // namespace clang SourceLocation ModuleMapParser::consumeToken() { SourceLocation Result = Tok.getLocation(); @@ -1348,6 +1386,7 @@ retry: .Case("exclude", MMToken::ExcludeKeyword) .Case("explicit", MMToken::ExplicitKeyword) .Case("export", MMToken::ExportKeyword) + .Case("export_as", MMToken::ExportAsKeyword) .Case("extern", MMToken::ExternKeyword) .Case("framework", MMToken::FrameworkKeyword) .Case("header", MMToken::HeaderKeyword) @@ -1548,20 +1587,26 @@ bool 
ModuleMapParser::parseModuleId(ModuleId &Id) { } namespace { + /// \brief Enumerates the known attributes. enum AttributeKind { /// \brief An unknown attribute. AT_unknown, + /// \brief The 'system' attribute. AT_system, + /// \brief The 'extern_c' attribute. AT_extern_c, + /// \brief The 'exhaustive' attribute. AT_exhaustive, + /// \brief The 'no_undeclared_includes' attribute. AT_no_undeclared_includes }; -} + +} // namespace /// \brief Parse a module declaration. /// @@ -1575,6 +1620,7 @@ namespace { /// header-declaration /// submodule-declaration /// export-declaration +/// export-as-declaration /// link-declaration /// /// submodule-declaration: @@ -1683,7 +1729,6 @@ void ModuleMapParser::parseModuleDecl() { if (parseOptionalAttributes(Attrs)) return; - // Parse the opening brace. if (!Tok.is(MMToken::LBrace)) { Diags.Report(Tok.getLocation(), diag::err_mmap_expected_lbrace) @@ -1809,6 +1854,10 @@ void ModuleMapParser::parseModuleDecl() { parseExportDecl(); break; + case MMToken::ExportAsKeyword: + parseExportAsDecl(); + break; + case MMToken::UseKeyword: parseUseDecl(); break; @@ -2269,6 +2318,41 @@ void ModuleMapParser::parseExportDecl() { ActiveModule->UnresolvedExports.push_back(Unresolved); } +/// \brief Parse a module export_as declaration. 
+/// +/// export-as-declaration: +/// 'export_as' identifier +void ModuleMapParser::parseExportAsDecl() { + assert(Tok.is(MMToken::ExportAsKeyword)); + consumeToken(); + + if (!Tok.is(MMToken::Identifier)) { + Diags.Report(Tok.getLocation(), diag::err_mmap_module_id); + HadError = true; + return; + } + + if (ActiveModule->Parent) { + Diags.Report(Tok.getLocation(), diag::err_mmap_submodule_export_as); + consumeToken(); + return; + } + + if (!ActiveModule->ExportAsModule.empty()) { + if (ActiveModule->ExportAsModule == Tok.getString()) { + Diags.Report(Tok.getLocation(), diag::warn_mmap_redundant_export_as) + << ActiveModule->Name << Tok.getString(); + } else { + Diags.Report(Tok.getLocation(), diag::err_mmap_conflicting_export_as) + << ActiveModule->Name << ActiveModule->ExportAsModule + << Tok.getString(); + } + } + + ActiveModule->ExportAsModule = Tok.getString(); + consumeToken(); +} + /// \brief Parse a module use declaration. /// /// use-declaration: @@ -2516,7 +2600,7 @@ void ModuleMapParser::parseInferredModuleDecl(bool Framework, bool Explicit) { Done = true; break; - case MMToken::ExcludeKeyword: { + case MMToken::ExcludeKeyword: if (ActiveModule) { Diags.Report(Tok.getLocation(), diag::err_mmap_expected_inferred_member) << (ActiveModule != nullptr); @@ -2535,7 +2619,6 @@ void ModuleMapParser::parseInferredModuleDecl(bool Framework, bool Explicit) { .push_back(Tok.getString()); consumeToken(); break; - } case MMToken::ExportKeyword: if (!ActiveModule) { @@ -2674,6 +2757,7 @@ bool ModuleMapParser::parseModuleMapFile() { case MMToken::Exclaim: case MMToken::ExcludeKeyword: case MMToken::ExportKeyword: + case MMToken::ExportAsKeyword: case MMToken::HeaderKeyword: case MMToken::Identifier: case MMToken::LBrace: diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp index b2450f516ba2a..ca3e70fd1060e 100644 --- a/lib/Lex/PPDirectives.cpp +++ b/lib/Lex/PPDirectives.cpp @@ -33,6 +33,7 @@ #include "clang/Lex/PreprocessorOptions.h" #include 
"clang/Lex/PTHLexer.h" #include "clang/Lex/Token.h" +#include "clang/Lex/VariadicMacroSupport.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -349,15 +350,19 @@ void Preprocessor::CheckEndOfDirective(const char *DirType, bool EnableMacros) { /// If ElseOk is true, then \#else directives are ok, if not, then we have /// already seen one so a \#else directive is a duplicate. When this returns, /// the caller can lex the first valid token. -void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, +void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc, + SourceLocation IfTokenLoc, bool FoundNonSkipPortion, bool FoundElse, SourceLocation ElseLoc) { ++NumSkipped; assert(!CurTokenLexer && CurPPLexer && "Lexing a macro, not a file?"); - CurPPLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/false, - FoundNonSkipPortion, FoundElse); + if (PreambleConditionalStack.reachedEOFWhileSkipping()) + PreambleConditionalStack.clearSkipInfo(); + else + CurPPLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/ false, + FoundNonSkipPortion, FoundElse); if (CurPTHLexer) { PTHSkipExcludedConditionalBlock(); @@ -380,16 +385,12 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, // If this is the end of the buffer, we have an error. if (Tok.is(tok::eof)) { - // Emit errors for each unterminated conditional on the stack, including - // the current one. - while (!CurPPLexer->ConditionalStack.empty()) { - if (CurLexer->getFileLoc() != CodeCompletionFileLoc) - Diag(CurPPLexer->ConditionalStack.back().IfLoc, - diag::err_pp_unterminated_conditional); - CurPPLexer->ConditionalStack.pop_back(); - } - + // We don't emit errors for unterminated conditionals here, + // Lexer::LexEndOfFile can do that properly. // Just return and let the caller lex after this #include.
+ if (PreambleConditionalStack.isRecording()) + PreambleConditionalStack.SkipInfo.emplace( + HashTokenLoc, IfTokenLoc, FoundNonSkipPortion, FoundElse, ElseLoc); break; } @@ -557,10 +558,10 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, // the #if block. CurPPLexer->LexingRawMode = false; - if (Callbacks) { - SourceLocation BeginLoc = ElseLoc.isValid() ? ElseLoc : IfTokenLoc; - Callbacks->SourceRangeSkipped(SourceRange(BeginLoc, Tok.getLocation())); - } + if (Callbacks) + Callbacks->SourceRangeSkipped( + SourceRange(HashTokenLoc, CurPPLexer->getSourceLocation()), + Tok.getLocation()); } void Preprocessor::PTHSkipExcludedConditionalBlock() { @@ -948,15 +949,17 @@ void Preprocessor::HandleDirective(Token &Result) { default: break; // C99 6.10.1 - Conditional Inclusion. case tok::pp_if: - return HandleIfDirective(Result, ReadAnyTokensBeforeDirective); + return HandleIfDirective(Result, SavedHash, ReadAnyTokensBeforeDirective); case tok::pp_ifdef: - return HandleIfdefDirective(Result, false, true/*not valid for miopt*/); + return HandleIfdefDirective(Result, SavedHash, false, + true /*not valid for miopt*/); case tok::pp_ifndef: - return HandleIfdefDirective(Result, true, ReadAnyTokensBeforeDirective); + return HandleIfdefDirective(Result, SavedHash, true, + ReadAnyTokensBeforeDirective); case tok::pp_elif: - return HandleElifDirective(Result); + return HandleElifDirective(Result, SavedHash); case tok::pp_else: - return HandleElseDirective(Result); + return HandleElseDirective(Result, SavedHash); case tok::pp_endif: return HandleEndifDirective(Result); @@ -2135,19 +2138,19 @@ void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc, // Preprocessor Macro Directive Handling. //===----------------------------------------------------------------------===// -/// ReadMacroParameterList - The ( starting an argument list of a macro -/// definition has just been read. 
Lex the rest of the arguments and the +/// ReadMacroParameterList - The ( starting a parameter list of a macro +/// definition has just been read. Lex the rest of the parameters and the /// closing ), updating MI with what we learn. Return true if an error occurs -/// parsing the arg list. +/// parsing the param list. bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) { - SmallVector<IdentifierInfo*, 32> Arguments; + SmallVector<IdentifierInfo*, 32> Parameters; while (true) { LexUnexpandedToken(Tok); switch (Tok.getKind()) { case tok::r_paren: - // Found the end of the argument list. - if (Arguments.empty()) // #define FOO() + // Found the end of the parameter list. + if (Parameters.empty()) // #define FOO() return false; // Otherwise we have #define FOO(A,) Diag(Tok, diag::err_pp_expected_ident_in_arg_list); @@ -2170,10 +2173,10 @@ bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) { Diag(Tok, diag::err_pp_missing_rparen_in_macro_def); return true; } - // Add the __VA_ARGS__ identifier as an argument. - Arguments.push_back(Ident__VA_ARGS__); + // Add the __VA_ARGS__ identifier as a parameter. + Parameters.push_back(Ident__VA_ARGS__); MI->setIsC99Varargs(); - MI->setParameterList(Arguments, BP); + MI->setParameterList(Parameters, BP); return false; case tok::eod: // #define X( Diag(Tok, diag::err_pp_missing_rparen_in_macro_def); @@ -2188,16 +2191,16 @@ bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) { return true; } - // If this is already used as an argument, it is used multiple times (e.g. + // If this is already used as a parameter, it is used multiple times (e.g. // #define X(A,A. - if (std::find(Arguments.begin(), Arguments.end(), II) != - Arguments.end()) { // C99 6.10.3p6 + if (std::find(Parameters.begin(), Parameters.end(), II) != + Parameters.end()) { // C99 6.10.3p6 Diag(Tok, diag::err_pp_duplicate_name_in_arg_list) << II; return true; } - // Add the argument to the macro info. 
- Arguments.push_back(II); + // Add the parameter to the macro info. + Parameters.push_back(II); // Lex the token after the identifier. LexUnexpandedToken(Tok); @@ -2207,7 +2210,7 @@ bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) { Diag(Tok, diag::err_pp_expected_comma_in_arg_list); return true; case tok::r_paren: // #define X(A) - MI->setParameterList(Arguments, BP); + MI->setParameterList(Parameters, BP); return false; case tok::comma: // #define X(A, break; @@ -2223,7 +2226,7 @@ bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) { } MI->setIsGNUVarargs(); - MI->setParameterList(Arguments, BP); + MI->setParameterList(Parameters, BP); return false; } } @@ -2290,6 +2293,10 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody( Token Tok; LexUnexpandedToken(Tok); + // Used to un-poison and then re-poison identifiers of the __VA_ARGS__ ilk + // within their appropriate context. + VariadicMacroScopeGuard VariadicMacroScopeGuard(*this); + // If this is a function-like macro definition, parse the argument list, // marking each of the identifiers as being used as macro arguments. Also, // check other constraints on the first token of the macro body. @@ -2314,14 +2321,14 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody( return nullptr; } - // If this is a definition of a variadic C99 function-like macro, not using - // the GNU named varargs extension, enabled __VA_ARGS__. + // If this is a definition of an ISO C/C++ variadic function-like macro (not + // using the GNU named varargs extension) inform our variadic scope guard + // which un-poisons and re-poisons certain identifiers (e.g. __VA_ARGS__) + // allowed only within the definition of a variadic macro. - // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. - // This gets unpoisoned where it is allowed. 
- assert(Ident__VA_ARGS__->isPoisoned() && "__VA_ARGS__ should be poisoned!"); - if (MI->isC99Varargs()) - Ident__VA_ARGS__->setIsPoisoned(false); + if (MI->isC99Varargs()) { + VariadicMacroScopeGuard.enterScope(); + } // Read the first token after the arg list for down below. LexUnexpandedToken(Tok); @@ -2367,12 +2374,50 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody( // Otherwise, read the body of a function-like macro. While we are at it, // check C99 6.10.3.2p1: ensure that # operators are followed by macro // parameters in function-like macro expansions. + + VAOptDefinitionContext VAOCtx(*this); + while (Tok.isNot(tok::eod)) { LastTok = Tok; if (!Tok.isOneOf(tok::hash, tok::hashat, tok::hashhash)) { MI->AddTokenToBody(Tok); + if (VAOCtx.isVAOptToken(Tok)) { + // If we're already within a VAOPT, emit an error. + if (VAOCtx.isInVAOpt()) { + Diag(Tok, diag::err_pp_vaopt_nested_use); + return nullptr; + } + // Ensure VAOPT is followed by a '(' . + LexUnexpandedToken(Tok); + if (Tok.isNot(tok::l_paren)) { + Diag(Tok, diag::err_pp_missing_lparen_in_vaopt_use); + return nullptr; + } + MI->AddTokenToBody(Tok); + VAOCtx.sawVAOptFollowedByOpeningParens(Tok.getLocation()); + LexUnexpandedToken(Tok); + if (Tok.is(tok::hashhash)) { + Diag(Tok, diag::err_vaopt_paste_at_start); + return nullptr; + } + continue; + } else if (VAOCtx.isInVAOpt()) { + if (Tok.is(tok::r_paren)) { + if (VAOCtx.sawClosingParen()) { + const unsigned NumTokens = MI->getNumTokens(); + assert(NumTokens >= 3 && "Must have seen at least __VA_OPT__( " + "and a subsequent tok::r_paren"); + if (MI->getReplacementToken(NumTokens - 2).is(tok::hashhash)) { + Diag(Tok, diag::err_vaopt_paste_at_end); + return nullptr; + } + } + } else if (Tok.is(tok::l_paren)) { + VAOCtx.sawOpeningParen(Tok.getLocation()); + } + } // Get the next token of the macro. 
LexUnexpandedToken(Tok); continue; @@ -2413,12 +2458,14 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody( continue; } + // Our Token is a stringization operator. // Get the next token of the macro. LexUnexpandedToken(Tok); - // Check for a valid macro arg identifier. - if (Tok.getIdentifierInfo() == nullptr || - MI->getParameterNum(Tok.getIdentifierInfo()) == -1) { + // Check for a valid macro arg identifier or __VA_OPT__. + if (!VAOCtx.isVAOptToken(Tok) && + (Tok.getIdentifierInfo() == nullptr || + MI->getParameterNum(Tok.getIdentifierInfo()) == -1)) { // If this is assembler-with-cpp mode, we accept random gibberish after // the '#' because '#' is often a comment character. However, change @@ -2431,26 +2478,33 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody( } else { Diag(Tok, diag::err_pp_stringize_not_parameter) << LastTok.is(tok::hashat); - - // Disable __VA_ARGS__ again. - Ident__VA_ARGS__->setIsPoisoned(true); return nullptr; } } // Things look ok, add the '#' and param name tokens to the macro. MI->AddTokenToBody(LastTok); - MI->AddTokenToBody(Tok); - LastTok = Tok; - // Get the next token of the macro. - LexUnexpandedToken(Tok); + // If the token following '#' is VAOPT, let the next iteration handle it + // and check it for correctness, otherwise add the token and prime the + // loop with the next one. + if (!VAOCtx.isVAOptToken(Tok)) { + MI->AddTokenToBody(Tok); + LastTok = Tok; + + // Get the next token of the macro. + LexUnexpandedToken(Tok); + } + } + if (VAOCtx.isInVAOpt()) { + assert(Tok.is(tok::eod) && "Must be at End Of preprocessing Directive"); + Diag(Tok, diag::err_pp_expected_after) + << LastTok.getKind() << tok::r_paren; + Diag(VAOCtx.getUnmatchedOpeningParenLoc(), diag::note_matching) << tok::l_paren; + return nullptr; } } MI->setDefinitionEndLoc(LastTok.getLocation()); - // Disable __VA_ARGS__ again. - Ident__VA_ARGS__->setIsPoisoned(true); - return MI; } /// HandleDefineDirective - Implements \#define. 
This consumes the entire macro @@ -2614,7 +2668,9 @@ void Preprocessor::HandleUndefDirective() { /// true if any tokens have been returned or pp-directives activated before this /// \#ifndef has been lexed. /// -void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef, +void Preprocessor::HandleIfdefDirective(Token &Result, + const Token &HashToken, + bool isIfndef, bool ReadAnyTokensBeforeDirective) { ++NumIf; Token DirectiveTok = Result; @@ -2626,8 +2682,9 @@ void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef, if (MacroNameTok.is(tok::eod)) { // Skip code until we get to #endif. This helps with recovery by not // emitting an error when the #endif is reached. - SkipExcludedConditionalBlock(DirectiveTok.getLocation(), - /*Foundnonskip*/false, /*FoundElse*/false); + SkipExcludedConditionalBlock(HashToken.getLocation(), + DirectiveTok.getLocation(), + /*Foundnonskip*/ false, /*FoundElse*/ false); return; } @@ -2675,15 +2732,17 @@ void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef, /*foundelse*/false); } else { // No, skip the contents of this block. - SkipExcludedConditionalBlock(DirectiveTok.getLocation(), - /*Foundnonskip*/false, - /*FoundElse*/false); + SkipExcludedConditionalBlock(HashToken.getLocation(), + DirectiveTok.getLocation(), + /*Foundnonskip*/ false, + /*FoundElse*/ false); } } /// HandleIfDirective - Implements the \#if directive. /// void Preprocessor::HandleIfDirective(Token &IfToken, + const Token &HashToken, bool ReadAnyTokensBeforeDirective) { ++NumIf; @@ -2721,8 +2780,9 @@ void Preprocessor::HandleIfDirective(Token &IfToken, /*foundnonskip*/true, /*foundelse*/false); } else { // No, skip the contents of this block. 
- SkipExcludedConditionalBlock(IfToken.getLocation(), /*Foundnonskip*/false, - /*FoundElse*/false); + SkipExcludedConditionalBlock(HashToken.getLocation(), IfToken.getLocation(), + /*Foundnonskip*/ false, + /*FoundElse*/ false); } } @@ -2754,7 +2814,7 @@ void Preprocessor::HandleEndifDirective(Token &EndifToken) { /// HandleElseDirective - Implements the \#else directive. /// -void Preprocessor::HandleElseDirective(Token &Result) { +void Preprocessor::HandleElseDirective(Token &Result, const Token &HashToken) { ++NumElse; // #else directive in a non-skipping conditional... start skipping. @@ -2785,13 +2845,15 @@ void Preprocessor::HandleElseDirective(Token &Result) { } // Finally, skip the rest of the contents of this block. - SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true, - /*FoundElse*/true, Result.getLocation()); + SkipExcludedConditionalBlock(HashToken.getLocation(), CI.IfLoc, + /*Foundnonskip*/ true, + /*FoundElse*/ true, Result.getLocation()); } /// HandleElifDirective - Implements the \#elif directive. /// -void Preprocessor::HandleElifDirective(Token &ElifToken) { +void Preprocessor::HandleElifDirective(Token &ElifToken, + const Token &HashToken) { ++NumElse; // #elif directive in a non-skipping conditional... start skipping. @@ -2828,7 +2890,7 @@ void Preprocessor::HandleElifDirective(Token &ElifToken) { } // Finally, skip the rest of the contents of this block. - SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true, - /*FoundElse*/CI.FoundElse, - ElifToken.getLocation()); + SkipExcludedConditionalBlock( + HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ true, + /*FoundElse*/ CI.FoundElse, ElifToken.getLocation()); } diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp index 36d7028da6886..e484e9c4c3a38 100644 --- a/lib/Lex/PPLexerChange.cpp +++ b/lib/Lex/PPLexerChange.cpp @@ -40,10 +40,9 @@ bool Preprocessor::isInPrimaryFile() const { // If there are any stacked lexers, we're in a #include. 
assert(IsFileLexer(IncludeMacroStack[0]) && "Top level include stack isn't our primary lexer?"); - return std::none_of(IncludeMacroStack.begin() + 1, IncludeMacroStack.end(), - [this](const IncludeStackInfo &ISI) -> bool { - return IsFileLexer(ISI); - }); + return std::none_of( + IncludeMacroStack.begin() + 1, IncludeMacroStack.end(), + [&](const IncludeStackInfo &ISI) -> bool { return IsFileLexer(ISI); }); } /// getCurrentLexer - Return the current file lexer being lexed from. Note diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp index 3f8ede23da563..41633f90c34da 100644 --- a/lib/Lex/PPMacroExpansion.cpp +++ b/lib/Lex/PPMacroExpansion.cpp @@ -369,11 +369,17 @@ void Preprocessor::RegisterBuiltinMacros() { Ident__has_extension = RegisterBuiltinMacro(*this, "__has_extension"); Ident__has_builtin = RegisterBuiltinMacro(*this, "__has_builtin"); Ident__has_attribute = RegisterBuiltinMacro(*this, "__has_attribute"); + Ident__has_c_attribute = RegisterBuiltinMacro(*this, "__has_c_attribute"); Ident__has_declspec = RegisterBuiltinMacro(*this, "__has_declspec_attribute"); Ident__has_include = RegisterBuiltinMacro(*this, "__has_include"); Ident__has_include_next = RegisterBuiltinMacro(*this, "__has_include_next"); Ident__has_warning = RegisterBuiltinMacro(*this, "__has_warning"); Ident__is_identifier = RegisterBuiltinMacro(*this, "__is_identifier"); + Ident__is_target_arch = RegisterBuiltinMacro(*this, "__is_target_arch"); + Ident__is_target_vendor = RegisterBuiltinMacro(*this, "__is_target_vendor"); + Ident__is_target_os = RegisterBuiltinMacro(*this, "__is_target_os"); + Ident__is_target_environment = + RegisterBuiltinMacro(*this, "__is_target_environment"); // Modules. 
Ident__building_module = RegisterBuiltinMacro(*this, "__building_module"); @@ -1023,7 +1029,7 @@ Token *Preprocessor::cacheMacroExpandedTokens(TokenLexer *tokLexer, size_t newIndex = MacroExpandedTokens.size(); bool cacheNeedsToGrow = tokens.size() > - MacroExpandedTokens.capacity()-MacroExpandedTokens.size(); + MacroExpandedTokens.capacity()-MacroExpandedTokens.size(); MacroExpandedTokens.append(tokens.begin(), tokens.end()); if (cacheNeedsToGrow) { @@ -1098,6 +1104,8 @@ static bool HasFeature(const Preprocessor &PP, StringRef Feature) { .Case("address_sanitizer", LangOpts.Sanitize.hasOneOf(SanitizerKind::Address | SanitizerKind::KernelAddress)) + .Case("hwaddress_sanitizer", + LangOpts.Sanitize.hasOneOf(SanitizerKind::HWAddress)) .Case("assume_nonnull", true) .Case("attribute_analyzer_noreturn", true) .Case("attribute_availability", true) @@ -1135,9 +1143,11 @@ static bool HasFeature(const Preprocessor &PP, StringRef Feature) { .Case("nullability_on_arrays", true) .Case("memory_sanitizer", LangOpts.Sanitize.has(SanitizerKind::Memory)) .Case("thread_sanitizer", LangOpts.Sanitize.has(SanitizerKind::Thread)) - .Case("dataflow_sanitizer", LangOpts.Sanitize.has(SanitizerKind::DataFlow)) + .Case("dataflow_sanitizer", + LangOpts.Sanitize.has(SanitizerKind::DataFlow)) .Case("efficiency_sanitizer", LangOpts.Sanitize.hasOneOf(SanitizerKind::Efficiency)) + .Case("scudo", LangOpts.Sanitize.hasOneOf(SanitizerKind::Scudo)) // Objective-C features .Case("objc_arr", LangOpts.ObjCAutoRefCount) // FIXME: REMOVE? .Case("objc_arc", LangOpts.ObjCAutoRefCount) @@ -1588,6 +1598,56 @@ static IdentifierInfo *ExpectFeatureIdentifierInfo(Token &Tok, return nullptr; } +/// Implements the __is_target_arch builtin macro. +static bool isTargetArch(const TargetInfo &TI, const IdentifierInfo *II) { + std::string ArchName = II->getName().lower() + "--"; + llvm::Triple Arch(ArchName); + const llvm::Triple &TT = TI.getTriple(); + if (TT.isThumb()) { + // arm matches thumb or thumbv7. 
armv7 matches thumbv7. + if ((Arch.getSubArch() == llvm::Triple::NoSubArch || + Arch.getSubArch() == TT.getSubArch()) && + ((TT.getArch() == llvm::Triple::thumb && + Arch.getArch() == llvm::Triple::arm) || + (TT.getArch() == llvm::Triple::thumbeb && + Arch.getArch() == llvm::Triple::armeb))) + return true; + } + // Check the parsed arch when it has no sub arch to allow Clang to + // match thumb to thumbv7 but to prohibit matching thumbv6 to thumbv7. + return (Arch.getSubArch() == llvm::Triple::NoSubArch || + Arch.getSubArch() == TT.getSubArch()) && + Arch.getArch() == TT.getArch(); +} + +/// Implements the __is_target_vendor builtin macro. +static bool isTargetVendor(const TargetInfo &TI, const IdentifierInfo *II) { + StringRef VendorName = TI.getTriple().getVendorName(); + if (VendorName.empty()) + VendorName = "unknown"; + return VendorName.equals_lower(II->getName()); +} + +/// Implements the __is_target_os builtin macro. +static bool isTargetOS(const TargetInfo &TI, const IdentifierInfo *II) { + std::string OSName = + (llvm::Twine("unknown-unknown-") + II->getName().lower()).str(); + llvm::Triple OS(OSName); + if (OS.getOS() == llvm::Triple::Darwin) { + // Darwin matches macos, ios, etc. + return TI.getTriple().isOSDarwin(); + } + return TI.getTriple().getOS() == OS.getOS(); +} + +/// Implements the __is_target_environment builtin macro. +static bool isTargetEnvironment(const TargetInfo &TI, + const IdentifierInfo *II) { + std::string EnvName = (llvm::Twine("---") + II->getName().lower()).str(); + llvm::Triple Env(EnvName); + return TI.getTriple().getEnvironment() == Env.getEnvironment(); +} + /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded /// as a builtin macro, handle it and return the next token as 'Tok'. 
void Preprocessor::ExpandBuiltinMacro(Token &Tok) { @@ -1750,6 +1810,10 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { .Case("__make_integer_seq", LangOpts.CPlusPlus) .Case("__type_pack_element", LangOpts.CPlusPlus) .Case("__builtin_available", true) + .Case("__is_target_arch", true) + .Case("__is_target_vendor", true) + .Case("__is_target_os", true) + .Case("__is_target_environment", true) .Default(false); } }); @@ -1774,30 +1838,34 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { return II ? hasAttribute(AttrSyntax::Declspec, nullptr, II, getTargetInfo(), getLangOpts()) : 0; }); - } else if (II == Ident__has_cpp_attribute) { - EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, - [this](Token &Tok, bool &HasLexedNextToken) -> int { - IdentifierInfo *ScopeII = nullptr; - IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this, - diag::err_feature_check_malformed); - if (!II) - return false; - - // It is possible to receive a scope token. Read the "::", if it is - // available, and the subsequent identifier. - LexUnexpandedToken(Tok); - if (Tok.isNot(tok::coloncolon)) - HasLexedNextToken = true; - else { - ScopeII = II; + } else if (II == Ident__has_cpp_attribute || + II == Ident__has_c_attribute) { + bool IsCXX = II == Ident__has_cpp_attribute; + EvaluateFeatureLikeBuiltinMacro( + OS, Tok, II, *this, [&](Token &Tok, bool &HasLexedNextToken) -> int { + IdentifierInfo *ScopeII = nullptr; + IdentifierInfo *II = ExpectFeatureIdentifierInfo( + Tok, *this, diag::err_feature_check_malformed); + if (!II) + return false; + + // It is possible to receive a scope token. Read the "::", if it is + // available, and the subsequent identifier. 
LexUnexpandedToken(Tok); - II = ExpectFeatureIdentifierInfo(Tok, *this, - diag::err_feature_check_malformed); - } + if (Tok.isNot(tok::coloncolon)) + HasLexedNextToken = true; + else { + ScopeII = II; + LexUnexpandedToken(Tok); + II = ExpectFeatureIdentifierInfo(Tok, *this, + diag::err_feature_check_malformed); + } - return II ? hasAttribute(AttrSyntax::CXX, ScopeII, II, - getTargetInfo(), getLangOpts()) : 0; - }); + AttrSyntax Syntax = IsCXX ? AttrSyntax::CXX : AttrSyntax::C; + return II ? hasAttribute(Syntax, ScopeII, II, getTargetInfo(), + getLangOpts()) + : 0; + }); } else if (II == Ident__has_include || II == Ident__has_include_next) { // The argument to these two builtins should be a parenthesized @@ -1897,6 +1965,34 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { Diag(LParenLoc, diag::note_matching) << tok::l_paren; } return; + } else if (II == Ident__is_target_arch) { + EvaluateFeatureLikeBuiltinMacro( + OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int { + IdentifierInfo *II = ExpectFeatureIdentifierInfo( + Tok, *this, diag::err_feature_check_malformed); + return II && isTargetArch(getTargetInfo(), II); + }); + } else if (II == Ident__is_target_vendor) { + EvaluateFeatureLikeBuiltinMacro( + OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int { + IdentifierInfo *II = ExpectFeatureIdentifierInfo( + Tok, *this, diag::err_feature_check_malformed); + return II && isTargetVendor(getTargetInfo(), II); + }); + } else if (II == Ident__is_target_os) { + EvaluateFeatureLikeBuiltinMacro( + OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int { + IdentifierInfo *II = ExpectFeatureIdentifierInfo( + Tok, *this, diag::err_feature_check_malformed); + return II && isTargetOS(getTargetInfo(), II); + }); + } else if (II == Ident__is_target_environment) { + EvaluateFeatureLikeBuiltinMacro( + OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int { + IdentifierInfo *II = 
ExpectFeatureIdentifierInfo( + Tok, *this, diag::err_feature_check_malformed); + return II && isTargetEnvironment(getTargetInfo(), II); + }); } else { llvm_unreachable("Unknown identifier!"); } diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp index ec806e8445311..d6c20a13d27be 100644 --- a/lib/Lex/PTHLexer.cpp +++ b/lib/Lex/PTHLexer.cpp @@ -1,4 +1,4 @@ -//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===// +//===- PTHLexer.cpp - Lex from a token stream -----------------------------===// // // The LLVM Compiler Infrastructure // @@ -12,19 +12,32 @@ //===----------------------------------------------------------------------===// #include "clang/Lex/PTHLexer.h" +#include "clang/Basic/Diagnostic.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/FileSystemStatCache.h" #include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/SourceManager.h" #include "clang/Basic/TokenKinds.h" #include "clang/Lex/LexDiagnostic.h" #include "clang/Lex/PTHManager.h" #include "clang/Lex/Preprocessor.h" #include "clang/Lex/Token.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/EndianStream.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/OnDiskHashTable.h" +#include <cassert> +#include <cstdint> +#include <cstdlib> +#include <cstring> +#include <ctime> #include <memory> -#include <system_error> +#include <utility> + using namespace clang; static const unsigned StoredTokenSize = 1 + 1 + 2 + 4 + 4; @@ -35,9 +48,8 @@ static const unsigned StoredTokenSize = 1 + 1 + 2 + 4 + 4; PTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D, const unsigned char *ppcond, PTHManager &PM) - : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(nullptr), - PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) { - + : 
PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), PPCond(ppcond), + CurPPCondPtr(ppcond), PTHMgr(PM) { FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID); } @@ -167,7 +179,7 @@ void PTHLexer::DiscardToEndOfLine() { // We don't need to actually reconstruct full tokens from the token buffer. // This saves some copies and it also reduces IdentifierInfo* lookup. const unsigned char* p = CurPtr; - while (1) { + while (true) { // Read the token kind. Are we at the end of the file? tok::TokenKind x = (tok::TokenKind) (uint8_t) *p; if (x == tok::eof) break; @@ -186,6 +198,7 @@ void PTHLexer::DiscardToEndOfLine() { /// SkipBlock - Used by Preprocessor to skip the current conditional block. bool PTHLexer::SkipBlock() { using namespace llvm::support; + assert(CurPPCondPtr && "No cached PP conditional information."); assert(LastHashTokPtr && "No known '#' token."); @@ -303,23 +316,24 @@ SourceLocation PTHLexer::getSourceLocation() { /// to map from FileEntry objects managed by FileManager to offsets within /// the PTH file. 
namespace { + class PTHFileData { const uint32_t TokenOff; const uint32_t PPCondOff; + public: PTHFileData(uint32_t tokenOff, uint32_t ppCondOff) - : TokenOff(tokenOff), PPCondOff(ppCondOff) {} + : TokenOff(tokenOff), PPCondOff(ppCondOff) {} uint32_t getTokenOffset() const { return TokenOff; } uint32_t getPPCondOffset() const { return PPCondOff; } }; - class PTHFileLookupCommonTrait { public: - typedef std::pair<unsigned char, StringRef> internal_key_type; - typedef unsigned hash_value_type; - typedef unsigned offset_type; + using internal_key_type = std::pair<unsigned char, StringRef>; + using hash_value_type = unsigned; + using offset_type = unsigned; static hash_value_type ComputeHash(internal_key_type x) { return llvm::HashString(x.second); @@ -328,6 +342,7 @@ public: static std::pair<unsigned, unsigned> ReadKeyDataLength(const unsigned char*& d) { using namespace llvm::support; + unsigned keyLen = (unsigned)endian::readNext<uint16_t, little, unaligned>(d); unsigned dataLen = (unsigned) *(d++); @@ -340,12 +355,12 @@ public: } }; -} // end anonymous namespace +} // namespace class PTHManager::PTHFileLookupTrait : public PTHFileLookupCommonTrait { public: - typedef const FileEntry* external_key_type; - typedef PTHFileData data_type; + using external_key_type = const FileEntry *; + using data_type = PTHFileData; static internal_key_type GetInternalKey(const FileEntry* FE) { return std::make_pair((unsigned char) 0x1, FE->getName()); @@ -357,8 +372,9 @@ public: static PTHFileData ReadData(const internal_key_type& k, const unsigned char* d, unsigned) { - assert(k.first == 0x1 && "Only file lookups can match!"); using namespace llvm::support; + + assert(k.first == 0x1 && "Only file lookups can match!"); uint32_t x = endian::readNext<uint32_t, little, unaligned>(d); uint32_t y = endian::readNext<uint32_t, little, unaligned>(d); return PTHFileData(x, y); @@ -367,11 +383,11 @@ public: class PTHManager::PTHStringLookupTrait { public: - typedef uint32_t data_type; - 
typedef const std::pair<const char*, unsigned> external_key_type; - typedef external_key_type internal_key_type; - typedef uint32_t hash_value_type; - typedef unsigned offset_type; + using data_type = uint32_t; + using external_key_type = const std::pair<const char *, unsigned>; + using internal_key_type = external_key_type; + using hash_value_type = uint32_t; + using offset_type = unsigned; static bool EqualKey(const internal_key_type& a, const internal_key_type& b) { @@ -390,6 +406,7 @@ public: static std::pair<unsigned, unsigned> ReadKeyDataLength(const unsigned char*& d) { using namespace llvm::support; + return std::make_pair( (unsigned)endian::readNext<uint16_t, little, unaligned>(d), sizeof(uint32_t)); @@ -404,6 +421,7 @@ public: static uint32_t ReadData(const internal_key_type& k, const unsigned char* d, unsigned) { using namespace llvm::support; + return endian::readNext<uint32_t, little, unaligned>(d); } }; @@ -420,11 +438,10 @@ PTHManager::PTHManager( const unsigned char *spellingBase, const char *originalSourceFile) : Buf(std::move(buf)), PerIDCache(std::move(perIDCache)), FileLookup(std::move(fileLookup)), IdDataTable(idDataTable), - StringIdLookup(std::move(stringIdLookup)), NumIds(numIds), PP(nullptr), + StringIdLookup(std::move(stringIdLookup)), NumIds(numIds), SpellingBase(spellingBase), OriginalSourceFile(originalSourceFile) {} -PTHManager::~PTHManager() { -} +PTHManager::~PTHManager() = default; static void InvalidPTH(DiagnosticsEngine &Diags, const char *Msg) { Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Error, "%0")) << Msg; @@ -557,6 +574,7 @@ PTHManager *PTHManager::Create(StringRef file, DiagnosticsEngine &Diags) { IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) { using namespace llvm::support; + // Look in the PTH file for the string data for the IdentifierInfo object. 
const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID; const unsigned char *IDData = @@ -566,7 +584,7 @@ IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) { // Allocate the object. std::pair<IdentifierInfo,const unsigned char*> *Mem = - Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >(); + Alloc.Allocate<std::pair<IdentifierInfo, const unsigned char *>>(); Mem->second = IDData; assert(IDData[0] != '\0'); @@ -626,26 +644,26 @@ PTHLexer *PTHManager::CreateLexer(FileID FID) { //===----------------------------------------------------------------------===// namespace { + class PTHStatData { public: uint64_t Size; time_t ModTime; llvm::sys::fs::UniqueID UniqueID; - const bool HasData; + const bool HasData = false; bool IsDirectory; + PTHStatData() = default; PTHStatData(uint64_t Size, time_t ModTime, llvm::sys::fs::UniqueID UniqueID, bool IsDirectory) : Size(Size), ModTime(ModTime), UniqueID(UniqueID), HasData(true), IsDirectory(IsDirectory) {} - - PTHStatData() : HasData(false) {} }; class PTHStatLookupTrait : public PTHFileLookupCommonTrait { public: - typedef StringRef external_key_type; // const char* - typedef PTHStatData data_type; + using external_key_type = StringRef; // const char* + using data_type = PTHStatData; static internal_key_type GetInternalKey(StringRef path) { // The key 'kind' doesn't matter here because it is ignored in EqualKey. 
@@ -660,7 +678,6 @@ public: static data_type ReadData(const internal_key_type& k, const unsigned char* d, unsigned) { - if (k.first /* File or Directory */) { bool IsDirectory = true; if (k.first == 0x1 /* File */) { @@ -682,11 +699,14 @@ public: return data_type(); } }; -} // end anonymous namespace + +} // namespace namespace clang { + class PTHStatCache : public FileSystemStatCache { - typedef llvm::OnDiskChainedHashTable<PTHStatLookupTrait> CacheTy; + using CacheTy = llvm::OnDiskChainedHashTable<PTHStatLookupTrait>; + CacheTy Cache; public: @@ -720,7 +740,8 @@ public: return CacheExists; } }; -} + +} // namespace clang std::unique_ptr<FileSystemStatCache> PTHManager::createStatCache() { return llvm::make_unique<PTHStatCache>(*FileLookup); diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp index bf2363a0a6f45..b8acd92521fb5 100644 --- a/lib/Lex/Pragma.cpp +++ b/lib/Lex/Pragma.cpp @@ -1,4 +1,4 @@ -//===--- Pragma.cpp - Pragma registration and handling --------------------===// +//===- Pragma.cpp - Pragma registration and handling ----------------------===// // // The LLVM Compiler Infrastructure // @@ -13,15 +13,21 @@ //===----------------------------------------------------------------------===// #include "clang/Lex/Pragma.h" +#include "clang/Basic/Diagnostic.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/Module.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TokenKinds.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/LexDiagnostic.h" +#include "clang/Lex/Lexer.h" #include "clang/Lex/LiteralSupport.h" #include "clang/Lex/MacroInfo.h" +#include "clang/Lex/ModuleLoader.h" #include "clang/Lex/PPCallbacks.h" #include "clang/Lex/Preprocessor.h" #include "clang/Lex/PreprocessorLexer.h" @@ -30,25 +36,27 @@ #include "clang/Lex/TokenLexer.h" #include "llvm/ADT/ArrayRef.h" #include 
"llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/CrashRecoveryContext.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include <algorithm> #include <cassert> +#include <cstddef> #include <cstdint> #include <limits> #include <string> +#include <utility> #include <vector> using namespace clang; // Out-of-line destructor to provide a home for the class. -PragmaHandler::~PragmaHandler() { -} +PragmaHandler::~PragmaHandler() = default; //===----------------------------------------------------------------------===// // EmptyPragmaHandler Implementation. @@ -144,15 +152,14 @@ namespace { class LexingFor_PragmaRAII { Preprocessor &PP; bool InMacroArgPreExpansion; - bool Failed; + bool Failed = false; Token &OutTok; Token PragmaTok; public: LexingFor_PragmaRAII(Preprocessor &PP, bool InMacroArgPreExpansion, Token &Tok) - : PP(PP), InMacroArgPreExpansion(InMacroArgPreExpansion), - Failed(false), OutTok(Tok) { + : PP(PP), InMacroArgPreExpansion(InMacroArgPreExpansion), OutTok(Tok) { if (InMacroArgPreExpansion) { PragmaTok = OutTok; PP.EnableBacktrackAtThisPos(); @@ -186,13 +193,12 @@ public: } }; -} // end anonymous namespace +} // namespace /// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then /// return the first token after the directive. The _Pragma token has just /// been read into 'Tok'. void Preprocessor::Handle_Pragma(Token &Tok) { - // This works differently if we are pre-expanding a macro argument. // In that case we don't actually "activate" the pragma now, we only lex it // until we are sure it is lexically correct and then we backtrack so that @@ -381,7 +387,6 @@ void Preprocessor::HandleMicrosoft__pragma(Token &Tok) { } /// HandlePragmaOnce - Handle \#pragma once. OnceTok is the 'once'. 
-/// void Preprocessor::HandlePragmaOnce(Token &OnceTok) { // Don't honor the 'once' when handling the primary source file, unless // this is a prefix to a TU, which indicates we're generating a PCH file, or @@ -406,7 +411,6 @@ void Preprocessor::HandlePragmaMark() { } /// HandlePragmaPoison - Handle \#pragma GCC poison. PoisonTok is the 'poison'. -/// void Preprocessor::HandlePragmaPoison() { Token Tok; @@ -461,7 +465,6 @@ void Preprocessor::HandlePragmaSystemHeader(Token &SysHeaderTok) { // Mark the file as a system header. HeaderInfo.MarkFileSystemHeader(TheLexer->getFileEntry()); - PresumedLoc PLoc = SourceMgr.getPresumedLoc(SysHeaderTok.getLocation()); if (PLoc.isInvalid()) return; @@ -482,7 +485,6 @@ void Preprocessor::HandlePragmaSystemHeader(Token &SysHeaderTok) { } /// HandlePragmaDependency - Handle \#pragma GCC dependency "foo" blah. -/// void Preprocessor::HandlePragmaDependency(Token &DependencyTok) { Token FilenameTok; CurPPLexer->LexIncludeFilename(FilenameTok); @@ -623,7 +625,7 @@ void Preprocessor::HandlePragmaPopMacro(Token &PopMacroTok) { if (!IdentInfo) return; // Find the vector<MacroInfo*> associated with the macro. - llvm::DenseMap<IdentifierInfo*, std::vector<MacroInfo*> >::iterator iter = + llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>::iterator iter = PragmaPushMacroInfo.find(IdentInfo); if (iter != PragmaPushMacroInfo.end()) { // Forget the MacroInfo currently associated with IdentInfo. @@ -962,6 +964,7 @@ namespace { /// PragmaOnceHandler - "\#pragma once" marks the file as atomically included. struct PragmaOnceHandler : public PragmaHandler { PragmaOnceHandler() : PragmaHandler("once") {} + void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, Token &OnceTok) override { PP.CheckEndOfDirective("pragma once"); @@ -1116,7 +1119,6 @@ struct PragmaDebugHandler : public PragmaHandler { #ifdef _MSC_VER #pragma warning(default : 4717) #endif - }; /// PragmaDiagnosticHandler - e.g. 
'\#pragma GCC diagnostic ignored "-Wformat"' @@ -1125,8 +1127,8 @@ private: const char *Namespace; public: - explicit PragmaDiagnosticHandler(const char *NS) : - PragmaHandler("diagnostic"), Namespace(NS) {} + explicit PragmaDiagnosticHandler(const char *NS) + : PragmaHandler("diagnostic"), Namespace(NS) {} void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, Token &DiagToken) override { @@ -1330,6 +1332,7 @@ struct PragmaWarningHandler : public PragmaHandler { /// PragmaIncludeAliasHandler - "\#pragma include_alias("...")". struct PragmaIncludeAliasHandler : public PragmaHandler { PragmaIncludeAliasHandler() : PragmaHandler("include_alias") {} + void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, Token &IncludeAliasTok) override { PP.HandlePragmaIncludeAlias(IncludeAliasTok); @@ -1370,7 +1373,8 @@ private: public: PragmaMessageHandler(PPCallbacks::PragmaMessageKind Kind, StringRef Namespace = StringRef()) - : PragmaHandler(PragmaKind(Kind, true)), Kind(Kind), Namespace(Namespace) {} + : PragmaHandler(PragmaKind(Kind, true)), Kind(Kind), + Namespace(Namespace) {} void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, Token &Tok) override { @@ -1615,8 +1619,7 @@ struct PragmaSTDC_FENV_ACCESSHandler : public PragmaHandler { /// PragmaSTDC_CX_LIMITED_RANGEHandler - "\#pragma STDC CX_LIMITED_RANGE ...". struct PragmaSTDC_CX_LIMITED_RANGEHandler : public PragmaHandler { - PragmaSTDC_CX_LIMITED_RANGEHandler() - : PragmaHandler("CX_LIMITED_RANGE") {} + PragmaSTDC_CX_LIMITED_RANGEHandler() : PragmaHandler("CX_LIMITED_RANGE") {} void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, Token &Tok) override { @@ -1627,7 +1630,7 @@ struct PragmaSTDC_CX_LIMITED_RANGEHandler : public PragmaHandler { /// PragmaSTDC_UnknownHandler - "\#pragma STDC ...". 
struct PragmaSTDC_UnknownHandler : public PragmaHandler { - PragmaSTDC_UnknownHandler() {} + PragmaSTDC_UnknownHandler() = default; void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, Token &UnknownTok) override { @@ -1725,6 +1728,7 @@ struct PragmaAssumeNonNullHandler : public PragmaHandler { // The start location we want after processing this. SourceLocation NewLoc; + PPCallbacks *Callbacks = PP.getPPCallbacks(); if (IsBegin) { // Complain about attempts to re-enter an audit. @@ -1733,6 +1737,8 @@ struct PragmaAssumeNonNullHandler : public PragmaHandler { PP.Diag(BeginLoc, diag::note_pragma_entered_here); } NewLoc = Loc; + if (Callbacks) + Callbacks->PragmaAssumeNonNullBegin(NewLoc); } else { // Complain about attempts to leave an audit that doesn't exist. if (!BeginLoc.isValid()) { @@ -1740,6 +1746,8 @@ struct PragmaAssumeNonNullHandler : public PragmaHandler { return; } NewLoc = SourceLocation(); + if (Callbacks) + Callbacks->PragmaAssumeNonNullEnd(NewLoc); } PP.setPragmaAssumeNonNullLoc(NewLoc); @@ -1758,7 +1766,7 @@ struct PragmaAssumeNonNullHandler : public PragmaHandler { /// <a href="http://msdn.microsoft.com/en-us/library/b6xkz944(v=vs.80).aspx">editor-only</a> /// pragma, just skipped by compiler. struct PragmaRegionHandler : public PragmaHandler { - PragmaRegionHandler(const char *pragma) : PragmaHandler(pragma) { } + PragmaRegionHandler(const char *pragma) : PragmaHandler(pragma) {} void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, Token &NameTok) override { @@ -1769,7 +1777,7 @@ struct PragmaRegionHandler : public PragmaHandler { } }; -} // end anonymous namespace +} // namespace /// RegisterBuiltinPragmas - Install the standard preprocessor pragmas: /// \#pragma GCC poison/system_header/dependency and \#pragma once. 
diff --git a/lib/Lex/PreprocessingRecord.cpp b/lib/Lex/PreprocessingRecord.cpp index 03c4cbe589d50..af439dbfa5842 100644 --- a/lib/Lex/PreprocessingRecord.cpp +++ b/lib/Lex/PreprocessingRecord.cpp @@ -1,4 +1,4 @@ -//===--- PreprocessingRecord.cpp - Record of Preprocessing ------*- C++ -*-===// +//===- PreprocessingRecord.cpp - Record of Preprocessing ------------------===// // // The LLVM Compiler Infrastructure // @@ -11,15 +11,34 @@ // of what occurred during preprocessing, and its helpers. // //===----------------------------------------------------------------------===// + #include "clang/Lex/PreprocessingRecord.h" +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TokenKinds.h" #include "clang/Lex/MacroInfo.h" #include "clang/Lex/Token.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Capacity.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstring> +#include <iterator> +#include <utility> +#include <vector> using namespace clang; -ExternalPreprocessingRecordSource::~ExternalPreprocessingRecordSource() { } +ExternalPreprocessingRecordSource::~ExternalPreprocessingRecordSource() = + default; InclusionDirective::InclusionDirective(PreprocessingRecord &PPRec, InclusionKind Kind, StringRef FileName, @@ -33,10 +52,7 @@ InclusionDirective::InclusionDirective(PreprocessingRecord &PPRec, this->FileName = StringRef(Memory, FileName.size()); } -PreprocessingRecord::PreprocessingRecord(SourceManager &SM) - : SourceMgr(SM), - ExternalSource(nullptr) { -} +PreprocessingRecord::PreprocessingRecord(SourceManager &SM) : SourceMgr(SM) {} /// \brief Returns a pair of [Begin, End) iterators of preprocessed entities /// that source range \p Range 
encompasses. @@ -166,7 +182,7 @@ template <SourceLocation (SourceRange::*getRangeLoc)() const> struct PPEntityComp { const SourceManager &SM; - explicit PPEntityComp(const SourceManager &SM) : SM(SM) { } + explicit PPEntityComp(const SourceManager &SM) : SM(SM) {} bool operator()(PreprocessedEntity *L, PreprocessedEntity *R) const { SourceLocation LHS = getLoc(L); @@ -190,7 +206,7 @@ struct PPEntityComp { } }; -} +} // namespace unsigned PreprocessingRecord::findBeginLocalPreprocessedEntity( SourceLocation Loc) const { @@ -271,7 +287,7 @@ PreprocessingRecord::addPreprocessedEntity(PreprocessedEntity *Entity) { // FM(M1, M2) // \endcode - typedef std::vector<PreprocessedEntity *>::iterator pp_iter; + using pp_iter = std::vector<PreprocessedEntity *>::iterator; // Usually there are few macro expansions when defining the filename, do a // linear search for a few entities. @@ -400,8 +416,9 @@ void PreprocessingRecord::Defined(const Token &MacroNameTok, MacroNameTok.getLocation()); } -void PreprocessingRecord::SourceRangeSkipped(SourceRange Range) { - SkippedRanges.push_back(Range); +void PreprocessingRecord::SourceRangeSkipped(SourceRange Range, + SourceLocation EndifLoc) { + SkippedRanges.emplace_back(Range.getBegin(), EndifLoc); } void PreprocessingRecord::MacroExpands(const Token &Id, @@ -429,7 +446,7 @@ void PreprocessingRecord::MacroUndefined(const Token &Id, void PreprocessingRecord::InclusionDirective( SourceLocation HashLoc, - const clang::Token &IncludeTok, + const Token &IncludeTok, StringRef FileName, bool IsAngled, CharSourceRange FilenameRange, @@ -469,10 +486,10 @@ void PreprocessingRecord::InclusionDirective( EndLoc = EndLoc.getLocWithOffset(-1); // the InclusionDirective expects // a token range. 
} - clang::InclusionDirective *ID - = new (*this) clang::InclusionDirective(*this, Kind, FileName, !IsAngled, - (bool)Imported, - File, SourceRange(HashLoc, EndLoc)); + clang::InclusionDirective *ID = + new (*this) clang::InclusionDirective(*this, Kind, FileName, !IsAngled, + (bool)Imported, File, + SourceRange(HashLoc, EndLoc)); addPreprocessedEntity(ID); } diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp index 7979be773aa13..c291a4b99d101 100644 --- a/lib/Lex/Preprocessor.cpp +++ b/lib/Lex/Preprocessor.cpp @@ -1,4 +1,4 @@ -//===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===// +//===- Preprocess.cpp - C Language Family Preprocessor Implementation -----===// // // The LLVM Compiler Infrastructure // @@ -28,22 +28,33 @@ #include "clang/Lex/Preprocessor.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/FileSystemStatCache.h" +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/Module.h" +#include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/Lex/CodeCompletionHandler.h" #include "clang/Lex/ExternalPreprocessorSource.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/LexDiagnostic.h" +#include "clang/Lex/Lexer.h" #include "clang/Lex/LiteralSupport.h" #include "clang/Lex/MacroArgs.h" #include "clang/Lex/MacroInfo.h" #include "clang/Lex/ModuleLoader.h" +#include "clang/Lex/PTHLexer.h" #include "clang/Lex/PTHManager.h" #include "clang/Lex/Pragma.h" #include "clang/Lex/PreprocessingRecord.h" +#include "clang/Lex/PreprocessorLexer.h" #include "clang/Lex/PreprocessorOptions.h" #include "clang/Lex/ScratchBuffer.h" +#include "clang/Lex/Token.h" +#include "clang/Lex/TokenLexer.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -65,8 +76,7 @@ using 
namespace clang; LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry) -//===----------------------------------------------------------------------===// -ExternalPreprocessorSource::~ExternalPreprocessorSource() { } +ExternalPreprocessorSource::~ExternalPreprocessorSource() = default; Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts, DiagnosticsEngine &diags, LangOptions &opts, @@ -74,34 +84,16 @@ Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts, HeaderSearch &Headers, ModuleLoader &TheModuleLoader, IdentifierInfoLookup *IILookup, bool OwnsHeaders, TranslationUnitKind TUKind) - : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), Target(nullptr), - AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM), + : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), + FileMgr(Headers.getFileMgr()), SourceMgr(SM), PCMCache(PCMCache), ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers), TheModuleLoader(TheModuleLoader), ExternalSource(nullptr), Identifiers(opts, IILookup), - PragmaHandlers(new PragmaNamespace(StringRef())), - IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr), - CodeCompletionFile(nullptr), CodeCompletionOffset(0), - LastTokenWasAt(false), ModuleImportExpectsIdentifier(false), - CodeCompletionReached(false), CodeCompletionII(nullptr), - MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr), - CurDirLookup(nullptr), CurLexerKind(CLK_Lexer), - CurLexerSubmodule(nullptr), Callbacks(nullptr), - CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr), - Record(nullptr), MIChainHead(nullptr) { + PragmaHandlers(new PragmaNamespace(StringRef())), TUKind(TUKind), + SkipMainFilePreamble(0, true), + CurSubmoduleState(&NullSubmoduleState) { OwnsHeaderSearch = OwnsHeaders; - CounterValue = 0; // __COUNTER__ starts at 0. - - // Clear stats. 
- NumDirectives = NumDefined = NumUndefined = NumPragma = 0; - NumIf = NumElse = NumEndif = 0; - NumEnteredSourceFiles = 0; - NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0; - NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0; - MaxIncludeStackDepth = 0; - NumSkipped = 0; - // Default to discarding comments. KeepComments = false; KeepMacroComments = false; @@ -117,16 +109,20 @@ Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts, ParsingIfOrElifDirective = false; PreprocessedOutput = false; - CachedLexPos = 0; - // We haven't read anything from the external source. ReadMacrosFromExternalSource = false; - - // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. - // This gets unpoisoned where it is allowed. + + // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of + // a macro. They get unpoisoned where it is allowed. (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); - + if (getLangOpts().CPlusPlus2a) { + (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned(); + SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use); + } else { + Ident__VA_OPT__ = nullptr; + } + // Initialize the pragma handlers. RegisterBuiltinPragmas(); @@ -516,9 +512,9 @@ void Preprocessor::EnterMainSourceFile() { // If we've been asked to skip bytes in the main file (e.g., as part of a // precompiled preamble), do so now. if (SkipMainFilePreamble.first > 0) - CurLexer->SkipBytes(SkipMainFilePreamble.first, - SkipMainFilePreamble.second); - + CurLexer->SetByteOffset(SkipMainFilePreamble.first, + SkipMainFilePreamble.second); + // Tell the header info that the main file was entered. If the file is later // #imported, it won't be re-entered. 
if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) @@ -544,6 +540,13 @@ void Preprocessor::replayPreambleConditionalStack() { "CurPPLexer is null when calling replayPreambleConditionalStack."); CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack()); PreambleConditionalStack.doneReplaying(); + if (PreambleConditionalStack.reachedEOFWhileSkipping()) + SkipExcludedConditionalBlock( + PreambleConditionalStack.SkipInfo->HashTokenLoc, + PreambleConditionalStack.SkipInfo->IfTokenLoc, + PreambleConditionalStack.SkipInfo->FoundNonSkipPortion, + PreambleConditionalStack.SkipInfo->FoundElse, + PreambleConditionalStack.SkipInfo->ElseLoc); } } @@ -586,7 +589,7 @@ IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { Identifier.setIdentifierInfo(II); if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() && getSourceManager().isInSystemHeader(Identifier.getLocation())) - Identifier.setKind(clang::tok::identifier); + Identifier.setKind(tok::identifier); else Identifier.setKind(II->getTokenID()); @@ -632,6 +635,8 @@ static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II, return llvm::StringSwitch<diag::kind>(II.getName()) #define CXX11_KEYWORD(NAME, FLAGS) \ .Case(#NAME, diag::warn_cxx11_keyword) +#define CXX2A_KEYWORD(NAME, FLAGS) \ + .Case(#NAME, diag::warn_cxx2a_keyword) #include "clang/Basic/TokenKinds.def" ; @@ -665,13 +670,15 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { // unpoisoned it if we're defining a C99 macro. 
if (II.isOutOfDate()) { bool CurrentIsPoisoned = false; - if (&II == Ident__VA_ARGS__) - CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned(); + const bool IsSpecialVariadicMacro = + &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__; + if (IsSpecialVariadicMacro) + CurrentIsPoisoned = II.isPoisoned(); updateOutOfDateIdentifier(II); Identifier.setKind(II.getTokenID()); - if (&II == Ident__VA_ARGS__) + if (IsSpecialVariadicMacro) II.setIsPoisoned(CurrentIsPoisoned); } @@ -924,8 +931,8 @@ void Preprocessor::addCommentHandler(CommentHandler *Handler) { } void Preprocessor::removeCommentHandler(CommentHandler *Handler) { - std::vector<CommentHandler *>::iterator Pos - = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler); + std::vector<CommentHandler *>::iterator Pos = + std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler); assert(Pos != CommentHandlers.end() && "Comment handler not registered"); CommentHandlers.erase(Pos); } @@ -944,11 +951,11 @@ bool Preprocessor::HandleComment(Token &result, SourceRange Comment) { return true; } -ModuleLoader::~ModuleLoader() { } +ModuleLoader::~ModuleLoader() = default; -CommentHandler::~CommentHandler() { } +CommentHandler::~CommentHandler() = default; -CodeCompletionHandler::~CodeCompletionHandler() { } +CodeCompletionHandler::~CodeCompletionHandler() = default; void Preprocessor::createPreprocessingRecord() { if (Record) diff --git a/lib/Lex/PreprocessorLexer.cpp b/lib/Lex/PreprocessorLexer.cpp index 33ccbc0cfc941..2e85f46f52c59 100644 --- a/lib/Lex/PreprocessorLexer.cpp +++ b/lib/Lex/PreprocessorLexer.cpp @@ -1,4 +1,4 @@ -//===--- PreprocessorLexer.cpp - C Language Family Lexer ------------------===// +//===- PreprocessorLexer.cpp - C Language Family Lexer --------------------===// // // The LLVM Compiler Infrastructure // @@ -15,14 +15,15 @@ #include "clang/Basic/SourceManager.h" #include "clang/Lex/LexDiagnostic.h" #include "clang/Lex/Preprocessor.h" +#include "clang/Lex/Token.h" +#include <cassert> 
+ using namespace clang; -void PreprocessorLexer::anchor() { } +void PreprocessorLexer::anchor() {} PreprocessorLexer::PreprocessorLexer(Preprocessor *pp, FileID fid) - : PP(pp), FID(fid), InitialNumSLocEntries(0), - ParsingPreprocessorDirective(false), - ParsingFilename(false), LexingRawMode(false) { + : PP(pp), FID(fid) { if (pp) InitialNumSLocEntries = pp->getSourceManager().local_sloc_entry_size(); } diff --git a/lib/Lex/TokenConcatenation.cpp b/lib/Lex/TokenConcatenation.cpp index d1facd9c68796..ec73479cb54f1 100644 --- a/lib/Lex/TokenConcatenation.cpp +++ b/lib/Lex/TokenConcatenation.cpp @@ -99,10 +99,14 @@ TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) { TokenInfo[tok::utf32_char_constant ] |= aci_custom; } - // These tokens have custom code in C++1z mode. - if (PP.getLangOpts().CPlusPlus1z) + // These tokens have custom code in C++17 mode. + if (PP.getLangOpts().CPlusPlus17) TokenInfo[tok::utf8_char_constant] |= aci_custom; + // These tokens have custom code in C++2a mode. + if (PP.getLangOpts().CPlusPlus2a) + TokenInfo[tok::lessequal ] |= aci_custom_firstchar; + // These tokens change behavior if followed by an '='. 
TokenInfo[tok::amp ] |= aci_avoid_equal; // &= TokenInfo[tok::plus ] |= aci_avoid_equal; // += @@ -283,5 +287,7 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, return FirstChar == '#' || FirstChar == '@' || FirstChar == '%'; case tok::arrow: // ->* return PP.getLangOpts().CPlusPlus && FirstChar == '*'; + case tok::lessequal: // <=> (C++2a) + return PP.getLangOpts().CPlusPlus2a && FirstChar == '>'; } } diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp index c2e49ba919a93..d7f1c7a93fdae 100644 --- a/lib/Lex/TokenLexer.cpp +++ b/lib/Lex/TokenLexer.cpp @@ -1,4 +1,4 @@ -//===--- TokenLexer.cpp - Lex from a token stream -------------------------===// +//===- TokenLexer.cpp - Lex from a token stream ---------------------------===// // // The LLVM Compiler Infrastructure // @@ -12,12 +12,25 @@ //===----------------------------------------------------------------------===// #include "clang/Lex/TokenLexer.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" +#include "clang/Basic/TokenKinds.h" #include "clang/Lex/LexDiagnostic.h" +#include "clang/Lex/Lexer.h" #include "clang/Lex/MacroArgs.h" #include "clang/Lex/MacroInfo.h" #include "clang/Lex/Preprocessor.h" +#include "clang/Lex/Token.h" +#include "clang/Lex/VariadicMacroSupport.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" +#include <cassert> +#include <cstring> using namespace clang; @@ -31,7 +44,7 @@ void TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroInfo *MI, Macro = MI; ActualArgs = Actuals; - CurToken = 0; + CurTokenIdx = 0; ExpandLocStart = Tok.getLocation(); ExpandLocEnd = ELEnd; @@ -90,7 +103,7 @@ void TokenLexer::Init(const Token *TokArray, unsigned NumToks, OwnsTokens = ownsTokens; DisableMacroExpansion = disableMacroExpansion; 
NumTokens = NumToks; - CurToken = 0; + CurTokenIdx = 0; ExpandLocStart = ExpandLocEnd = SourceLocation(); AtStartOfLine = false; HasLeadingSpace = false; @@ -168,6 +181,59 @@ bool TokenLexer::MaybeRemoveCommaBeforeVaArgs( return true; } +void TokenLexer::stringifyVAOPTContents( + SmallVectorImpl<Token> &ResultToks, const VAOptExpansionContext &VCtx, + const SourceLocation VAOPTClosingParenLoc) { + const int NumToksPriorToVAOpt = VCtx.getNumberOfTokensPriorToVAOpt(); + const unsigned int NumVAOptTokens = ResultToks.size() - NumToksPriorToVAOpt; + Token *const VAOPTTokens = + NumVAOptTokens ? &ResultToks[NumToksPriorToVAOpt] : nullptr; + + SmallVector<Token, 64> ConcatenatedVAOPTResultToks; + // FIXME: Should we keep track within VCtx that we did or didnot + // encounter pasting - and only then perform this loop. + + // Perform token pasting (concatenation) prior to stringization. + for (unsigned int CurTokenIdx = 0; CurTokenIdx != NumVAOptTokens; + ++CurTokenIdx) { + if (VAOPTTokens[CurTokenIdx].is(tok::hashhash)) { + assert(CurTokenIdx != 0 && + "Can not have __VAOPT__ contents begin with a ##"); + Token &LHS = VAOPTTokens[CurTokenIdx - 1]; + pasteTokens(LHS, llvm::makeArrayRef(VAOPTTokens, NumVAOptTokens), + CurTokenIdx); + // Replace the token prior to the first ## in this iteration. + ConcatenatedVAOPTResultToks.back() = LHS; + if (CurTokenIdx == NumVAOptTokens) + break; + } + ConcatenatedVAOPTResultToks.push_back(VAOPTTokens[CurTokenIdx]); + } + + ConcatenatedVAOPTResultToks.push_back(VCtx.getEOFTok()); + // Get the SourceLocation that represents the start location within + // the macro definition that marks where this string is substituted + // into: i.e. the __VA_OPT__ and the ')' within the spelling of the + // macro definition, and use it to indicate that the stringified token + // was generated from that location. 
+ const SourceLocation ExpansionLocStartWithinMacro = + getExpansionLocForMacroDefLoc(VCtx.getVAOptLoc()); + const SourceLocation ExpansionLocEndWithinMacro = + getExpansionLocForMacroDefLoc(VAOPTClosingParenLoc); + + Token StringifiedVAOPT = MacroArgs::StringifyArgument( + &ConcatenatedVAOPTResultToks[0], PP, VCtx.hasCharifyBefore() /*Charify*/, + ExpansionLocStartWithinMacro, ExpansionLocEndWithinMacro); + + if (VCtx.getLeadingSpaceForStringifiedToken()) + StringifiedVAOPT.setFlag(Token::LeadingSpace); + + StringifiedVAOPT.setFlag(Token::StringifiedInMacro); + // Resize (shrink) the token stream to just capture this stringified token. + ResultToks.resize(NumToksPriorToVAOpt + 1); + ResultToks.back() = StringifiedVAOPT; +} + /// Expand the arguments of a function-like macro so that we can quickly /// return preexpanded tokens from Tokens. void TokenLexer::ExpandFunctionArguments() { @@ -178,28 +244,117 @@ void TokenLexer::ExpandFunctionArguments() { // we install the newly expanded sequence as the new 'Tokens' list. bool MadeChange = false; - for (unsigned i = 0, e = NumTokens; i != e; ++i) { - // If we found the stringify operator, get the argument stringified. The - // preprocessor already verified that the following token is a macro name - // when the #define was parsed. - const Token &CurTok = Tokens[i]; + const bool CalledWithVariadicArguments = + ActualArgs->invokedWithVariadicArgument(Macro); + + VAOptExpansionContext VCtx(PP); + + for (unsigned I = 0, E = NumTokens; I != E; ++I) { + const Token &CurTok = Tokens[I]; // We don't want a space for the next token after a paste // operator. In valid code, the token will get smooshed onto the // preceding one anyway. In assembler-with-cpp mode, invalid // pastes are allowed through: in this case, we do not want the // extra whitespace to be added. For example, we want ". ## foo" // -> ".foo" not ". foo". 
- if (i != 0 && !Tokens[i-1].is(tok::hashhash) && CurTok.hasLeadingSpace()) + if (I != 0 && !Tokens[I-1].is(tok::hashhash) && CurTok.hasLeadingSpace()) NextTokGetsSpace = true; - if (CurTok.isOneOf(tok::hash, tok::hashat)) { - int ArgNo = Macro->getParameterNum(Tokens[i+1].getIdentifierInfo()); - assert(ArgNo != -1 && "Token following # is not an argument?"); + if (VCtx.isVAOptToken(CurTok)) { + MadeChange = true; + assert(Tokens[I + 1].is(tok::l_paren) && + "__VA_OPT__ must be followed by '('"); + + ++I; // Skip the l_paren + VCtx.sawVAOptFollowedByOpeningParens(CurTok.getLocation(), + ResultToks.size()); + + continue; + } + // We have entered into the __VA_OPT__ context, so handle tokens + // appropriately. + if (VCtx.isInVAOpt()) { + // If we are about to process a token that is either an argument to + // __VA_OPT__ or its closing rparen, then: + // 1) If the token is the closing rparen that exits us out of __VA_OPT__, + // perform any necessary stringification or placemarker processing, + // and/or skip to the next token. + // 2) else if macro was invoked without variadic arguments skip this + // token. + // 3) else (macro was invoked with variadic arguments) process the token + // normally. + + if (Tokens[I].is(tok::l_paren)) + VCtx.sawOpeningParen(Tokens[I].getLocation()); + // Continue skipping tokens within __VA_OPT__ if the macro was not + // called with variadic arguments, else let the rest of the loop handle + // this token. Note sawClosingParen() returns true only if the r_paren matches + // the closing r_paren of the __VA_OPT__. + if (!Tokens[I].is(tok::r_paren) || !VCtx.sawClosingParen()) { + if (!CalledWithVariadicArguments) { + // Skip this token. + continue; + } + // ... else the macro was called with variadic arguments, and we do not + // have a closing rparen - so process this token normally. 
+ } else { + // Current token is the closing r_paren which marks the end of the + // __VA_OPT__ invocation, so handle any place-marker pasting (if + // empty) by removing hashhash either before (if exists) or after. And + // also stringify the entire contents if VAOPT was preceded by a hash, + // but do so only after any token concatenation that needs to occur + // within the contents of VAOPT. + + if (VCtx.hasStringifyOrCharifyBefore()) { + // Replace all the tokens just added from within VAOPT into a single + // stringified token. This requires token-pasting to eagerly occur + // within these tokens. If either the contents of VAOPT were empty + // or the macro wasn't called with any variadic arguments, the result + // is a token that represents an empty string. + stringifyVAOPTContents(ResultToks, VCtx, + /*ClosingParenLoc*/ Tokens[I].getLocation()); + + } else if (/*No tokens within VAOPT*/ !( + ResultToks.size() - VCtx.getNumberOfTokensPriorToVAOpt())) { + // Treat VAOPT as a placemarker token. Eat either the '##' before the + // RHS/VAOPT (if one exists, suggesting that the LHS (if any) to that + // hashhash was not a placemarker) or the '##' + // after VAOPT, but not both. + + if (ResultToks.size() && ResultToks.back().is(tok::hashhash)) { + ResultToks.pop_back(); + } else if ((I + 1 != E) && Tokens[I + 1].is(tok::hashhash)) { + ++I; // Skip the following hashhash. + } + } + VCtx.reset(); + // We processed __VA_OPT__'s closing paren (and the exit out of + // __VA_OPT__), so skip to the next token. + continue; + } + } + + // If we found the stringify operator, get the argument stringified. The + // preprocessor already verified that the following token is a macro + // parameter or __VA_OPT__ when the #define was lexed. 
+ + if (CurTok.isOneOf(tok::hash, tok::hashat)) { + int ArgNo = Macro->getParameterNum(Tokens[I+1].getIdentifierInfo()); + assert((ArgNo != -1 || VCtx.isVAOptToken(Tokens[I + 1])) && + "Token following # is not an argument or __VA_OPT__!"); + + if (ArgNo == -1) { + // Handle the __VA_OPT__ case. + VCtx.sawHashOrHashAtBefore(NextTokGetsSpace, + CurTok.is(tok::hashat)); + continue; + } + // Else handle the simple argument case. SourceLocation ExpansionLocStart = getExpansionLocForMacroDefLoc(CurTok.getLocation()); SourceLocation ExpansionLocEnd = - getExpansionLocForMacroDefLoc(Tokens[i+1].getLocation()); + getExpansionLocForMacroDefLoc(Tokens[I+1].getLocation()); Token Res; if (CurTok.is(tok::hash)) // Stringify @@ -222,7 +377,7 @@ void TokenLexer::ExpandFunctionArguments() { ResultToks.push_back(Res); MadeChange = true; - ++i; // Skip arg name. + ++I; // Skip arg name. NextTokGetsSpace = false; continue; } @@ -230,9 +385,11 @@ void TokenLexer::ExpandFunctionArguments() { // Find out if there is a paste (##) operator before or after the token. bool NonEmptyPasteBefore = !ResultToks.empty() && ResultToks.back().is(tok::hashhash); - bool PasteBefore = i != 0 && Tokens[i-1].is(tok::hashhash); - bool PasteAfter = i+1 != e && Tokens[i+1].is(tok::hashhash); - assert(!NonEmptyPasteBefore || PasteBefore); + bool PasteBefore = I != 0 && Tokens[I-1].is(tok::hashhash); + bool PasteAfter = I+1 != E && Tokens[I+1].is(tok::hashhash); + + assert((!NonEmptyPasteBefore || PasteBefore || VCtx.isInVAOpt()) && + "unexpected ## in ResultToks"); // Otherwise, if this is not an argument token, just add the token to the // output buffer. @@ -275,7 +432,7 @@ void TokenLexer::ExpandFunctionArguments() { // avoids some work in common cases. 
const Token *ArgTok = ActualArgs->getUnexpArgument(ArgNo); if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP)) - ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, Macro, PP)[0]; + ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, PP)[0]; else ResultArgToks = ArgTok; // Use non-preexpanded tokens. @@ -374,7 +531,7 @@ void TokenLexer::ExpandFunctionArguments() { if (PasteAfter) { // Discard the argument token and skip (don't copy to the expansion // buffer) the paste operator after it. - ++i; + ++I; continue; } @@ -384,7 +541,13 @@ void TokenLexer::ExpandFunctionArguments() { assert(PasteBefore); if (NonEmptyPasteBefore) { assert(ResultToks.back().is(tok::hashhash)); - ResultToks.pop_back(); + // Do not remove the paste operator if it is the one before __VA_OPT__ + // (and we are still processing tokens within VA_OPT). We handle the case + // of removing the paste operator if __VA_OPT__ reduces to the notional + // placemarker above when we encounter the closing paren of VA_OPT. + if (!VCtx.isInVAOpt() || + ResultToks.size() > VCtx.getNumberOfTokensPriorToVAOpt()) + ResultToks.pop_back(); } // If this is the __VA_ARGS__ token, and if the argument wasn't provided, @@ -420,7 +583,6 @@ static bool isWideStringLiteralFromMacro(const Token &FirstTok, } /// Lex - Lex and return a token from this macro stream. -/// bool TokenLexer::Lex(Token &Tok) { // Lexing off the end of the macro, pop this macro off the expansion stack. if (isAtEnd()) { @@ -431,7 +593,7 @@ bool TokenLexer::Lex(Token &Tok) { Tok.startToken(); Tok.setFlagValue(Token::StartOfLine , AtStartOfLine); Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace || NextTokGetsSpace); - if (CurToken == 0) + if (CurTokenIdx == 0) Tok.setFlag(Token::LeadingEmptyMacro); return PP.HandleEndOfTokenLexer(Tok); } @@ -440,25 +602,25 @@ bool TokenLexer::Lex(Token &Tok) { // If this is the first token of the expanded result, we inherit spacing // properties later. 
- bool isFirstToken = CurToken == 0; + bool isFirstToken = CurTokenIdx == 0; // Get the next token to return. - Tok = Tokens[CurToken++]; + Tok = Tokens[CurTokenIdx++]; bool TokenIsFromPaste = false; // If this token is followed by a token paste (##) operator, paste the tokens! // Note that ## is a normal token when not expanding a macro. if (!isAtEnd() && Macro && - (Tokens[CurToken].is(tok::hashhash) || + (Tokens[CurTokenIdx].is(tok::hashhash) || // Special processing of L#x macros in -fms-compatibility mode. // Microsoft compiler is able to form a wide string literal from // 'L#macro_arg' construct in a function-like macro. (PP.getLangOpts().MSVCCompat && - isWideStringLiteralFromMacro(Tok, Tokens[CurToken])))) { + isWideStringLiteralFromMacro(Tok, Tokens[CurTokenIdx])))) { // When handling the microsoft /##/ extension, the final token is - // returned by PasteTokens, not the pasted token. - if (PasteTokens(Tok)) + // returned by pasteTokens, not the pasted token. + if (pasteTokens(Tok)) return true; TokenIsFromPaste = true; @@ -521,40 +683,57 @@ bool TokenLexer::Lex(Token &Tok) { return true; } -/// PasteTokens - Tok is the LHS of a ## operator, and CurToken is the ## +bool TokenLexer::pasteTokens(Token &Tok) { + return pasteTokens(Tok, llvm::makeArrayRef(Tokens, NumTokens), CurTokenIdx); +} + +/// LHSTok is the LHS of a ## operator, and CurTokenIdx is the ## /// operator. Read the ## and RHS, and paste the LHS/RHS together. If there -/// are more ## after it, chomp them iteratively. Return the result as Tok. +/// are more ## after it, chomp them iteratively. Return the result as LHSTok. /// If this returns true, the caller should immediately return the token. 
-bool TokenLexer::PasteTokens(Token &Tok) { +bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream, + unsigned int &CurIdx) { + assert(CurIdx > 0 && "## can not be the first token within tokens"); + assert((TokenStream[CurIdx].is(tok::hashhash) || + (PP.getLangOpts().MSVCCompat && + isWideStringLiteralFromMacro(LHSTok, TokenStream[CurIdx]))) && + "Token at this Index must be ## or part of the MSVC 'L " + "#macro-arg' pasting pair"); + // MSVC: If previous token was pasted, this must be a recovery from an invalid // paste operation. Ignore spaces before this token to mimic MSVC output. // Required for generating valid UUID strings in some MS headers. - if (PP.getLangOpts().MicrosoftExt && (CurToken >= 2) && - Tokens[CurToken - 2].is(tok::hashhash)) - Tok.clearFlag(Token::LeadingSpace); + if (PP.getLangOpts().MicrosoftExt && (CurIdx >= 2) && + TokenStream[CurIdx - 2].is(tok::hashhash)) + LHSTok.clearFlag(Token::LeadingSpace); SmallString<128> Buffer; const char *ResultTokStrPtr = nullptr; - SourceLocation StartLoc = Tok.getLocation(); + SourceLocation StartLoc = LHSTok.getLocation(); SourceLocation PasteOpLoc; + + auto IsAtEnd = [&TokenStream, &CurIdx] { + return TokenStream.size() == CurIdx; + }; + do { // Consume the ## operator if any. - PasteOpLoc = Tokens[CurToken].getLocation(); - if (Tokens[CurToken].is(tok::hashhash)) - ++CurToken; - assert(!isAtEnd() && "No token on the RHS of a paste operator!"); + PasteOpLoc = TokenStream[CurIdx].getLocation(); + if (TokenStream[CurIdx].is(tok::hashhash)) + ++CurIdx; + assert(!IsAtEnd() && "No token on the RHS of a paste operator!"); // Get the RHS token. - const Token &RHS = Tokens[CurToken]; + const Token &RHS = TokenStream[CurIdx]; // Allocate space for the result token. This is guaranteed to be enough for // the two tokens. - Buffer.resize(Tok.getLength() + RHS.getLength()); + Buffer.resize(LHSTok.getLength() + RHS.getLength()); // Get the spelling of the LHS token in Buffer. 
const char *BufPtr = &Buffer[0]; bool Invalid = false; - unsigned LHSLen = PP.getSpelling(Tok, BufPtr, &Invalid); + unsigned LHSLen = PP.getSpelling(LHSTok, BufPtr, &Invalid); if (BufPtr != &Buffer[0]) // Really, we want the chars in Buffer! memcpy(&Buffer[0], BufPtr, LHSLen); if (Invalid) @@ -586,7 +765,7 @@ bool TokenLexer::PasteTokens(Token &Tok) { // Lex the resultant pasted token into Result. Token Result; - if (Tok.isAnyIdentifier() && RHS.isAnyIdentifier()) { + if (LHSTok.isAnyIdentifier() && RHS.isAnyIdentifier()) { // Common paste case: identifier+identifier = identifier. Avoid creating // a lexer and other overhead. PP.IncrementPasteCounter(true); @@ -626,7 +805,7 @@ bool TokenLexer::PasteTokens(Token &Tok) { isInvalid |= Result.is(tok::eof); // If pasting the two tokens didn't form a full new token, this is an - // error. This occurs with "x ## +" and other stuff. Return with Tok + // error. This occurs with "x ## +" and other stuff. Return with LHSTok // unmodified and with RHS as the next token to lex. if (isInvalid) { // Explicitly convert the token location to have proper expansion @@ -637,9 +816,9 @@ bool TokenLexer::PasteTokens(Token &Tok) { // Test for the Microsoft extension of /##/ turning into // here on the // error path. - if (PP.getLangOpts().MicrosoftExt && Tok.is(tok::slash) && + if (PP.getLangOpts().MicrosoftExt && LHSTok.is(tok::slash) && RHS.is(tok::slash)) { - HandleMicrosoftCommentPaste(Tok, Loc); + HandleMicrosoftCommentPaste(LHSTok, Loc); return true; } @@ -664,15 +843,15 @@ bool TokenLexer::PasteTokens(Token &Tok) { } // Transfer properties of the LHS over the Result. - Result.setFlagValue(Token::StartOfLine , Tok.isAtStartOfLine()); - Result.setFlagValue(Token::LeadingSpace, Tok.hasLeadingSpace()); + Result.setFlagValue(Token::StartOfLine , LHSTok.isAtStartOfLine()); + Result.setFlagValue(Token::LeadingSpace, LHSTok.hasLeadingSpace()); // Finally, replace LHS with the result, consume the RHS, and iterate. 
- ++CurToken; - Tok = Result; - } while (!isAtEnd() && Tokens[CurToken].is(tok::hashhash)); + ++CurIdx; + LHSTok = Result; + } while (!IsAtEnd() && TokenStream[CurIdx].is(tok::hashhash)); - SourceLocation EndLoc = Tokens[CurToken - 1].getLocation(); + SourceLocation EndLoc = TokenStream[CurIdx - 1].getLocation(); // The token's current location indicate where the token was lexed from. We // need this information to compute the spelling of the token, but any @@ -690,16 +869,16 @@ bool TokenLexer::PasteTokens(Token &Tok) { while (SM.getFileID(EndLoc) != MacroFID) EndLoc = SM.getImmediateExpansionRange(EndLoc).second; - Tok.setLocation(SM.createExpansionLoc(Tok.getLocation(), StartLoc, EndLoc, - Tok.getLength())); + LHSTok.setLocation(SM.createExpansionLoc(LHSTok.getLocation(), StartLoc, EndLoc, + LHSTok.getLength())); // Now that we got the result token, it will be subject to expansion. Since // token pasting re-lexes the result token in raw mode, identifier information // isn't looked up. As such, if the result is an identifier, look up id info. - if (Tok.is(tok::raw_identifier)) { + if (LHSTok.is(tok::raw_identifier)) { // Look up the identifier info for the token. We disabled identifier lookup // by saying we're skipping contents, so we need to do this manually. - PP.LookUpIdentifierInfo(Tok); + PP.LookUpIdentifierInfo(LHSTok); } return false; } @@ -711,7 +890,7 @@ unsigned TokenLexer::isNextTokenLParen() const { // Out of tokens? if (isAtEnd()) return 2; - return Tokens[CurToken].is(tok::l_paren); + return Tokens[CurTokenIdx].is(tok::l_paren); } /// isParsingPreprocessorDirective - Return true if we are in the middle of a @@ -831,9 +1010,8 @@ static void updateConsecutiveMacroArgTokens(SourceManager &SM, /// \brief Creates SLocEntries and updates the locations of macro argument /// tokens to their new expanded locations. 
/// -/// \param ArgIdDefLoc the location of the macro argument id inside the macro +/// \param ArgIdSpellLoc the location of the macro argument id inside the macro /// definition. -/// \param Tokens the macro argument tokens to update. void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc, Token *begin_tokens, Token *end_tokens) { |