diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2011-10-20 21:14:49 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2011-10-20 21:14:49 +0000 |
commit | 36981b17ed939300f6f8fc2355a255f711fcef71 (patch) | |
tree | ee2483e98b09cac943dc93a6969d83ca737ff139 /lib/Lex | |
parent | 180abc3db9ae3b4fc63cd65b15697e6ffcc8a657 (diff) | |
download | src-36981b17ed939300f6f8fc2355a255f711fcef71.tar.gz src-36981b17ed939300f6f8fc2355a255f711fcef71.zip |
Notes
Diffstat (limited to 'lib/Lex')
-rw-r--r-- | lib/Lex/HeaderMap.cpp | 4 | ||||
-rw-r--r-- | lib/Lex/HeaderSearch.cpp | 208 | ||||
-rw-r--r-- | lib/Lex/Lexer.cpp | 650 | ||||
-rw-r--r-- | lib/Lex/LiteralSupport.cpp | 394 | ||||
-rw-r--r-- | lib/Lex/MacroArgs.cpp | 48 | ||||
-rw-r--r-- | lib/Lex/MacroArgs.h | 13 | ||||
-rw-r--r-- | lib/Lex/MacroInfo.cpp | 6 | ||||
-rw-r--r-- | lib/Lex/PPCaching.cpp | 2 | ||||
-rw-r--r-- | lib/Lex/PPDirectives.cpp | 140 | ||||
-rw-r--r-- | lib/Lex/PPExpressions.cpp | 56 | ||||
-rw-r--r-- | lib/Lex/PPLexerChange.cpp | 64 | ||||
-rw-r--r-- | lib/Lex/PPMacroExpansion.cpp | 182 | ||||
-rw-r--r-- | lib/Lex/PTHLexer.cpp | 19 | ||||
-rw-r--r-- | lib/Lex/Pragma.cpp | 95 | ||||
-rw-r--r-- | lib/Lex/PreprocessingRecord.cpp | 283 | ||||
-rw-r--r-- | lib/Lex/Preprocessor.cpp | 268 | ||||
-rw-r--r-- | lib/Lex/PreprocessorLexer.cpp | 8 | ||||
-rw-r--r-- | lib/Lex/ScratchBuffer.cpp | 2 | ||||
-rw-r--r-- | lib/Lex/TokenConcatenation.cpp | 72 | ||||
-rw-r--r-- | lib/Lex/TokenLexer.cpp | 219 |
20 files changed, 1983 insertions, 750 deletions
diff --git a/lib/Lex/HeaderMap.cpp b/lib/Lex/HeaderMap.cpp index e102a6da608c..0cb564c222d5 100644 --- a/lib/Lex/HeaderMap.cpp +++ b/lib/Lex/HeaderMap.cpp @@ -57,7 +57,7 @@ struct HMapHeader { /// HashHMapKey - This is the 'well known' hash function required by the file /// format, used to look up keys in the hash table. The hash table uses simple /// linear probing based on this function. -static inline unsigned HashHMapKey(llvm::StringRef Str) { +static inline unsigned HashHMapKey(StringRef Str) { unsigned Result = 0; const char *S = Str.begin(), *End = Str.end(); @@ -200,7 +200,7 @@ void HeaderMap::dump() const { /// LookupFile - Check to see if the specified relative filename is located in /// this HeaderMap. If so, open it and return its FileEntry. const FileEntry *HeaderMap::LookupFile( - llvm::StringRef Filename, FileManager &FM) const { + StringRef Filename, FileManager &FM) const { const HMapHeader &Hdr = getHeader(); unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets); diff --git a/lib/Lex/HeaderSearch.cpp b/lib/Lex/HeaderSearch.cpp index 86ab9564a235..931145a8d655 100644 --- a/lib/Lex/HeaderSearch.cpp +++ b/lib/Lex/HeaderSearch.cpp @@ -18,6 +18,7 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Support/Capacity.h" #include <cstdio> using namespace clang; @@ -97,6 +98,60 @@ const HeaderMap *HeaderSearch::CreateHeaderMap(const FileEntry *FE) { return 0; } +const FileEntry *HeaderSearch::lookupModule(StringRef ModuleName, + std::string *ModuleFileName, + std::string *UmbrellaHeader) { + // If we don't have a module cache path, we can't do anything. + if (ModuleCachePath.empty()) { + if (ModuleFileName) + ModuleFileName->clear(); + return 0; + } + + // Try to find the module path. 
+ llvm::SmallString<256> FileName(ModuleCachePath); + llvm::sys::path::append(FileName, ModuleName + ".pcm"); + if (ModuleFileName) + *ModuleFileName = FileName.str(); + + if (const FileEntry *ModuleFile + = getFileMgr().getFile(FileName, /*OpenFile=*/false, + /*CacheFailure=*/false)) + return ModuleFile; + + // We didn't find the module. If we're not supposed to look for an + // umbrella header, this is the end of the road. + if (!UmbrellaHeader) + return 0; + + // Look in each of the framework directories for an umbrella header with + // the same name as the module. + // FIXME: We need a way for non-frameworks to provide umbrella headers. + llvm::SmallString<128> UmbrellaHeaderName; + UmbrellaHeaderName = ModuleName; + UmbrellaHeaderName += '/'; + UmbrellaHeaderName += ModuleName; + UmbrellaHeaderName += ".h"; + for (unsigned Idx = 0, N = SearchDirs.size(); Idx != N; ++Idx) { + // Skip non-framework include paths + if (!SearchDirs[Idx].isFramework()) + continue; + + // Look for the umbrella header in this directory. + if (const FileEntry *HeaderFile + = SearchDirs[Idx].LookupFile(UmbrellaHeaderName, *this, 0, 0, + StringRef(), 0)) { + *UmbrellaHeader = HeaderFile->getName(); + return 0; + } + } + + // We did not find an umbrella header. Clear out the UmbrellaHeader pointee + // so our caller knows that we failed. + UmbrellaHeader->clear(); + return 0; +} + //===----------------------------------------------------------------------===// // File lookup within a DirectoryLookup scope //===----------------------------------------------------------------------===// @@ -116,17 +171,19 @@ const char *DirectoryLookup::getName() const { /// LookupFile - Lookup the specified file in this search path, returning it /// if it exists or returning null if not. 
const FileEntry *DirectoryLookup::LookupFile( - llvm::StringRef Filename, + StringRef Filename, HeaderSearch &HS, - llvm::SmallVectorImpl<char> *SearchPath, - llvm::SmallVectorImpl<char> *RelativePath) const { + SmallVectorImpl<char> *SearchPath, + SmallVectorImpl<char> *RelativePath, + StringRef BuildingModule, + StringRef *SuggestedModule) const { llvm::SmallString<1024> TmpDir; if (isNormalDir()) { // Concatenate the requested file onto the directory. TmpDir = getDir()->getName(); llvm::sys::path::append(TmpDir, Filename); if (SearchPath != NULL) { - llvm::StringRef SearchPathRef(getDir()->getName()); + StringRef SearchPathRef(getDir()->getName()); SearchPath->clear(); SearchPath->append(SearchPathRef.begin(), SearchPathRef.end()); } @@ -138,14 +195,15 @@ const FileEntry *DirectoryLookup::LookupFile( } if (isFramework()) - return DoFrameworkLookup(Filename, HS, SearchPath, RelativePath); + return DoFrameworkLookup(Filename, HS, SearchPath, RelativePath, + BuildingModule, SuggestedModule); assert(isHeaderMap() && "Unknown directory lookup"); const FileEntry * const Result = getHeaderMap()->LookupFile( Filename, HS.getFileMgr()); if (Result) { if (SearchPath != NULL) { - llvm::StringRef SearchPathRef(getName()); + StringRef SearchPathRef(getName()); SearchPath->clear(); SearchPath->append(SearchPathRef.begin(), SearchPathRef.end()); } @@ -161,15 +219,18 @@ const FileEntry *DirectoryLookup::LookupFile( /// DoFrameworkLookup - Do a lookup of the specified file in the current /// DirectoryLookup, which is a framework directory. 
const FileEntry *DirectoryLookup::DoFrameworkLookup( - llvm::StringRef Filename, + StringRef Filename, HeaderSearch &HS, - llvm::SmallVectorImpl<char> *SearchPath, - llvm::SmallVectorImpl<char> *RelativePath) const { + SmallVectorImpl<char> *SearchPath, + SmallVectorImpl<char> *RelativePath, + StringRef BuildingModule, + StringRef *SuggestedModule) const +{ FileManager &FileMgr = HS.getFileMgr(); // Framework names must have a '/' in the filename. size_t SlashPos = Filename.find('/'); - if (SlashPos == llvm::StringRef::npos) return 0; + if (SlashPos == StringRef::npos) return 0; // Find out if this is the home for the specified framework, by checking // HeaderSearch. Possible answer are yes/no and unknown. @@ -226,9 +287,16 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup( SearchPath->append(FrameworkName.begin(), FrameworkName.end()-1); } + /// Determine whether this is the module we're building or not. + bool AutomaticImport = SuggestedModule && + (BuildingModule != StringRef(Filename.begin(), SlashPos)) && + !Filename.substr(SlashPos + 1).startswith(".."); + FrameworkName.append(Filename.begin()+SlashPos+1, Filename.end()); if (const FileEntry *FE = FileMgr.getFile(FrameworkName.str(), - /*openFile=*/true)) { + /*openFile=*/!AutomaticImport)) { + if (AutomaticImport) + *SuggestedModule = StringRef(Filename.begin(), SlashPos); return FE; } @@ -240,7 +308,11 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup( SearchPath->insert(SearchPath->begin()+OrigSize, Private, Private+strlen(Private)); - return FileMgr.getFile(FrameworkName.str(), /*openFile=*/true); + const FileEntry *FE = FileMgr.getFile(FrameworkName.str(), + /*openFile=*/!AutomaticImport); + if (FE && AutomaticImport) + *SuggestedModule = StringRef(Filename.begin(), SlashPos); + return FE; } @@ -255,13 +327,18 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup( /// non-null, indicates where the #including file is, in case a relative search /// is needed. 
const FileEntry *HeaderSearch::LookupFile( - llvm::StringRef Filename, + StringRef Filename, bool isAngled, const DirectoryLookup *FromDir, const DirectoryLookup *&CurDir, const FileEntry *CurFileEnt, - llvm::SmallVectorImpl<char> *SearchPath, - llvm::SmallVectorImpl<char> *RelativePath) { + SmallVectorImpl<char> *SearchPath, + SmallVectorImpl<char> *RelativePath, + StringRef *SuggestedModule) +{ + if (SuggestedModule) + *SuggestedModule = StringRef(); + // If 'Filename' is absolute, check to see if it exists and no searching. if (llvm::sys::path::is_absolute(Filename)) { CurDir = 0; @@ -279,7 +356,7 @@ const FileEntry *HeaderSearch::LookupFile( return FileMgr.getFile(Filename, /*openFile=*/true); } - // Step #0, unless disabled, check to see if the file is in the #includer's + // Unless disabled, check to see if the file is in the #includer's // directory. This has to be based on CurFileEnt, not CurDir, because // CurFileEnt could be a #include of a subdirectory (#include "foo/bar.h") and // a subsequent include of "baz.h" should resolve to "whatever/foo/baz.h". @@ -301,7 +378,7 @@ const FileEntry *HeaderSearch::LookupFile( unsigned DirInfo = getFileInfo(CurFileEnt).DirInfo; getFileInfo(FE).DirInfo = DirInfo; if (SearchPath != NULL) { - llvm::StringRef SearchPathRef(CurFileEnt->getDir()->getName()); + StringRef SearchPathRef(CurFileEnt->getDir()->getName()); SearchPath->clear(); SearchPath->append(SearchPathRef.begin(), SearchPathRef.end()); } @@ -346,19 +423,56 @@ const FileEntry *HeaderSearch::LookupFile( // Check each directory in sequence to see if it contains this file. for (; i != SearchDirs.size(); ++i) { const FileEntry *FE = - SearchDirs[i].LookupFile(Filename, *this, SearchPath, RelativePath); + SearchDirs[i].LookupFile(Filename, *this, SearchPath, RelativePath, + BuildingModule, SuggestedModule); if (!FE) continue; CurDir = &SearchDirs[i]; // This file is a system header or C++ unfriendly if the dir is. 
- getFileInfo(FE).DirInfo = CurDir->getDirCharacteristic(); - + HeaderFileInfo &HFI = getFileInfo(FE); + HFI.DirInfo = CurDir->getDirCharacteristic(); + + // If this file is found in a header map and uses the framework style of + // includes, then this header is part of a framework we're building. + if (CurDir->isIndexHeaderMap()) { + size_t SlashPos = Filename.find('/'); + if (SlashPos != StringRef::npos) { + HFI.IndexHeaderMapHeader = 1; + HFI.Framework = getUniqueFrameworkName(StringRef(Filename.begin(), + SlashPos)); + } + } + // Remember this location for the next lookup we do. CacheLookup.second = i; return FE; } + // If we are including a file with a quoted include "foo.h" from inside + // a header in a framework that is currently being built, and we couldn't + // resolve "foo.h" any other way, change the include to <Foo/foo.h>, where + // "Foo" is the name of the framework in which the including header was found. + if (CurFileEnt && !isAngled && Filename.find('/') == StringRef::npos) { + HeaderFileInfo &IncludingHFI = getFileInfo(CurFileEnt); + if (IncludingHFI.IndexHeaderMapHeader) { + llvm::SmallString<128> ScratchFilename; + ScratchFilename += IncludingHFI.Framework; + ScratchFilename += '/'; + ScratchFilename += Filename; + + const FileEntry *Result = LookupFile(ScratchFilename, /*isAngled=*/true, + FromDir, CurDir, CurFileEnt, + SearchPath, RelativePath, + SuggestedModule); + std::pair<unsigned, unsigned> &CacheLookup + = LookupFileCache.GetOrCreateValue(Filename).getValue(); + CacheLookup.second + = LookupFileCache.GetOrCreateValue(ScratchFilename).getValue().second; + return Result; + } + } + // Otherwise, didn't find it. Remember we didn't find this. CacheLookup.second = SearchDirs.size(); return 0; @@ -370,15 +484,15 @@ const FileEntry *HeaderSearch::LookupFile( /// is a subframework within Carbon.framework. If so, return the FileEntry /// for the designated file, otherwise return null. 
const FileEntry *HeaderSearch:: -LookupSubframeworkHeader(llvm::StringRef Filename, +LookupSubframeworkHeader(StringRef Filename, const FileEntry *ContextFileEnt, - llvm::SmallVectorImpl<char> *SearchPath, - llvm::SmallVectorImpl<char> *RelativePath) { + SmallVectorImpl<char> *SearchPath, + SmallVectorImpl<char> *RelativePath) { assert(ContextFileEnt && "No context file?"); // Framework names must have a '/' in the filename. Find it. size_t SlashPos = Filename.find('/'); - if (SlashPos == llvm::StringRef::npos) return 0; + if (SlashPos == StringRef::npos) return 0; // Look up the base framework name of the ContextFileEnt. const char *ContextName = ContextFileEnt->getName(); @@ -466,7 +580,31 @@ LookupSubframeworkHeader(llvm::StringRef Filename, // File Info Management. //===----------------------------------------------------------------------===// +/// \brief Merge the header file info provided by \p OtherHFI into the current +/// header file info (\p HFI) +static void mergeHeaderFileInfo(HeaderFileInfo &HFI, + const HeaderFileInfo &OtherHFI) { + HFI.isImport |= OtherHFI.isImport; + HFI.isPragmaOnce |= OtherHFI.isPragmaOnce; + HFI.NumIncludes += OtherHFI.NumIncludes; + + if (!HFI.ControllingMacro && !HFI.ControllingMacroID) { + HFI.ControllingMacro = OtherHFI.ControllingMacro; + HFI.ControllingMacroID = OtherHFI.ControllingMacroID; + } + + if (OtherHFI.External) { + HFI.DirInfo = OtherHFI.DirInfo; + HFI.External = OtherHFI.External; + HFI.IndexHeaderMapHeader = OtherHFI.IndexHeaderMapHeader; + } + if (HFI.Framework.empty()) + HFI.Framework = OtherHFI.Framework; + + HFI.Resolved = true; +} + /// getFileInfo - Return the HeaderFileInfo structure for the specified /// FileEntry. 
HeaderFileInfo &HeaderSearch::getFileInfo(const FileEntry *FE) { @@ -474,10 +612,8 @@ HeaderFileInfo &HeaderSearch::getFileInfo(const FileEntry *FE) { FileInfo.resize(FE->getUID()+1); HeaderFileInfo &HFI = FileInfo[FE->getUID()]; - if (ExternalSource && !HFI.Resolved) { - HFI = ExternalSource->GetHeaderFileInfo(FE); - HFI.Resolved = true; - } + if (ExternalSource && !HFI.Resolved) + mergeHeaderFileInfo(HFI, ExternalSource->GetHeaderFileInfo(FE)); return HFI; } @@ -488,10 +624,8 @@ bool HeaderSearch::isFileMultipleIncludeGuarded(const FileEntry *File) { // Resolve header file info from the external source, if needed. HeaderFileInfo &HFI = FileInfo[File->getUID()]; - if (ExternalSource && !HFI.Resolved) { - HFI = ExternalSource->GetHeaderFileInfo(File); - HFI.Resolved = true; - } + if (ExternalSource && !HFI.Resolved) + mergeHeaderFileInfo(HFI, ExternalSource->GetHeaderFileInfo(File)); return HFI.isPragmaOnce || HFI.ControllingMacro || HFI.ControllingMacroID; } @@ -542,4 +676,14 @@ bool HeaderSearch::ShouldEnterIncludeFile(const FileEntry *File, bool isImport){ return true; } +size_t HeaderSearch::getTotalMemory() const { + return SearchDirs.capacity() + + llvm::capacity_in_bytes(FileInfo) + + llvm::capacity_in_bytes(HeaderMaps) + + LookupFileCache.getAllocator().getTotalMemory() + + FrameworkMap.getAllocator().getTotalMemory(); +} +StringRef HeaderSearch::getUniqueFrameworkName(StringRef Framework) { + return FrameworkNames.GetOrCreateValue(Framework).getKey(); +} diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index a28b8f6e7b9f..a98d889dbc98 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -32,7 +32,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/MemoryBuffer.h" -#include <cctype> +#include <cstring> using namespace clang; static void InitCharacterInfo(); @@ -76,7 +76,7 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr, // skip the UTF-8 BOM if it's present. 
if (BufferStart == BufferPtr) { // Determine the size of the BOM. - llvm::StringRef Buf(BufferStart, BufferEnd - BufferStart); + StringRef Buf(BufferStart, BufferEnd - BufferStart); size_t BOMLength = llvm::StringSwitch<size_t>(Buf) .StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM .Default(0); @@ -86,7 +86,7 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr, } Is_PragmaLexer = false; - IsInConflictMarker = false; + CurrentConflictMarkerState = CMK_None; // Start of the file is a start of line. IsAtStartOfLine = true; @@ -187,9 +187,9 @@ Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc, // Set the SourceLocation with the remapping information. This ensures that // GetMappedTokenLoc will remap the tokens as they are lexed. - L->FileLoc = SM.createInstantiationLoc(SM.getLocForStartOfFile(SpellingFID), - ExpansionLocStart, - ExpansionLocEnd, TokLen); + L->FileLoc = SM.createExpansionLoc(SM.getLocForStartOfFile(SpellingFID), + ExpansionLocStart, + ExpansionLocEnd, TokLen); // Ensure that the lexer thinks it is inside a directive, so that end \n will // return an EOD token. @@ -217,7 +217,7 @@ std::string Lexer::Stringify(const std::string &Str, bool Charify) { /// Stringify - Convert the specified string into a C string by escaping '\' /// and " characters. This does not add surrounding ""'s to the string. -void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) { +void Lexer::Stringify(SmallVectorImpl<char> &Str) { for (unsigned i = 0, e = Str.size(); i != e; ++i) { if (Str[i] == '\\' || Str[i] == '"') { Str.insert(Str.begin()+i, '\\'); @@ -235,8 +235,8 @@ void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) { /// after trigraph expansion and escaped-newline folding. In particular, this /// wants to get the true, uncanonicalized, spelling of things like digraphs /// UCNs, etc. 
-llvm::StringRef Lexer::getSpelling(SourceLocation loc, - llvm::SmallVectorImpl<char> &buffer, +StringRef Lexer::getSpelling(SourceLocation loc, + SmallVectorImpl<char> &buffer, const SourceManager &SM, const LangOptions &options, bool *invalid) { @@ -245,10 +245,10 @@ llvm::StringRef Lexer::getSpelling(SourceLocation loc, // Try to the load the file buffer. bool invalidTemp = false; - llvm::StringRef file = SM.getBufferData(locInfo.first, &invalidTemp); + StringRef file = SM.getBufferData(locInfo.first, &invalidTemp); if (invalidTemp) { if (invalid) *invalid = true; - return llvm::StringRef(); + return StringRef(); } const char *tokenBegin = file.data() + locInfo.second; @@ -263,7 +263,7 @@ llvm::StringRef Lexer::getSpelling(SourceLocation loc, // Common case: no need for cleaning. if (!token.needsCleaning()) - return llvm::StringRef(tokenBegin, length); + return StringRef(tokenBegin, length); // Hard case, we need to relex the characters into the string. buffer.clear(); @@ -275,7 +275,7 @@ llvm::StringRef Lexer::getSpelling(SourceLocation loc, ti += charSize; } - return llvm::StringRef(buffer.data(), buffer.size()); + return StringRef(buffer.data(), buffer.size()); } /// getSpelling() - Return the 'spelling' of this token. The spelling of a @@ -394,10 +394,10 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc, // If this comes from a macro expansion, we really do want the macro name, not // the token this macro expanded to. 
- Loc = SM.getInstantiationLoc(Loc); + Loc = SM.getExpansionLoc(Loc); std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc); bool Invalid = false; - llvm::StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); + StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); if (Invalid) return 0; @@ -415,15 +415,16 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc, return TheTok.getLength(); } -SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc, - const SourceManager &SM, - const LangOptions &LangOpts) { +static SourceLocation getBeginningOfFileToken(SourceLocation Loc, + const SourceManager &SM, + const LangOptions &LangOpts) { + assert(Loc.isFileID()); std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc); if (LocInfo.first.isInvalid()) return Loc; bool Invalid = false; - llvm::StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); + StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); if (Invalid) return Loc; @@ -448,7 +449,7 @@ SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc, } // Create a lexer starting at the beginning of this token. 
- SourceLocation LexerStartLoc = Loc.getFileLocWithOffset(-LocInfo.second); + SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second); Lexer TheLexer(LexerStartLoc, LangOpts, BufStart, LexStart, Buffer.end()); TheLexer.SetCommentRetentionState(true); @@ -474,6 +475,25 @@ SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc, return Loc; } +SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc, + const SourceManager &SM, + const LangOptions &LangOpts) { + if (Loc.isFileID()) + return getBeginningOfFileToken(Loc, SM, LangOpts); + + if (!SM.isMacroArgExpansion(Loc)) + return Loc; + + SourceLocation FileLoc = SM.getSpellingLoc(Loc); + SourceLocation BeginFileLoc = getBeginningOfFileToken(FileLoc, SM, LangOpts); + std::pair<FileID, unsigned> FileLocInfo = SM.getDecomposedLoc(FileLoc); + std::pair<FileID, unsigned> BeginFileLocInfo= SM.getDecomposedLoc(BeginFileLoc); + assert(FileLocInfo.first == BeginFileLocInfo.first && + FileLocInfo.second >= BeginFileLocInfo.second); + return Loc.getLocWithOffset(SM.getDecomposedLoc(BeginFileLoc).second - + SM.getDecomposedLoc(FileLoc).second); +} + namespace { enum PreambleDirectiveKind { PDK_Skipped, @@ -484,21 +504,36 @@ namespace { } std::pair<unsigned, bool> -Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer, unsigned MaxLines) { +Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer, + const LangOptions &Features, unsigned MaxLines) { // Create a lexer starting at the beginning of the file. Note that we use a // "fake" file source location at offset 1 so that the lexer will track our // position within the file. 
const unsigned StartOffset = 1; SourceLocation StartLoc = SourceLocation::getFromRawEncoding(StartOffset); - LangOptions LangOpts; - Lexer TheLexer(StartLoc, LangOpts, Buffer->getBufferStart(), + Lexer TheLexer(StartLoc, Features, Buffer->getBufferStart(), Buffer->getBufferStart(), Buffer->getBufferEnd()); bool InPreprocessorDirective = false; Token TheTok; Token IfStartTok; unsigned IfCount = 0; - unsigned Line = 0; + + unsigned MaxLineOffset = 0; + if (MaxLines) { + const char *CurPtr = Buffer->getBufferStart(); + unsigned CurLine = 0; + while (CurPtr != Buffer->getBufferEnd()) { + char ch = *CurPtr++; + if (ch == '\n') { + ++CurLine; + if (CurLine == MaxLines) + break; + } + } + if (CurPtr != Buffer->getBufferEnd()) + MaxLineOffset = CurPtr - Buffer->getBufferStart(); + } do { TheLexer.LexFromRawLexer(TheTok); @@ -522,11 +557,11 @@ Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer, unsigned MaxLines) { // Keep track of the # of lines in the preamble. if (TheTok.isAtStartOfLine()) { - ++Line; + unsigned TokOffset = TheTok.getLocation().getRawEncoding() - StartOffset; // If we were asked to limit the number of lines in the preamble, // and we're about to exceed that limit, we're done. - if (MaxLines && Line >= MaxLines) + if (MaxLineOffset && TokOffset >= MaxLineOffset) break; } @@ -539,12 +574,12 @@ Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer, unsigned MaxLines) { Token HashTok = TheTok; InPreprocessorDirective = true; - // Figure out which direective this is. Since we're lexing raw tokens, + // Figure out which directive this is. Since we're lexing raw tokens, // we don't have an identifier table available. Instead, just look at // the raw identifier to recognize and categorize preprocessor directives. 
TheLexer.LexFromRawLexer(TheTok); if (TheTok.getKind() == tok::raw_identifier && !TheTok.needsCleaning()) { - llvm::StringRef Keyword(TheTok.getRawIdentifierData(), + StringRef Keyword(TheTok.getRawIdentifierData(), TheTok.getLength()); PreambleDirectiveKind PDK = llvm::StringSwitch<PreambleDirectiveKind>(Keyword) @@ -638,7 +673,7 @@ SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart, // chars, this method is extremely fast. while (Lexer::isObviouslySimpleCharacter(*TokPtr)) { if (CharNo == 0) - return TokStart.getFileLocWithOffset(PhysOffset); + return TokStart.getLocWithOffset(PhysOffset); ++TokPtr, --CharNo, ++PhysOffset; } @@ -658,7 +693,7 @@ SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart, if (!Lexer::isObviouslySimpleCharacter(*TokPtr)) PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr; - return TokStart.getFileLocWithOffset(PhysOffset); + return TokStart.getLocWithOffset(PhysOffset); } /// \brief Computes the source location just past the end of the @@ -687,7 +722,7 @@ SourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset, return SourceLocation(); // Points inside the macro expansion. // Continue and find the location just after the macro expansion. - Loc = SM.getInstantiationRange(Loc).second; + Loc = SM.getExpansionRange(Loc).second; } unsigned Len = Lexer::MeasureTokenLength(Loc, SM, Features); @@ -696,14 +731,14 @@ SourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset, else return Loc; - return Loc.getFileLocWithOffset(Len); + return Loc.getLocWithOffset(Len); } /// \brief Returns true if the given MacroID location points at the first /// token of the macro expansion. 
bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc, - const SourceManager &SM, - const LangOptions &LangOpts) { + const SourceManager &SM, + const LangOptions &LangOpts) { assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc"); std::pair<FileID, unsigned> infoLoc = SM.getDecomposedLoc(loc); @@ -713,8 +748,7 @@ bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc, return false; // Does not point at the start of token. SourceLocation expansionLoc = - SM.getSLocEntry(infoLoc.first) - .getInstantiation().getInstantiationLocStart(); + SM.getSLocEntry(infoLoc.first).getExpansion().getExpansionLocStart(); if (expansionLoc.isFileID()) return true; // No other macro expansions, this is the first. @@ -734,17 +768,15 @@ bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc, return false; FileID FID = SM.getFileID(loc); - SourceLocation afterLoc = loc.getFileLocWithOffset(tokLen+1); - if (!SM.isBeforeInSourceLocationOffset(afterLoc, SM.getNextOffset())) - return true; // We got past the last FileID, this points to the last token. + SourceLocation afterLoc = loc.getLocWithOffset(tokLen+1); + if (SM.isInFileID(afterLoc, FID)) + return false; // Still in the same FileID, does not point to the last token. // FIXME: If the token comes from the macro token paste operator ('##') // or the stringify operator ('#') this function will always return false; - if (FID == SM.getFileID(afterLoc)) - return false; // Still in the same FileID, does not point to the last token. - + SourceLocation expansionLoc = - SM.getSLocEntry(FID).getInstantiation().getInstantiationLocEnd(); + SM.getSLocEntry(FID).getExpansion().getExpansionLocEnd(); if (expansionLoc.isFileID()) return true; // No other macro expansions. @@ -761,7 +793,8 @@ enum { CHAR_LETTER = 0x04, // a-z,A-Z CHAR_NUMBER = 0x08, // 0-9 CHAR_UNDER = 0x10, // _ - CHAR_PERIOD = 0x20 // . + CHAR_PERIOD = 0x20, // . 
+ CHAR_RAWDEL = 0x40 // {}[]#<>%:;?*+-/^&|~!=,"' }; // Statically initialize CharInfo table based on ASCII character set @@ -786,20 +819,20 @@ static const unsigned char CharInfo[256] = 0 , 0 , 0 , 0 , //32 SP 33 ! 34 " 35 # //36 $ 37 % 38 & 39 ' - CHAR_HORZ_WS, 0 , 0 , 0 , - 0 , 0 , 0 , 0 , + CHAR_HORZ_WS, CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , + 0 , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , //40 ( 41 ) 42 * 43 + //44 , 45 - 46 . 47 / - 0 , 0 , 0 , 0 , - 0 , 0 , CHAR_PERIOD , 0 , + 0 , 0 , CHAR_RAWDEL , CHAR_RAWDEL , + CHAR_RAWDEL , CHAR_RAWDEL , CHAR_PERIOD , CHAR_RAWDEL , //48 0 49 1 50 2 51 3 //52 4 53 5 54 6 55 7 CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , //56 8 57 9 58 : 59 ; //60 < 61 = 62 > 63 ? - CHAR_NUMBER , CHAR_NUMBER , 0 , 0 , - 0 , 0 , 0 , 0 , + CHAR_NUMBER , CHAR_NUMBER , CHAR_RAWDEL , CHAR_RAWDEL , + CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , //64 @ 65 A 66 B 67 C //68 D 69 E 70 F 71 G 0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , @@ -814,8 +847,8 @@ static const unsigned char CharInfo[256] = CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , //88 X 89 Y 90 Z 91 [ //92 \ 93 ] 94 ^ 95 _ - CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0 , - 0 , 0 , 0 , CHAR_UNDER , + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_RAWDEL , + 0 , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_UNDER , //96 ` 97 a 98 b 99 c //100 d 101 e 102 f 103 g 0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , @@ -829,9 +862,9 @@ static const unsigned char CharInfo[256] = CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , //120 x 121 y 122 z 123 { -//124 | 125 } 126 ~ 127 DEL - CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0 , - 0 , 0 , 0 , 0 +//124 | 125 } 126 ~ 127 DEL + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_RAWDEL , + CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , 0 }; static void InitCharacterInfo() { @@ -869,6 +902,12 @@ static inline bool 
isHorizontalWhitespace(unsigned char c) { return (CharInfo[c] & CHAR_HORZ_WS) ? true : false; } +/// isVerticalWhitespace - Return true if this character is vertical +/// whitespace: '\n', '\r'. Note that this returns false for '\0'. +static inline bool isVerticalWhitespace(unsigned char c) { + return (CharInfo[c] & CHAR_VERT_WS) ? true : false; +} + /// isWhitespace - Return true if this character is horizontal or vertical /// whitespace: ' ', '\t', '\f', '\v', '\n', '\r'. Note that this returns false /// for '\0'. @@ -883,6 +922,14 @@ static inline bool isNumberBody(unsigned char c) { true : false; } +/// isRawStringDelimBody - Return true if this is the body character of a +/// raw string delimiter. +static inline bool isRawStringDelimBody(unsigned char c) { + return (CharInfo[c] & + (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD|CHAR_RAWDEL)) ? + true : false; +} + //===----------------------------------------------------------------------===// // Diagnostics forwarding code. @@ -907,14 +954,14 @@ static SourceLocation GetMappedTokenLoc(Preprocessor &PP, // Create a new SLoc which is expanded from Expansion(FileLoc) but whose // characters come from spelling(FileLoc)+Offset. SourceLocation SpellingLoc = SM.getSpellingLoc(FileLoc); - SpellingLoc = SpellingLoc.getFileLocWithOffset(CharNo); + SpellingLoc = SpellingLoc.getLocWithOffset(CharNo); // Figure out the expansion loc range, which is the range covered by the // original _Pragma(...) sequence. std::pair<SourceLocation,SourceLocation> II = - SM.getImmediateInstantiationRange(FileLoc); + SM.getImmediateExpansionRange(FileLoc); - return SM.createInstantiationLoc(SpellingLoc, II.first, II.second, TokLen); + return SM.createExpansionLoc(SpellingLoc, II.first, II.second, TokLen); } /// getSourceLocation - Return a source location identifier for the specified @@ -928,7 +975,7 @@ SourceLocation Lexer::getSourceLocation(const char *Loc, // the file id from FileLoc with the offset specified. 
unsigned CharNo = Loc-BufferStart; if (FileLoc.isFileID()) - return FileLoc.getFileLocWithOffset(CharNo); + return FileLoc.getLocWithOffset(CharNo); // Otherwise, this is the _Pragma lexer case, which pretends that all of the // tokens are lexed from where the _Pragma was defined. @@ -978,7 +1025,7 @@ static char DecodeTrigraphChar(const char *CP, Lexer *L) { } if (!L->isLexingRawMode()) - L->Diag(CP-2, diag::trigraph_converted) << llvm::StringRef(&Res, 1); + L->Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1); return Res; } @@ -1028,6 +1075,59 @@ const char *Lexer::SkipEscapedNewLines(const char *P) { } } +/// \brief Checks that the given token is the first token that occurs after the +/// given location (this excludes comments and whitespace). Returns the location +/// immediately after the specified token. If the token is not found or the +/// location is inside a macro, the returned source location will be invalid. +SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc, + tok::TokenKind TKind, + const SourceManager &SM, + const LangOptions &LangOpts, + bool SkipTrailingWhitespaceAndNewLine) { + if (Loc.isMacroID()) { + if (!Lexer::isAtEndOfMacroExpansion(Loc, SM, LangOpts)) + return SourceLocation(); + Loc = SM.getExpansionRange(Loc).second; + } + Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts); + + // Break down the source location. + std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc); + + // Try to load the file buffer. + bool InvalidTemp = false; + llvm::StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp); + if (InvalidTemp) + return SourceLocation(); + + const char *TokenBegin = File.data() + LocInfo.second; + + // Lex from the start of the given location. + Lexer lexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts, File.begin(), + TokenBegin, File.end()); + // Find the token. 
+ Token Tok; + lexer.LexFromRawLexer(Tok); + if (Tok.isNot(TKind)) + return SourceLocation(); + SourceLocation TokenLoc = Tok.getLocation(); + + // Calculate how much whitespace needs to be skipped if any. + unsigned NumWhitespaceChars = 0; + if (SkipTrailingWhitespaceAndNewLine) { + const char *TokenEnd = SM.getCharacterData(TokenLoc) + + Tok.getLength(); + unsigned char C = *TokenEnd; + while (isHorizontalWhitespace(C)) { + C = *(++TokenEnd); + NumWhitespaceChars++; + } + if (isVerticalWhitespace(C)) + NumWhitespaceChars++; + } + + return TokenLoc.getLocWithOffset(Tok.getLength() + NumWhitespaceChars); +} /// getCharAndSizeSlow - Peek a single 'character' from the specified buffer, /// get its size, and return it. This is tricky in several cases: @@ -1191,6 +1291,7 @@ FinishIdentifier: // preprocessor, which may macro expand it or something. if (II->isHandleIdentifierCase()) PP->HandleIdentifier(Result); + return; } @@ -1252,13 +1353,12 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { if ((C == '-' || C == '+') && (PrevCh == 'E' || PrevCh == 'e')) { // If we are in Microsoft mode, don't continue if the constant is hex. // For example, MSVC will accept the following as 3 tokens: 0x1234567e+1 - if (!Features.Microsoft || !isHexaLiteral(BufferPtr, Features)) + if (!Features.MicrosoftExt || !isHexaLiteral(BufferPtr, Features)) return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result)); } // If we have a hex FP constant, continue. - if ((C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p') && - !Features.CPlusPlus0x) + if ((C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p')) return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result)); // Update the location of token as well as BufferPtr. @@ -1268,10 +1368,17 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { } /// LexStringLiteral - Lex the remainder of a string literal, after having lexed -/// either " or L". 
-void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) { +/// either " or L" or u8" or u" or U". +void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, + tok::TokenKind Kind) { const char *NulCharacter = 0; // Does this string contain the \0 character? + if (!isLexingRawMode() && + (Kind == tok::utf8_string_literal || + Kind == tok::utf16_string_literal || + Kind == tok::utf32_string_literal)) + Diag(BufferPtr, diag::warn_cxx98_compat_unicode_literal); + char C = getAndAdvanceChar(CurPtr, Result); while (C != '"') { // Skip escaped characters. Escaped newlines will already be processed by @@ -1281,16 +1388,21 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) { if (C == '\n' || C == '\r' || // Newline. (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. - if (C == 0 && PP && PP->isCodeCompletionFile(FileLoc)) - PP->CodeCompleteNaturalLanguage(); - else if (!isLexingRawMode() && !Features.AsmPreprocessor) + if (!isLexingRawMode() && !Features.AsmPreprocessor) Diag(BufferPtr, diag::warn_unterminated_string); FormTokenWithChars(Result, CurPtr-1, tok::unknown); return; } - if (C == 0) + if (C == 0) { + if (isCodeCompletionPoint(CurPtr-1)) { + PP->CodeCompleteNaturalLanguage(); + FormTokenWithChars(Result, CurPtr-1, tok::unknown); + return cutOffLexing(); + } + NulCharacter = CurPtr-1; + } C = getAndAdvanceChar(CurPtr, Result); } @@ -1300,8 +1412,82 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) { // Update the location of the token as well as the BufferPtr instance var. const char *TokStart = BufferPtr; - FormTokenWithChars(Result, CurPtr, - Wide ? tok::wide_string_literal : tok::string_literal); + FormTokenWithChars(Result, CurPtr, Kind); + Result.setLiteralData(TokStart); +} + +/// LexRawStringLiteral - Lex the remainder of a raw string literal, after +/// having lexed R", LR", u8R", uR", or UR". 
+void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr, + tok::TokenKind Kind) { + // This function doesn't use getAndAdvanceChar because C++0x [lex.pptoken]p3: + // Between the initial and final double quote characters of the raw string, + // any transformations performed in phases 1 and 2 (trigraphs, + // universal-character-names, and line splicing) are reverted. + + if (!isLexingRawMode()) + Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal); + + unsigned PrefixLen = 0; + + while (PrefixLen != 16 && isRawStringDelimBody(CurPtr[PrefixLen])) + ++PrefixLen; + + // If the last character was not a '(', then we didn't lex a valid delimiter. + if (CurPtr[PrefixLen] != '(') { + if (!isLexingRawMode()) { + const char *PrefixEnd = &CurPtr[PrefixLen]; + if (PrefixLen == 16) { + Diag(PrefixEnd, diag::err_raw_delim_too_long); + } else { + Diag(PrefixEnd, diag::err_invalid_char_raw_delim) + << StringRef(PrefixEnd, 1); + } + } + + // Search for the next '"' in hopes of salvaging the lexer. Unfortunately, + // it's possible the '"' was intended to be part of the raw string, but + // there's not much we can do about that. + while (1) { + char C = *CurPtr++; + + if (C == '"') + break; + if (C == 0 && CurPtr-1 == BufferEnd) { + --CurPtr; + break; + } + } + + FormTokenWithChars(Result, CurPtr, tok::unknown); + return; + } + + // Save prefix and move CurPtr past it + const char *Prefix = CurPtr; + CurPtr += PrefixLen + 1; // skip over prefix and '(' + + while (1) { + char C = *CurPtr++; + + if (C == ')') { + // Check for prefix match and closing quote. + if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] == '"') { + CurPtr += PrefixLen + 1; // skip over prefix and '"' + break; + } + } else if (C == 0 && CurPtr-1 == BufferEnd) { // End of file. 
+ if (!isLexingRawMode()) + Diag(BufferPtr, diag::err_unterminated_raw_string) + << StringRef(Prefix, PrefixLen); + FormTokenWithChars(Result, CurPtr-1, tok::unknown); + return; + } + } + + // Update the location of token as well as BufferPtr. + const char *TokStart = BufferPtr; + FormTokenWithChars(Result, CurPtr, Kind); Result.setLiteralData(TokStart); } @@ -1317,7 +1503,8 @@ void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { // Skip the escaped character. C = getAndAdvanceChar(CurPtr, Result); } else if (C == '\n' || C == '\r' || // Newline. - (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. + (C == 0 && (CurPtr-1 == BufferEnd || // End of file. + isCodeCompletionPoint(CurPtr-1)))) { // If the filename is unterminated, then it must just be a lone < // character. Return this as such. FormTokenWithChars(Result, AfterLessPos, tok::less); @@ -1340,10 +1527,15 @@ void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { /// LexCharConstant - Lex the remainder of a character constant, after having -/// lexed either ' or L'. -void Lexer::LexCharConstant(Token &Result, const char *CurPtr) { +/// lexed either ' or L' or u' or U'. +void Lexer::LexCharConstant(Token &Result, const char *CurPtr, + tok::TokenKind Kind) { const char *NulCharacter = 0; // Does this character contain the \0 character? + if (!isLexingRawMode() && + (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)) + Diag(BufferPtr, diag::warn_cxx98_compat_unicode_literal); + char C = getAndAdvanceChar(CurPtr, Result); if (C == '\'') { if (!isLexingRawMode() && !Features.AsmPreprocessor) @@ -1360,13 +1552,17 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr) { C = getAndAdvanceChar(CurPtr, Result); } else if (C == '\n' || C == '\r' || // Newline. (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. 
- if (C == 0 && PP && PP->isCodeCompletionFile(FileLoc)) - PP->CodeCompleteNaturalLanguage(); - else if (!isLexingRawMode() && !Features.AsmPreprocessor) + if (!isLexingRawMode() && !Features.AsmPreprocessor) Diag(BufferPtr, diag::warn_unterminated_char); FormTokenWithChars(Result, CurPtr-1, tok::unknown); return; } else if (C == 0) { + if (isCodeCompletionPoint(CurPtr-1)) { + PP->CodeCompleteNaturalLanguage(); + FormTokenWithChars(Result, CurPtr-1, tok::unknown); + return cutOffLexing(); + } + NulCharacter = CurPtr-1; } C = getAndAdvanceChar(CurPtr, Result); @@ -1378,7 +1574,7 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr) { // Update the location of token as well as BufferPtr. const char *TokStart = BufferPtr; - FormTokenWithChars(Result, CurPtr, tok::char_constant); + FormTokenWithChars(Result, CurPtr, Kind); Result.setLiteralData(TokStart); } @@ -1451,20 +1647,28 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) { char C; do { C = *CurPtr; - // FIXME: Speedup BCPL comment lexing. Just scan for a \n or \r character. - // If we find a \n character, scan backwards, checking to see if it's an - // escaped newline, like we do for block comments. - // Skip over characters in the fast loop. while (C != 0 && // Potentially EOF. - C != '\\' && // Potentially escaped newline. - C != '?' && // Potentially trigraph. C != '\n' && C != '\r') // Newline or DOS-style newline. C = *++CurPtr; - // If this is a newline, we're done. - if (C == '\n' || C == '\r') - break; // Found the newline? Break out! + const char *NextLine = CurPtr; + if (C != 0) { + // We found a newline, see if it's escaped. + const char *EscapePtr = CurPtr-1; + while (isHorizontalWhitespace(*EscapePtr)) // Skip whitespace. + --EscapePtr; + + if (*EscapePtr == '\\') // Escaped newline. + CurPtr = EscapePtr; + else if (EscapePtr[0] == '/' && EscapePtr[-1] == '?' && + EscapePtr[-2] == '?') // Trigraph-escaped newline. 
+ CurPtr = EscapePtr-2; + else + break; // This is a newline, we're done. + + C = *CurPtr; + } // Otherwise, this is a hard case. Fall back on getAndAdvanceChar to // properly decode the character. Read it in raw mode to avoid emitting @@ -1476,6 +1680,13 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) { C = getAndAdvanceChar(CurPtr, Result); LexingRawMode = OldRawMode; + // If we only read only one character, then no special handling is needed. + // We're done and can skip forward to the newline. + if (C != 0 && CurPtr == OldPtr+1) { + CurPtr = NextLine; + break; + } + // If the char that we finally got was a \n, then we must have had something // like \<newline><newline>. We don't want to have consumed the second // newline, we want CurPtr, to end up pointing to it down below. @@ -1492,9 +1703,9 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) { if (OldPtr[0] == '\n' || OldPtr[0] == '\r') { // Okay, we found a // comment that ends in a newline, if the next // line is also a // comment, but has spaces, don't emit a diagnostic. - if (isspace(C)) { + if (isWhitespace(C)) { const char *ForwardPtr = CurPtr; - while (isspace(*ForwardPtr)) // Skip whitespace. + while (isWhitespace(*ForwardPtr)) // Skip whitespace. ++ForwardPtr; if (ForwardPtr[0] == '/' && ForwardPtr[1] == '/') break; @@ -1507,12 +1718,16 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) { } if (CurPtr == BufferEnd+1) { - if (PP && PP->isCodeCompletionFile(FileLoc)) - PP->CodeCompleteNaturalLanguage(); - --CurPtr; break; } + + if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) { + PP->CodeCompleteNaturalLanguage(); + cutOffLexing(); + return false; + } + } while (C != '\n' && C != '\r'); // Found but did not consume the newline. 
Notify comment handlers about the @@ -1573,7 +1788,7 @@ bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) { Result.setKind(tok::comment); PP->CreateString(&Spelling[0], Spelling.size(), Result, - Result.getLocation()); + Result.getLocation(), Result.getLocation()); return true; } @@ -1667,8 +1882,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) { unsigned char C = getCharAndSize(CurPtr, CharSize); CurPtr += CharSize; if (C == 0 && CurPtr == BufferEnd+1) { - if (!isLexingRawMode() && - !PP->isCodeCompletionFile(FileLoc)) + if (!isLexingRawMode()) Diag(BufferPtr, diag::err_unterminated_block_comment); --CurPtr; @@ -1691,7 +1905,10 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) { while (1) { // Skip over all non-interesting characters until we find end of buffer or a // (probably ending) '/' character. - if (CurPtr + 24 < BufferEnd) { + if (CurPtr + 24 < BufferEnd && + // If there is a code-completion point avoid the fast scan because it + // doesn't check for '\0'. + !(PP && PP->getCodeCompletionFileLoc() == FileLoc)) { // While not aligned to a 16-byte boundary. while (C != '/' && ((intptr_t)CurPtr & 0x0F) != 0) C = *CurPtr++; @@ -1751,9 +1968,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) { Diag(CurPtr-1, diag::warn_nested_block_comment); } } else if (C == 0 && CurPtr == BufferEnd+1) { - if (PP && PP->isCodeCompletionFile(FileLoc)) - PP->CodeCompleteNaturalLanguage(); - else if (!isLexingRawMode()) + if (!isLexingRawMode()) Diag(BufferPtr, diag::err_unterminated_block_comment); // Note: the user probably forgot a */. 
We could continue immediately // after the /*, but this would involve lexing a lot of what really is the @@ -1769,7 +1984,12 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) { BufferPtr = CurPtr; return false; + } else if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) { + PP->CodeCompleteNaturalLanguage(); + cutOffLexing(); + return false; } + C = *CurPtr++; } @@ -1826,6 +2046,12 @@ std::string Lexer::ReadToEndOfLine() { case 0: // Null. // Found end of file? if (CurPtr-1 != BufferEnd) { + if (isCodeCompletionPoint(CurPtr-1)) { + PP->CodeCompleteNaturalLanguage(); + cutOffLexing(); + return Result; + } + // Nope, normal character, continue. Result += Char; break; @@ -1840,8 +2066,8 @@ std::string Lexer::ReadToEndOfLine() { // Next, lex the character, which should handle the EOD transition. Lex(Tmp); if (Tmp.is(tok::code_completion)) { - if (PP && PP->getCodeCompletionHandler()) - PP->getCodeCompletionHandler()->CodeCompleteNaturalLanguage(); + if (PP) + PP->CodeCompleteNaturalLanguage(); Lex(Tmp); } assert(Tmp.is(tok::eod) && "Unexpected token!"); @@ -1857,22 +2083,6 @@ std::string Lexer::ReadToEndOfLine() { /// This returns true if Result contains a token, false if PP.Lex should be /// called again. bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { - // Check if we are performing code completion. - if (PP && PP->isCodeCompletionFile(FileLoc)) { - // We're at the end of the file, but we've been asked to consider the - // end of the file to be a code-completion token. Return the - // code-completion token. - Result.startToken(); - FormTokenWithChars(Result, CurPtr, tok::code_completion); - - // Only do the eof -> code_completion translation once. - PP->SetCodeCompletionPoint(0, 0, 0); - - // Silence any diagnostics that occur once we hit the code-completion point. 
- PP->getDiagnostics().setSuppressAllDiagnostics(true); - return true; - } - // If we hit the end of the file while parsing a preprocessor directive, // end the preprocessor directive first. The next token returned will // then be the end of file. @@ -1900,7 +2110,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { // If we are in a #if directive, emit an error. while (!ConditionalStack.empty()) { - if (!PP->isCodeCompletionFile(FileLoc)) + if (PP->getCodeCompletionFileLoc() != FileLoc) PP->Diag(ConditionalStack.back().IfLoc, diag::err_pp_unterminated_conditional); ConditionalStack.pop_back(); @@ -1951,15 +2161,18 @@ unsigned Lexer::isNextPPTokenLParen() { } /// FindConflictEnd - Find the end of a version control conflict marker. -static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd) { - llvm::StringRef RestOfBuffer(CurPtr+7, BufferEnd-CurPtr-7); - size_t Pos = RestOfBuffer.find(">>>>>>>"); - while (Pos != llvm::StringRef::npos) { +static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd, + ConflictMarkerKind CMK) { + const char *Terminator = CMK == CMK_Perforce ? "<<<<\n" : ">>>>>>>"; + size_t TermLen = CMK == CMK_Perforce ? 5 : 7; + StringRef RestOfBuffer(CurPtr+TermLen, BufferEnd-CurPtr-TermLen); + size_t Pos = RestOfBuffer.find(Terminator); + while (Pos != StringRef::npos) { // Must occur at start of line. if (RestOfBuffer[Pos-1] != '\r' && RestOfBuffer[Pos-1] != '\n') { - RestOfBuffer = RestOfBuffer.substr(Pos+7); - Pos = RestOfBuffer.find(">>>>>>>"); + RestOfBuffer = RestOfBuffer.substr(Pos+TermLen); + Pos = RestOfBuffer.find(Terminator); continue; } return RestOfBuffer.data()+Pos; @@ -1977,23 +2190,25 @@ bool Lexer::IsStartOfConflictMarker(const char *CurPtr) { CurPtr[-1] != '\n' && CurPtr[-1] != '\r') return false; - // Check to see if we have <<<<<<<. - if (BufferEnd-CurPtr < 8 || - llvm::StringRef(CurPtr, 7) != "<<<<<<<") + // Check to see if we have <<<<<<< or >>>>. 
+ if ((BufferEnd-CurPtr < 8 || StringRef(CurPtr, 7) != "<<<<<<<") && + (BufferEnd-CurPtr < 6 || StringRef(CurPtr, 5) != ">>>> ")) return false; // If we have a situation where we don't care about conflict markers, ignore // it. - if (IsInConflictMarker || isLexingRawMode()) + if (CurrentConflictMarkerState || isLexingRawMode()) return false; - // Check to see if there is a >>>>>>> somewhere in the buffer at the start of - // a line to terminate this conflict marker. - if (FindConflictEnd(CurPtr, BufferEnd)) { + ConflictMarkerKind Kind = *CurPtr == '<' ? CMK_Normal : CMK_Perforce; + + // Check to see if there is an ending marker somewhere in the buffer at the + // start of a line to terminate this conflict marker. + if (FindConflictEnd(CurPtr, BufferEnd, Kind)) { // We found a match. We are really in a conflict marker. // Diagnose this, and ignore to the end of line. Diag(CurPtr, diag::err_conflict_marker); - IsInConflictMarker = true; + CurrentConflictMarkerState = Kind; // Skip ahead to the end of line. We know this exists because the // end-of-conflict marker starts with \r or \n. @@ -2010,10 +2225,10 @@ bool Lexer::IsStartOfConflictMarker(const char *CurPtr) { } -/// HandleEndOfConflictMarker - If this is a '=======' or '|||||||' or '>>>>>>>' -/// marker, then it is the end of a conflict marker. Handle it by ignoring up -/// until the end of the line. This returns true if it is a conflict marker and -/// false if not. +/// HandleEndOfConflictMarker - If this is a '====' or '||||' or '>>>>', or if +/// it is '<<<<' and the conflict marker started with a '>>>>' marker, then it +/// is the end of a conflict marker. Handle it by ignoring up until the end of +/// the line. This returns true if it is a conflict marker and false if not. bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) { // Only a conflict marker if it starts at the beginning of a line. 
if (CurPtr != BufferStart && @@ -2022,18 +2237,19 @@ bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) { // If we have a situation where we don't care about conflict markers, ignore // it. - if (!IsInConflictMarker || isLexingRawMode()) + if (!CurrentConflictMarkerState || isLexingRawMode()) return false; - // Check to see if we have the marker (7 characters in a row). - for (unsigned i = 1; i != 7; ++i) + // Check to see if we have the marker (4 characters in a row). + for (unsigned i = 1; i != 4; ++i) if (CurPtr[i] != CurPtr[0]) return false; // If we do have it, search for the end of the conflict marker. This could // fail if it got skipped with a '#if 0' or something. Note that CurPtr might // be the end of conflict marker. - if (const char *End = FindConflictEnd(CurPtr, BufferEnd)) { + if (const char *End = FindConflictEnd(CurPtr, BufferEnd, + CurrentConflictMarkerState)) { CurPtr = End; // Skip ahead to the end of line. @@ -2043,13 +2259,22 @@ bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) { BufferPtr = CurPtr; // No longer in the conflict marker. - IsInConflictMarker = false; + CurrentConflictMarkerState = CMK_None; return true; } return false; } +bool Lexer::isCodeCompletionPoint(const char *CurPtr) const { + if (PP && PP->isCodeCompletionEnabled()) { + SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr-BufferStart); + return Loc == PP->getCodeCompletionLoc(); + } + + return false; +} + /// LexTokenInternal - This implements a simple C family lexer. It is an /// extremely performance critical piece of code. This assumes that the buffer @@ -2102,6 +2327,14 @@ LexNextToken: return PPCache->Lex(Result); } + // Check if we are performing code completion. + if (isCodeCompletionPoint(CurPtr-1)) { + // Return the code-completion token. 
+ Result.startToken(); + FormTokenWithChars(Result, CurPtr, tok::code_completion); + return; + } + if (!isLexingRawMode()) Diag(CurPtr-1, diag::null_in_file); Result.setFlag(Token::LeadingSpace); @@ -2112,7 +2345,7 @@ LexNextToken: case 26: // DOS & CP/M EOF: "^Z". // If we're in Microsoft extensions mode, treat this as end of file. - if (Features.Microsoft) { + if (Features.MicrosoftExt) { // Read the PP instance variable into an automatic variable, because // LexEndOfFile will often delete 'this'. Preprocessor *PPCache = PP; @@ -2186,6 +2419,102 @@ LexNextToken: MIOpt.ReadToken(); return LexNumericConstant(Result, CurPtr); + case 'u': // Identifier (uber) or C++0x UTF-8 or UTF-16 string literal + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + + if (Features.CPlusPlus0x) { + Char = getCharAndSize(CurPtr, SizeTmp); + + // UTF-16 string literal + if (Char == '"') + return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result), + tok::utf16_string_literal); + + // UTF-16 character constant + if (Char == '\'') + return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result), + tok::utf16_char_constant); + + // UTF-16 raw string literal + if (Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') + return LexRawStringLiteral(Result, + ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result), + tok::utf16_string_literal); + + if (Char == '8') { + char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2); + + // UTF-8 string literal + if (Char2 == '"') + return LexStringLiteral(Result, + ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result), + tok::utf8_string_literal); + + if (Char2 == 'R') { + unsigned SizeTmp3; + char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3); + // UTF-8 raw string literal + if (Char3 == '"') { + return LexRawStringLiteral(Result, + ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result), + SizeTmp3, Result), + 
tok::utf8_string_literal); + } + } + } + } + + // treat u like the start of an identifier. + return LexIdentifier(Result, CurPtr); + + case 'U': // Identifier (Uber) or C++0x UTF-32 string literal + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + + if (Features.CPlusPlus0x) { + Char = getCharAndSize(CurPtr, SizeTmp); + + // UTF-32 string literal + if (Char == '"') + return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result), + tok::utf32_string_literal); + + // UTF-32 character constant + if (Char == '\'') + return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result), + tok::utf32_char_constant); + + // UTF-32 raw string literal + if (Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') + return LexRawStringLiteral(Result, + ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result), + tok::utf32_string_literal); + } + + // treat U like the start of an identifier. + return LexIdentifier(Result, CurPtr); + + case 'R': // Identifier or C++0x raw string literal + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + + if (Features.CPlusPlus0x) { + Char = getCharAndSize(CurPtr, SizeTmp); + + if (Char == '"') + return LexRawStringLiteral(Result, + ConsumeChar(CurPtr, SizeTmp, Result), + tok::string_literal); + } + + // treat R like the start of an identifier. + return LexIdentifier(Result, CurPtr); + case 'L': // Identifier (Loony) or wide literal (L'x' or L"xyz"). // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); @@ -2194,21 +2523,30 @@ LexNextToken: // Wide string literal. if (Char == '"') return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result), - true); + tok::wide_string_literal); + + // Wide raw string literal. 
+ if (Features.CPlusPlus0x && Char == 'R' && + getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') + return LexRawStringLiteral(Result, + ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result), + tok::wide_string_literal); // Wide character constant. if (Char == '\'') - return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result)); + return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result), + tok::wide_char_constant); // FALL THROUGH, treating L like the start of an identifier. // C99 6.4.2: Identifiers. case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': /*'L'*/case 'M': case 'N': - case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': + case 'O': case 'P': case 'Q': /*'R'*/case 'S': case 'T': /*'U'*/ case 'V': case 'W': case 'X': case 'Y': case 'Z': case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': - case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': /*'u'*/ case 'v': case 'w': case 'x': case 'y': case 'z': case '_': // Notify MIOpt that we read a non-whitespace/non-comment token. @@ -2231,13 +2569,13 @@ LexNextToken: case '\'': // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); - return LexCharConstant(Result, CurPtr); + return LexCharConstant(Result, CurPtr, tok::char_constant); // C99 6.4.5: String Literals. case '"': // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); - return LexStringLiteral(Result, CurPtr, false); + return LexStringLiteral(Result, CurPtr, tok::string_literal); // C99 6.4.6: Punctuators. 
case '?': @@ -2396,7 +2734,7 @@ LexNextToken: Kind = tok::hashhash; // '%:%:' -> '##' CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result); - } else if (Char == '@' && Features.Microsoft) { // %:@ -> #@ -> Charize + } else if (Char == '@' && Features.MicrosoftExt) {// %:@ -> #@ -> Charize CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); if (!isLexingRawMode()) Diag(BufferPtr, diag::charize_microsoft_ext); @@ -2447,6 +2785,10 @@ LexNextToken: // If this is actually a '<<<<<<<' version control conflict marker, // recognize it as such and recover nicely. goto LexNextToken; + } else if (After == '<' && HandleEndOfConflictMarker(CurPtr-1)) { + // If this is '<<<<' and we're in a Perforce-style conflict marker, + // ignore it. + goto LexNextToken; } else if (Features.CUDA && After == '<') { Kind = tok::lesslessless; CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), @@ -2470,6 +2812,8 @@ LexNextToken: char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3); if (After != ':' && After != '>') { Kind = tok::less; + if (!isLexingRawMode()) + Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon); break; } } @@ -2494,6 +2838,10 @@ LexNextToken: CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result); Kind = tok::greatergreaterequal; + } else if (After == '>' && IsStartOfConflictMarker(CurPtr-1)) { + // If this is actually a '>>>>' conflict marker, recognize it as such + // and recover nicely. + goto LexNextToken; } else if (After == '>' && HandleEndOfConflictMarker(CurPtr-1)) { // If this is '>>>>>>>' and we're in a conflict marker, ignore it. goto LexNextToken; @@ -2552,7 +2900,7 @@ LexNextToken: case '=': Char = getCharAndSize(CurPtr, SizeTmp); if (Char == '=') { - // If this is '=======' and we're in a conflict marker, ignore it. + // If this is '====' and we're in a conflict marker, ignore it. 
if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1)) goto LexNextToken; @@ -2570,7 +2918,7 @@ LexNextToken: if (Char == '#') { Kind = tok::hashhash; CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); - } else if (Char == '@' && Features.Microsoft) { // #@ -> Charize + } else if (Char == '@' && Features.MicrosoftExt) { // #@ -> Charize Kind = tok::hashat; if (!isLexingRawMode()) Diag(BufferPtr, diag::charize_microsoft_ext); diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp index 2c96c4d4ee24..70183fd1a0ea 100644 --- a/lib/Lex/LiteralSupport.cpp +++ b/lib/Lex/LiteralSupport.cpp @@ -16,8 +16,8 @@ #include "clang/Lex/Preprocessor.h" #include "clang/Lex/LexDiagnostic.h" #include "clang/Basic/TargetInfo.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Support/ErrorHandling.h" using namespace clang; /// HexDigitValue - Return the value of the specified hex digit, or -1 if it's @@ -29,12 +29,31 @@ static int HexDigitValue(char C) { return -1; } +static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) { + switch (kind) { + default: llvm_unreachable("Unknown token type!"); + case tok::char_constant: + case tok::string_literal: + case tok::utf8_string_literal: + return Target.getCharWidth(); + case tok::wide_char_constant: + case tok::wide_string_literal: + return Target.getWCharWidth(); + case tok::utf16_char_constant: + case tok::utf16_string_literal: + return Target.getChar16Width(); + case tok::utf32_char_constant: + case tok::utf32_string_literal: + return Target.getChar32Width(); + } +} + /// ProcessCharEscape - Parse a standard C escape sequence, which can occur in /// either a character or a string literal. static unsigned ProcessCharEscape(const char *&ThisTokBuf, const char *ThisTokEnd, bool &HadError, - FullSourceLoc Loc, bool IsWide, - Diagnostic *Diags, const TargetInfo &Target) { + FullSourceLoc Loc, unsigned CharWidth, + DiagnosticsEngine *Diags) { // Skip the '\' char. 
++ThisTokBuf; @@ -99,9 +118,6 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf, } // See if any bits will be truncated when evaluated as a character. - unsigned CharWidth = - IsWide ? Target.getWCharWidth() : Target.getCharWidth(); - if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) { Overflow = true; ResultChar &= ~0U >> (32-CharWidth); @@ -129,9 +145,6 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf, ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7'); // Check for overflow. Reject '\777', but not L'\777'. - unsigned CharWidth = - IsWide ? Target.getWCharWidth() : Target.getCharWidth(); - if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) { if (Diags) Diags->Report(Loc, diag::warn_octal_escape_too_large); @@ -167,7 +180,7 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf, /// return the UTF32. static bool ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, uint32_t &UcnVal, unsigned short &UcnLen, - FullSourceLoc Loc, Diagnostic *Diags, + FullSourceLoc Loc, DiagnosticsEngine *Diags, const LangOptions &Features) { if (!Features.CPlusPlus && !Features.C99 && Diags) Diags->Report(Loc, diag::warn_ucn_not_valid_in_c89); @@ -220,7 +233,8 @@ static bool ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, /// we will likely rework our support for UCN's. 
static void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, char *&ResultBuf, bool &HadError, - FullSourceLoc Loc, bool wide, Diagnostic *Diags, + FullSourceLoc Loc, unsigned CharByteWidth, + DiagnosticsEngine *Diags, const LangOptions &Features) { typedef uint32_t UTF32; UTF32 UcnVal = 0; @@ -231,19 +245,22 @@ static void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, return; } - if (wide) { - (void)UcnLen; - assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported"); + assert((CharByteWidth == 1 || CharByteWidth == 2 || CharByteWidth) && + "only character widths of 1, 2, or 4 bytes supported"); - if (!Features.ShortWChar) { - // Note: our internal rep of wide char tokens is always little-endian. - *ResultBuf++ = (UcnVal & 0x000000FF); - *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8; - *ResultBuf++ = (UcnVal & 0x00FF0000) >> 16; - *ResultBuf++ = (UcnVal & 0xFF000000) >> 24; - return; - } + (void)UcnLen; + assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported"); + if (CharByteWidth == 4) { + // Note: our internal rep of wide char tokens is always little-endian. + *ResultBuf++ = (UcnVal & 0x000000FF); + *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8; + *ResultBuf++ = (UcnVal & 0x00FF0000) >> 16; + *ResultBuf++ = (UcnVal & 0xFF000000) >> 24; + return; + } + + if (CharByteWidth == 2) { // Convert to UTF16. if (UcnVal < (UTF32)0xFFFF) { *ResultBuf++ = (UcnVal & 0x000000FF); @@ -262,6 +279,9 @@ static void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, *ResultBuf++ = (surrogate2 & 0x0000FF00) >> 8; return; } + + assert(CharByteWidth == 1 && "UTF-8 encoding is only for 1 byte characters"); + // Now that we've parsed/checked the UCN, we convert from UTF32->UTF8. // The conversion below was inspired by: // http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c @@ -371,7 +391,7 @@ NumericLiteralParser(const char *begin, const char *end, // Done. 
} else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) { PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin), - diag::err_invalid_decimal_digit) << llvm::StringRef(s, 1); + diag::err_invalid_decimal_digit) << StringRef(s, 1); hadError = true; return; } else if (*s == '.') { @@ -434,7 +454,7 @@ NumericLiteralParser(const char *begin, const char *end, continue; // Success. case 'i': case 'I': - if (PP.getLangOptions().Microsoft) { + if (PP.getLangOptions().MicrosoftExt) { if (isFPConstant || isLong || isLongLong) break; // Allow i8, i16, i32, i64, and i128. @@ -498,7 +518,7 @@ NumericLiteralParser(const char *begin, const char *end, PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin), isFPConstant ? diag::err_invalid_suffix_float_constant : diag::err_invalid_suffix_integer_constant) - << llvm::StringRef(SuffixBegin, ThisTokEnd-SuffixBegin); + << StringRef(SuffixBegin, ThisTokEnd-SuffixBegin); hadError = true; return; } @@ -528,7 +548,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { } // A binary exponent can appear with or with a '.'. If dotted, the // binary exponent is required. - if ((*s == 'p' || *s == 'P') && !PP.getLangOptions().CPlusPlus0x) { + if (*s == 'p' || *s == 'P') { const char *Exponent = s; s++; saw_exponent = true; @@ -542,12 +562,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { } s = first_non_digit; - // In C++0x, we cannot support hexadecmial floating literals because - // they conflict with user-defined literals, so we warn in previous - // versions of C++ by default. - if (PP.getLangOptions().CPlusPlus) - PP.Diag(TokLoc, diag::ext_hexconstant_cplusplus); - else if (!PP.getLangOptions().HexFloats) + if (!PP.getLangOptions().HexFloats) PP.Diag(TokLoc, diag::ext_hexconstant_invalid); } else if (saw_period) { PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin), @@ -569,7 +584,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { // Done. 
} else if (isxdigit(*s)) { PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin), - diag::err_invalid_binary_digit) << llvm::StringRef(s, 1); + diag::err_invalid_binary_digit) << StringRef(s, 1); hadError = true; } // Other suffixes will be diagnosed by the caller. @@ -599,7 +614,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { // the code is using an incorrect base. if (isxdigit(*s) && *s != 'e' && *s != 'E') { PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin), - diag::err_invalid_octal_digit) << llvm::StringRef(s, 1); + diag::err_invalid_octal_digit) << StringRef(s, 1); hadError = true; return; } @@ -688,7 +703,6 @@ bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) { llvm::APFloat::opStatus NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) { using llvm::APFloat; - using llvm::StringRef; unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin); return Result.convertFromString(StringRef(ThisTokBegin, n), @@ -696,14 +710,51 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) { } +/// character-literal: [C++0x lex.ccon] +/// ' c-char-sequence ' +/// u' c-char-sequence ' +/// U' c-char-sequence ' +/// L' c-char-sequence ' +/// c-char-sequence: +/// c-char +/// c-char-sequence c-char +/// c-char: +/// any member of the source character set except the single-quote ', +/// backslash \, or new-line character +/// escape-sequence +/// universal-character-name +/// escape-sequence: [C++0x lex.ccon] +/// simple-escape-sequence +/// octal-escape-sequence +/// hexadecimal-escape-sequence +/// simple-escape-sequence: +/// one of \' \" \? 
\\ \a \b \f \n \r \t \v +/// octal-escape-sequence: +/// \ octal-digit +/// \ octal-digit octal-digit +/// \ octal-digit octal-digit octal-digit +/// hexadecimal-escape-sequence: +/// \x hexadecimal-digit +/// hexadecimal-escape-sequence hexadecimal-digit +/// universal-character-name: +/// \u hex-quad +/// \U hex-quad hex-quad +/// hex-quad: +/// hex-digit hex-digit hex-digit hex-digit +/// CharLiteralParser::CharLiteralParser(const char *begin, const char *end, - SourceLocation Loc, Preprocessor &PP) { + SourceLocation Loc, Preprocessor &PP, + tok::TokenKind kind) { // At this point we know that the character matches the regex "L?'.*'". HadError = false; - // Determine if this is a wide character. - IsWide = begin[0] == 'L'; - if (IsWide) ++begin; + Kind = kind; + + // Determine if this is a wide or UTF character. + if (Kind == tok::wide_char_constant || Kind == tok::utf16_char_constant || + Kind == tok::utf32_char_constant) { + ++begin; + } // Skip over the entry quote. assert(begin[0] == '\'' && "Invalid token lexed"); @@ -730,8 +781,9 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, // Is this a Universal Character Name escape? if (begin[0] != '\\') // If this is a normal character, consume it. - ResultChar = *begin++; + ResultChar = (unsigned char)*begin++; else { // Otherwise, this is an escape character. + unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo()); // Check for UCN. if (begin[1] == 'u' || begin[1] == 'U') { uint32_t utf32 = 0; @@ -742,19 +794,22 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, HadError = 1; } ResultChar = utf32; + if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) { + PP.Diag(Loc, diag::warn_ucn_escape_too_large); + ResultChar &= ~0U >> (32-CharWidth); + } } else { // Otherwise, this is a non-UCN escape character. Process it. 
ResultChar = ProcessCharEscape(begin, end, HadError, FullSourceLoc(Loc,PP.getSourceManager()), - IsWide, - &PP.getDiagnostics(), PP.getTargetInfo()); + CharWidth, &PP.getDiagnostics()); } } // If this is a multi-character constant (e.g. 'abc'), handle it. These are // implementation defined (C99 6.4.4.4p10). if (NumCharsSoFar) { - if (IsWide) { + if (!isAscii()) { // Emulate GCC's (unintentional?) behavior: L'ab' -> L'b'. LitVal = 0; } else { @@ -776,8 +831,8 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, if (NumCharsSoFar > 1) { // Warn about discarding the top bits for multi-char wide-character // constants (L'abcd'). - if (IsWide) - PP.Diag(Loc, diag::warn_extraneous_wide_char_constant); + if (!isAscii()) + PP.Diag(Loc, diag::warn_extraneous_char_constant); else if (NumCharsSoFar != 4) PP.Diag(Loc, diag::ext_multichar_character_literal); else @@ -789,47 +844,62 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, // Transfer the value from APInt to uint64_t Value = LitVal.getZExtValue(); - if (IsWide && PP.getLangOptions().ShortWChar && Value > 0xFFFF) - PP.Diag(Loc, diag::warn_ucn_escape_too_large); - // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1") // if 'char' is signed for this target (C99 6.4.4.4p10). Note that multiple // character constants are not sign extended in the this implementation: // '\xFF\xFF' = 65536 and '\x0\xFF' = 255, which matches GCC. 
- if (!IsWide && NumCharsSoFar == 1 && (Value & 128) && + if (isAscii() && NumCharsSoFar == 1 && (Value & 128) && PP.getLangOptions().CharIsSigned) Value = (signed char)Value; } -/// string-literal: [C99 6.4.5] -/// " [s-char-sequence] " -/// L" [s-char-sequence] " +/// string-literal: [C++0x lex.string] +/// encoding-prefix " [s-char-sequence] " +/// encoding-prefix R raw-string +/// encoding-prefix: +/// u8 +/// u +/// U +/// L /// s-char-sequence: /// s-char /// s-char-sequence s-char /// s-char: -/// any source character except the double quote ", -/// backslash \, or newline character -/// escape-character -/// universal-character-name -/// escape-character: [C99 6.4.4.4] -/// \ escape-code +/// any member of the source character set except the double-quote ", +/// backslash \, or new-line character +/// escape-sequence /// universal-character-name -/// escape-code: -/// character-escape-code -/// octal-escape-code -/// hex-escape-code -/// character-escape-code: one of -/// n t b r f v a -/// \ ' " ? -/// octal-escape-code: -/// octal-digit -/// octal-digit octal-digit -/// octal-digit octal-digit octal-digit -/// hex-escape-code: -/// x hex-digit -/// hex-escape-code hex-digit +/// raw-string: +/// " d-char-sequence ( r-char-sequence ) d-char-sequence " +/// r-char-sequence: +/// r-char +/// r-char-sequence r-char +/// r-char: +/// any member of the source character set, except a right parenthesis ) +/// followed by the initial d-char-sequence (which may be empty) +/// followed by a double quote ". +/// d-char-sequence: +/// d-char +/// d-char-sequence d-char +/// d-char: +/// any member of the basic source character set except: +/// space, the left parenthesis (, the right parenthesis ), +/// the backslash \, and the control characters representing horizontal +/// tab, vertical tab, form feed, and newline. 
+/// escape-sequence: [C++0x lex.ccon] +/// simple-escape-sequence +/// octal-escape-sequence +/// hexadecimal-escape-sequence +/// simple-escape-sequence: +/// one of \' \" \? \\ \a \b \f \n \r \t \v +/// octal-escape-sequence: +/// \ octal-digit +/// \ octal-digit octal-digit +/// \ octal-digit octal-digit octal-digit +/// hexadecimal-escape-sequence: +/// \x hexadecimal-digit +/// hexadecimal-escape-sequence hexadecimal-digit /// universal-character-name: /// \u hex-quad /// \U hex-quad hex-quad @@ -841,8 +911,8 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks, Preprocessor &PP, bool Complain) : SM(PP.getSourceManager()), Features(PP.getLangOptions()), Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() : 0), - MaxTokenLength(0), SizeBound(0), wchar_tByteWidth(0), - ResultPtr(ResultBuf.data()), hadError(false), AnyWide(false), Pascal(false) { + MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), + ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) { init(StringToks, NumStringToks); } @@ -862,7 +932,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ MaxTokenLength = StringToks[0].getLength(); assert(StringToks[0].getLength() >= 2 && "literal token is invalid!"); SizeBound = StringToks[0].getLength()-2; // -2 for "". - AnyWide = StringToks[0].is(tok::wide_string_literal); + Kind = StringToks[0].getKind(); hadError = false; @@ -883,8 +953,18 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ if (StringToks[i].getLength() > MaxTokenLength) MaxTokenLength = StringToks[i].getLength(); - // Remember if we see any wide strings. - AnyWide |= StringToks[i].is(tok::wide_string_literal); + // Remember if we see any wide or utf-8/16/32 strings. + // Also check for illegal concatenations. 
+ if (StringToks[i].isNot(Kind) && StringToks[i].isNot(tok::string_literal)) { + if (isAscii()) { + Kind = StringToks[i].getKind(); + } else { + if (Diags) + Diags->Report(FullSourceLoc(StringToks[i].getLocation(), SM), + diag::err_unsupported_string_concat); + hadError = true; + } + } } // Include space for the null terminator. @@ -892,19 +972,14 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ // TODO: K&R warning: "traditional C rejects string constant concatenation" - // Get the width in bytes of wchar_t. If no wchar_t strings are used, do not - // query the target. As such, wchar_tByteWidth is only valid if AnyWide=true. - wchar_tByteWidth = ~0U; - if (AnyWide) { - wchar_tByteWidth = Target.getWCharWidth(); - assert((wchar_tByteWidth & 7) == 0 && "Assumes wchar_t is byte multiple!"); - wchar_tByteWidth /= 8; - } + // Get the width in bytes of char/wchar_t/char16_t/char32_t + CharByteWidth = getCharWidth(Kind, Target); + assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple"); + CharByteWidth /= 8; // The output buffer size needs to be large enough to hold wide characters. // This is a worst-case assumption which basically corresponds to L"" "long". - if (AnyWide) - SizeBound *= wchar_tByteWidth; + SizeBound *= CharByteWidth; // Size the temporary buffer to hold the result string data. ResultBuf.resize(SizeBound); @@ -929,78 +1004,82 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ Lexer::getSpelling(StringToks[i], ThisTokBuf, SM, Features, &StringInvalid); if (StringInvalid) { - hadError = 1; + hadError = true; continue; } const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote. - bool wide = false; // TODO: Input character set mapping support. - // Skip L marker for wide strings. - if (ThisTokBuf[0] == 'L') { - wide = true; + // Skip marker for wide or unicode strings. 
+ if (ThisTokBuf[0] == 'L' || ThisTokBuf[0] == 'u' || ThisTokBuf[0] == 'U') { ++ThisTokBuf; + // Skip 8 of u8 marker for utf8 strings. + if (ThisTokBuf[0] == '8') + ++ThisTokBuf; } - assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?"); - ++ThisTokBuf; - - // Check if this is a pascal string - if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd && - ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') { + // Check for raw string + if (ThisTokBuf[0] == 'R') { + ThisTokBuf += 2; // skip R" - // If the \p sequence is found in the first token, we have a pascal string - // Otherwise, if we already have a pascal string, ignore the first \p - if (i == 0) { + const char *Prefix = ThisTokBuf; + while (ThisTokBuf[0] != '(') ++ThisTokBuf; - Pascal = true; - } else if (Pascal) - ThisTokBuf += 2; - } + ++ThisTokBuf; // skip '(' + + // remove same number of characters from the end + if (ThisTokEnd >= ThisTokBuf + (ThisTokBuf - Prefix)) + ThisTokEnd -= (ThisTokBuf - Prefix); + + // Copy the string over + CopyStringFragment(StringRef(ThisTokBuf, ThisTokEnd - ThisTokBuf)); + } else { + assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?"); + ++ThisTokBuf; // skip " + + // Check if this is a pascal string + if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd && + ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') { - while (ThisTokBuf != ThisTokEnd) { - // Is this a span of non-escape characters? - if (ThisTokBuf[0] != '\\') { - const char *InStart = ThisTokBuf; - do { + // If the \p sequence is found in the first token, we have a pascal string + // Otherwise, if we already have a pascal string, ignore the first \p + if (i == 0) { ++ThisTokBuf; - } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\'); - - // Copy the character span over. - unsigned Len = ThisTokBuf-InStart; - if (!AnyWide) { - memcpy(ResultPtr, InStart, Len); - ResultPtr += Len; - } else { - // Note: our internal rep of wide char tokens is always little-endian. 
- for (; Len; --Len, ++InStart) { - *ResultPtr++ = InStart[0]; - // Add zeros at the end. - for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i) - *ResultPtr++ = 0; - } - } - continue; - } - // Is this a Universal Character Name escape? - if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') { - EncodeUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr, - hadError, FullSourceLoc(StringToks[i].getLocation(),SM), - wide, Diags, Features); - continue; + Pascal = true; + } else if (Pascal) + ThisTokBuf += 2; } - // Otherwise, this is a non-UCN escape character. Process it. - unsigned ResultChar = - ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError, - FullSourceLoc(StringToks[i].getLocation(), SM), - AnyWide, Diags, Target); - // Note: our internal rep of wide char tokens is always little-endian. - *ResultPtr++ = ResultChar & 0xFF; + while (ThisTokBuf != ThisTokEnd) { + // Is this a span of non-escape characters? + if (ThisTokBuf[0] != '\\') { + const char *InStart = ThisTokBuf; + do { + ++ThisTokBuf; + } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\'); + + // Copy the character span over. + CopyStringFragment(StringRef(InStart, ThisTokBuf - InStart)); + continue; + } + // Is this a Universal Character Name escape? + if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') { + EncodeUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr, + hadError, FullSourceLoc(StringToks[i].getLocation(),SM), + CharByteWidth, Diags, Features); + continue; + } + // Otherwise, this is a non-UCN escape character. Process it. + unsigned ResultChar = + ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError, + FullSourceLoc(StringToks[i].getLocation(), SM), + CharByteWidth*8, Diags); + + // Note: our internal rep of wide char tokens is always little-endian. 
+ *ResultPtr++ = ResultChar & 0xFF; - if (AnyWide) { - for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i) + for (unsigned i = 1, e = CharByteWidth; i != e; ++i) *ResultPtr++ = ResultChar >> i*8; } } @@ -1008,8 +1087,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ if (Pascal) { ResultBuf[0] = ResultPtr-&ResultBuf[0]-1; - if (AnyWide) - ResultBuf[0] /= wchar_tByteWidth; + ResultBuf[0] /= CharByteWidth; // Verify that pascal strings aren't too large. if (GetStringLength() > 256) { @@ -1018,7 +1096,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ diag::err_pascal_string_too_long) << SourceRange(StringToks[0].getLocation(), StringToks[NumStringToks-1].getLocation()); - hadError = 1; + hadError = true; return; } } else if (Diags) { @@ -1036,6 +1114,25 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ } +/// copyStringFragment - This function copies from Start to End into ResultPtr. +/// Performs widening for multi-byte characters. +void StringLiteralParser::CopyStringFragment(StringRef Fragment) { + // Copy the character span over. + if (CharByteWidth == 1) { + memcpy(ResultPtr, Fragment.data(), Fragment.size()); + ResultPtr += Fragment.size(); + } else { + // Note: our internal rep of wide char tokens is always little-endian. + for (StringRef::iterator I=Fragment.begin(), E=Fragment.end(); I!=E; ++I) { + *ResultPtr++ = *I; + // Add zeros at the end. + for (unsigned i = 1, e = CharByteWidth; i != e; ++i) + *ResultPtr++ = 0; + } + } +} + + /// getOffsetOfStringByte - This function returns the offset of the /// specified byte of the string data represented by Token. This handles /// advancing over escape sequences in the string. 
@@ -1052,7 +1149,8 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok, if (StringInvalid) return 0; - assert(SpellingPtr[0] != 'L' && "Doesn't handle wide strings yet"); + assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' && + SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet"); const char *SpellingStart = SpellingPtr; @@ -1077,7 +1175,7 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok, bool HadError = false; ProcessCharEscape(SpellingPtr, SpellingEnd, HadError, FullSourceLoc(Tok.getLocation(), SM), - false, Diags, Target); + CharByteWidth*8, Diags); assert(!HadError && "This method isn't valid on erroneous strings"); --ByteNo; } diff --git a/lib/Lex/MacroArgs.cpp b/lib/Lex/MacroArgs.cpp index 968c15e3c27b..1846d1c05e30 100644 --- a/lib/Lex/MacroArgs.cpp +++ b/lib/Lex/MacroArgs.cpp @@ -15,13 +15,15 @@ #include "clang/Lex/MacroInfo.h" #include "clang/Lex/Preprocessor.h" #include "clang/Lex/LexDiagnostic.h" + +#include <algorithm> + using namespace clang; /// MacroArgs ctor function - This destroys the vector passed in. MacroArgs *MacroArgs::create(const MacroInfo *MI, - const Token *UnexpArgTokens, - unsigned NumToks, bool VarargsElided, - Preprocessor &PP) { + llvm::ArrayRef<Token> UnexpArgTokens, + bool VarargsElided, Preprocessor &PP) { assert(MI->isFunctionLike() && "Can't have args for an object-like macro!"); MacroArgs **ResultEnt = 0; @@ -31,12 +33,12 @@ MacroArgs *MacroArgs::create(const MacroInfo *MI, // free list. If so, reuse it. for (MacroArgs **Entry = &PP.MacroArgCache; *Entry; Entry = &(*Entry)->ArgCache) - if ((*Entry)->NumUnexpArgTokens >= NumToks && + if ((*Entry)->NumUnexpArgTokens >= UnexpArgTokens.size() && (*Entry)->NumUnexpArgTokens < ClosestMatch) { ResultEnt = Entry; // If we have an exact match, use it. - if ((*Entry)->NumUnexpArgTokens == NumToks) + if ((*Entry)->NumUnexpArgTokens == UnexpArgTokens.size()) break; // Otherwise, use the best fit. 
ClosestMatch = (*Entry)->NumUnexpArgTokens; @@ -45,21 +47,22 @@ MacroArgs *MacroArgs::create(const MacroInfo *MI, MacroArgs *Result; if (ResultEnt == 0) { // Allocate memory for a MacroArgs object with the lexer tokens at the end. - Result = (MacroArgs*)malloc(sizeof(MacroArgs) + NumToks*sizeof(Token)); + Result = (MacroArgs*)malloc(sizeof(MacroArgs) + + UnexpArgTokens.size() * sizeof(Token)); // Construct the MacroArgs object. - new (Result) MacroArgs(NumToks, VarargsElided); + new (Result) MacroArgs(UnexpArgTokens.size(), VarargsElided); } else { Result = *ResultEnt; // Unlink this node from the preprocessors singly linked list. *ResultEnt = Result->ArgCache; - Result->NumUnexpArgTokens = NumToks; + Result->NumUnexpArgTokens = UnexpArgTokens.size(); Result->VarargsElided = VarargsElided; } // Copy the actual unexpanded tokens to immediately after the result ptr. - if (NumToks) - memcpy(const_cast<Token*>(Result->getUnexpArgument(0)), - UnexpArgTokens, NumToks*sizeof(Token)); + if (!UnexpArgTokens.empty()) + std::copy(UnexpArgTokens.begin(), UnexpArgTokens.end(), + const_cast<Token*>(Result->getUnexpArgument(0))); return Result; } @@ -186,7 +189,8 @@ MacroArgs::getPreExpArgument(unsigned Arg, const MacroInfo *MI, /// Token MacroArgs::StringifyArgument(const Token *ArgToks, Preprocessor &PP, bool Charify, - SourceLocation hashInstLoc) { + SourceLocation ExpansionLocStart, + SourceLocation ExpansionLocEnd) { Token Tok; Tok.startToken(); Tok.setKind(Charify ? tok::char_constant : tok::string_literal); @@ -208,13 +212,21 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks, // by 6.10.3.2p2. if (Tok.is(tok::string_literal) || // "foo" Tok.is(tok::wide_string_literal) || // L"foo" - Tok.is(tok::char_constant)) { // 'x' and L'x'. + Tok.is(tok::utf8_string_literal) || // u8"foo" + Tok.is(tok::utf16_string_literal) || // u"foo" + Tok.is(tok::utf32_string_literal) || // U"foo" + Tok.is(tok::char_constant) || // 'x' + Tok.is(tok::wide_char_constant) || // L'x'. 
+ Tok.is(tok::utf16_char_constant) || // u'x'. + Tok.is(tok::utf32_char_constant)) { // U'x'. bool Invalid = false; std::string TokStr = PP.getSpelling(Tok, &Invalid); if (!Invalid) { std::string Str = Lexer::Stringify(TokStr); Result.append(Str.begin(), Str.end()); } + } else if (Tok.is(tok::code_completion)) { + PP.CodeCompleteNaturalLanguage(); } else { // Otherwise, just append the token. Do some gymnastics to get the token // in place and avoid copies where possible. @@ -274,7 +286,8 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks, } } - PP.CreateString(&Result[0], Result.size(), Tok, hashInstLoc); + PP.CreateString(&Result[0], Result.size(), Tok, + ExpansionLocStart, ExpansionLocEnd); return Tok; } @@ -282,7 +295,8 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks, /// that has been 'stringified' as required by the # operator. const Token &MacroArgs::getStringifiedArgument(unsigned ArgNo, Preprocessor &PP, - SourceLocation hashInstLoc) { + SourceLocation ExpansionLocStart, + SourceLocation ExpansionLocEnd) { assert(ArgNo < NumUnexpArgTokens && "Invalid argument number!"); if (StringifiedArgs.empty()) { StringifiedArgs.resize(getNumArguments()); @@ -291,6 +305,8 @@ const Token &MacroArgs::getStringifiedArgument(unsigned ArgNo, } if (StringifiedArgs[ArgNo].isNot(tok::string_literal)) StringifiedArgs[ArgNo] = StringifyArgument(getUnexpArgument(ArgNo), PP, - /*Charify=*/false, hashInstLoc); + /*Charify=*/false, + ExpansionLocStart, + ExpansionLocEnd); return StringifiedArgs[ArgNo]; } diff --git a/lib/Lex/MacroArgs.h b/lib/Lex/MacroArgs.h index a962dacf7c93..cf86d710adb7 100644 --- a/lib/Lex/MacroArgs.h +++ b/lib/Lex/MacroArgs.h @@ -14,6 +14,8 @@ #ifndef LLVM_CLANG_MACROARGS_H #define LLVM_CLANG_MACROARGS_H +#include "llvm/ADT/ArrayRef.h" + #include <vector> namespace clang { @@ -58,9 +60,8 @@ public: /// MacroArgs ctor function - Create a new MacroArgs object with the specified /// macro and argument info. 
static MacroArgs *create(const MacroInfo *MI, - const Token *UnexpArgTokens, - unsigned NumArgTokens, bool VarargsElided, - Preprocessor &PP); + llvm::ArrayRef<Token> UnexpArgTokens, + bool VarargsElided, Preprocessor &PP); /// destroy - Destroy and deallocate the memory for this object. /// @@ -88,7 +89,8 @@ public: /// getStringifiedArgument - Compute, cache, and return the specified argument /// that has been 'stringified' as required by the # operator. const Token &getStringifiedArgument(unsigned ArgNo, Preprocessor &PP, - SourceLocation hashInstLoc); + SourceLocation ExpansionLocStart, + SourceLocation ExpansionLocEnd); /// getNumArguments - Return the number of arguments passed into this macro /// invocation. @@ -109,7 +111,8 @@ public: /// static Token StringifyArgument(const Token *ArgToks, Preprocessor &PP, bool Charify, - SourceLocation hashInstLoc); + SourceLocation ExpansionLocStart, + SourceLocation ExpansionLocEnd); /// deallocate - This should only be called by the Preprocessor when managing diff --git a/lib/Lex/MacroInfo.cpp b/lib/Lex/MacroInfo.cpp index 0a16a2567219..5a7af5639830 100644 --- a/lib/Lex/MacroInfo.cpp +++ b/lib/Lex/MacroInfo.cpp @@ -21,6 +21,7 @@ MacroInfo::MacroInfo(SourceLocation DefLoc) : Location(DefLoc) { IsGNUVarargs = false; IsBuiltinMacro = false; IsFromAST = false; + ChangedAfterLoad = false; IsDisabled = false; IsUsed = false; IsAllowRedefinitionsWithoutWarning = false; @@ -40,6 +41,7 @@ MacroInfo::MacroInfo(const MacroInfo &MI, llvm::BumpPtrAllocator &PPAllocator) { IsGNUVarargs = MI.IsGNUVarargs; IsBuiltinMacro = MI.IsBuiltinMacro; IsFromAST = MI.IsFromAST; + ChangedAfterLoad = MI.ChangedAfterLoad; IsDisabled = MI.IsDisabled; IsUsed = MI.IsUsed; IsAllowRedefinitionsWithoutWarning = MI.IsAllowRedefinitionsWithoutWarning; @@ -68,9 +70,9 @@ unsigned MacroInfo::getDefinitionLengthSlow(SourceManager &SM) const { assert((macroEnd.isFileID() || lastToken.is(tok::comment)) && "Macro defined in macro?"); std::pair<FileID, unsigned> 
- startInfo = SM.getDecomposedInstantiationLoc(macroStart); + startInfo = SM.getDecomposedExpansionLoc(macroStart); std::pair<FileID, unsigned> - endInfo = SM.getDecomposedInstantiationLoc(macroEnd); + endInfo = SM.getDecomposedExpansionLoc(macroEnd); assert(startInfo.first == endInfo.first && "Macro definition spanning multiple FileIDs ?"); assert(startInfo.second <= endInfo.second); diff --git a/lib/Lex/PPCaching.cpp b/lib/Lex/PPCaching.cpp index 33106591c3ba..986341b98668 100644 --- a/lib/Lex/PPCaching.cpp +++ b/lib/Lex/PPCaching.cpp @@ -74,6 +74,8 @@ void Preprocessor::EnterCachingLexMode() { return; PushIncludeMacroStack(); + if (CurLexerKind != CLK_LexAfterModuleImport) + CurLexerKind = CLK_CachingLexer; } diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp index 4af5fabe5c80..de50c750e4d6 100644 --- a/lib/Lex/PPDirectives.cpp +++ b/lib/Lex/PPDirectives.cpp @@ -17,6 +17,7 @@ #include "clang/Lex/MacroInfo.h" #include "clang/Lex/LexDiagnostic.h" #include "clang/Lex/CodeCompletionHandler.h" +#include "clang/Lex/ModuleLoader.h" #include "clang/Lex/Pragma.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" @@ -102,8 +103,8 @@ void Preprocessor::ReadMacroName(Token &MacroNameTok, char isDefineUndef) { if (MacroNameTok.is(tok::code_completion)) { if (CodeComplete) CodeComplete->CodeCompleteMacroName(isDefineUndef == 1); + setCodeCompletionReached(); LexUnexpandedToken(MacroNameTok); - return; } // Missing macro name? @@ -192,7 +193,8 @@ void Preprocessor::CheckEndOfDirective(const char *DirType, bool EnableMacros) { /// the first valid token. 
void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, bool FoundNonSkipPortion, - bool FoundElse) { + bool FoundElse, + SourceLocation ElseLoc) { ++NumSkipped; assert(CurTokenLexer == 0 && CurPPLexer && "Lexing a macro, not a file?"); @@ -214,6 +216,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, if (Tok.is(tok::code_completion)) { if (CodeComplete) CodeComplete->CodeCompleteInConditionalExclusion(); + setCodeCompletionReached(); continue; } @@ -222,7 +225,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, // Emit errors for each unterminated conditional on the stack, including // the current one. while (!CurPPLexer->ConditionalStack.empty()) { - if (!isCodeCompletionFile(Tok.getLocation())) + if (CurLexer->getFileLoc() != CodeCompletionFileLoc) Diag(CurPPLexer->ConditionalStack.back().IfLoc, diag::err_pp_unterminated_conditional); CurPPLexer->ConditionalStack.pop_back(); @@ -275,9 +278,9 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, // that we can't use Tok.getIdentifierInfo() because its lookup is disabled // when skipping. 
char DirectiveBuf[20]; - llvm::StringRef Directive; + StringRef Directive; if (!Tok.needsCleaning() && Tok.getLength() < 20) { - Directive = llvm::StringRef(RawCharData, Tok.getLength()); + Directive = StringRef(RawCharData, Tok.getLength()); } else { std::string DirectiveStr = getSpelling(Tok); unsigned IdLen = DirectiveStr.size(); @@ -288,11 +291,11 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, continue; } memcpy(DirectiveBuf, &DirectiveStr[0], IdLen); - Directive = llvm::StringRef(DirectiveBuf, IdLen); + Directive = StringRef(DirectiveBuf, IdLen); } if (Directive.startswith("if")) { - llvm::StringRef Sub = Directive.substr(2); + StringRef Sub = Directive.substr(2); if (Sub.empty() || // "if" Sub == "def" || // "ifdef" Sub == "ndef") { // "ifndef" @@ -307,7 +310,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, Callbacks->Endif(); } } else if (Directive[0] == 'e') { - llvm::StringRef Sub = Directive.substr(1); + StringRef Sub = Directive.substr(1); if (Sub == "ndif") { // "endif" CheckEndOfDirective("endif"); PPConditionalInfo CondInfo; @@ -387,6 +390,11 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, // of the file, just stop skipping and return to lexing whatever came after // the #if block. CurPPLexer->LexingRawMode = false; + + if (Callbacks) { + SourceLocation BeginLoc = ElseLoc.isValid() ? ElseLoc : IfTokenLoc; + Callbacks->SourceRangeSkipped(SourceRange(BeginLoc, Tok.getLocation())); + } } void Preprocessor::PTHSkipExcludedConditionalBlock() { @@ -472,12 +480,13 @@ void Preprocessor::PTHSkipExcludedConditionalBlock() { /// return null on failure. isAngled indicates whether the file reference is /// for system #include's or not (i.e. using <> instead of ""). 
const FileEntry *Preprocessor::LookupFile( - llvm::StringRef Filename, + StringRef Filename, bool isAngled, const DirectoryLookup *FromDir, const DirectoryLookup *&CurDir, - llvm::SmallVectorImpl<char> *SearchPath, - llvm::SmallVectorImpl<char> *RelativePath) { + SmallVectorImpl<char> *SearchPath, + SmallVectorImpl<char> *RelativePath, + StringRef *SuggestedModule) { // If the header lookup mechanism may be relative to the current file, pass in // info about where the current file is. const FileEntry *CurFileEnt = 0; @@ -501,12 +510,13 @@ const FileEntry *Preprocessor::LookupFile( CurDir = CurDirLookup; const FileEntry *FE = HeaderInfo.LookupFile( Filename, isAngled, FromDir, CurDir, CurFileEnt, - SearchPath, RelativePath); + SearchPath, RelativePath, SuggestedModule); if (FE) return FE; // Otherwise, see if this is a subframework header. If so, this is relative // to one of the headers on the #include stack. Walk the list of the current // headers on the #include stack and pass them to HeaderInfo. + // FIXME: SuggestedModule! if (IsFileLexer()) { if ((CurFileEnt = SourceMgr.getFileEntryForID(CurPPLexer->getFileID()))) if ((FE = HeaderInfo.LookupSubframeworkHeader(Filename, CurFileEnt, @@ -581,6 +591,7 @@ TryAgain: if (CodeComplete) CodeComplete->CodeCompleteDirective( CurPPLexer->getConditionalStackDepth() > 0); + setCodeCompletionReached(); return; case tok::numeric_constant: // # 7 GNU line marker directive. if (getLangOptions().AsmPreprocessor) @@ -652,6 +663,9 @@ TryAgain: case tok::pp_unassert: //isExtension = true; // FIXME: implement #unassert break; + + case tok::pp___export_macro__: + return HandleMacroExportDirective(Result); } break; } @@ -758,9 +772,13 @@ void Preprocessor::HandleLineDirective(Token &Tok) { // Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a // number greater than 2147483647". C90 requires that the line # be <= 32767. - unsigned LineLimit = Features.C99 ? 
2147483648U : 32768U; + unsigned LineLimit = 32768U; + if (Features.C99 || Features.CPlusPlus0x) + LineLimit = 2147483648U; if (LineNo >= LineLimit) Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit; + else if (Features.CPlusPlus0x && LineNo >= 32768U) + Diag(DigitTok, diag::warn_cxx98_compat_pp_line_too_big); int FilenameID = -1; Token StrTok; @@ -777,7 +795,7 @@ void Preprocessor::HandleLineDirective(Token &Tok) { } else { // Parse and validate the string, converting it into a unique ID. StringLiteralParser Literal(&StrTok, 1, *this); - assert(!Literal.AnyWide && "Didn't allow wide strings in"); + assert(Literal.isAscii() && "Didn't allow wide strings in"); if (Literal.hadError) return DiscardUntilEndOfDirective(); if (Literal.Pascal) { @@ -825,7 +843,7 @@ static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit, // If we are leaving the current presumed file, check to make sure the // presumed include stack isn't empty! FileID CurFileID = - SM.getDecomposedInstantiationLoc(FlagTok.getLocation()).first; + SM.getDecomposedExpansionLoc(FlagTok.getLocation()).first; PresumedLoc PLoc = SM.getPresumedLoc(FlagTok.getLocation()); if (PLoc.isInvalid()) return true; @@ -834,7 +852,7 @@ static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit, // different physical file, then we aren't in a "1" line marker flag region. SourceLocation IncLoc = PLoc.getIncludeLoc(); if (IncLoc.isInvalid() || - SM.getDecomposedInstantiationLoc(IncLoc).first != CurFileID) { + SM.getDecomposedExpansionLoc(IncLoc).first != CurFileID) { PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_pop); PP.DiscardUntilEndOfDirective(); return true; @@ -910,7 +928,7 @@ void Preprocessor::HandleDigitDirective(Token &DigitTok) { } else { // Parse and validate the string, converting it into a unique ID. 
StringLiteralParser Literal(&StrTok, 1, *this); - assert(!Literal.AnyWide && "Didn't allow wide strings in"); + assert(Literal.isAscii() && "Didn't allow wide strings in"); if (Literal.hadError) return DiscardUntilEndOfDirective(); if (Literal.Pascal) { @@ -1000,6 +1018,37 @@ void Preprocessor::HandleIdentSCCSDirective(Token &Tok) { } } +/// \brief Handle a #__export_macro__ directive. +void Preprocessor::HandleMacroExportDirective(Token &Tok) { + Token MacroNameTok; + ReadMacroName(MacroNameTok, 2); + + // Error reading macro name? If so, diagnostic already issued. + if (MacroNameTok.is(tok::eod)) + return; + + // Check to see if this is the last token on the #__export_macro__ line. + CheckEndOfDirective("__export_macro__"); + + // Okay, we finally have a valid identifier to undef. + MacroInfo *MI = getMacroInfo(MacroNameTok.getIdentifierInfo()); + + // If the macro is not defined, this is an error. + if (MI == 0) { + Diag(MacroNameTok, diag::err_pp_export_non_macro) + << MacroNameTok.getIdentifierInfo(); + return; + } + + // Note that this macro has now been exported. + MI->setExportLocation(MacroNameTok.getLocation()); + + // If this macro definition came from a PCH file, mark it + // as having changed since serialization. + if (MI->isFromAST()) + MI->setChangedAfterLoad(); +} + //===----------------------------------------------------------------------===// // Preprocessor Include Directive Handling. //===----------------------------------------------------------------------===// @@ -1011,7 +1060,7 @@ void Preprocessor::HandleIdentSCCSDirective(Token &Tok) { /// spelling of the filename, but is also expected to handle the case when /// this method decides to use a different buffer. bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc, - llvm::StringRef &Buffer) { + StringRef &Buffer) { // Get the text form of the filename. 
assert(!Buffer.empty() && "Can't have tokens with empty spellings!"); @@ -1020,27 +1069,27 @@ bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc, if (Buffer[0] == '<') { if (Buffer.back() != '>') { Diag(Loc, diag::err_pp_expects_filename); - Buffer = llvm::StringRef(); + Buffer = StringRef(); return true; } isAngled = true; } else if (Buffer[0] == '"') { if (Buffer.back() != '"') { Diag(Loc, diag::err_pp_expects_filename); - Buffer = llvm::StringRef(); + Buffer = StringRef(); return true; } isAngled = false; } else { Diag(Loc, diag::err_pp_expects_filename); - Buffer = llvm::StringRef(); + Buffer = StringRef(); return true; } // Diagnose #include "" as invalid. if (Buffer.size() <= 2) { Diag(Loc, diag::err_pp_empty_filename); - Buffer = llvm::StringRef(); + Buffer = StringRef(); return true; } @@ -1070,6 +1119,7 @@ bool Preprocessor::ConcatenateIncludeName( // FIXME: Provide code completion for #includes. if (CurTok.is(tok::code_completion)) { + setCodeCompletionReached(); Lex(CurTok); continue; } @@ -1122,7 +1172,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, // Reserve a buffer to get the spelling. llvm::SmallString<128> FilenameBuffer; - llvm::StringRef Filename; + StringRef Filename; SourceLocation End; switch (FilenameTok.getKind()) { @@ -1171,23 +1221,44 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, return; } + // Complain about attempts to #include files in an audit pragma. + if (PragmaARCCFCodeAuditedLoc.isValid()) { + Diag(HashLoc, diag::err_pp_include_in_arc_cf_code_audited); + Diag(PragmaARCCFCodeAuditedLoc, diag::note_pragma_entered_here); + + // Immediately leave the pragma. + PragmaARCCFCodeAuditedLoc = SourceLocation(); + } + // Search include directories. const DirectoryLookup *CurDir; llvm::SmallString<1024> SearchPath; llvm::SmallString<1024> RelativePath; // We get the raw path only if we have 'Callbacks' to which we later pass // the path. 
+ StringRef SuggestedModule; const FileEntry *File = LookupFile( Filename, isAngled, LookupFrom, CurDir, - Callbacks ? &SearchPath : NULL, Callbacks ? &RelativePath : NULL); - + Callbacks ? &SearchPath : NULL, Callbacks ? &RelativePath : NULL, + AutoModuleImport? &SuggestedModule : 0); + + // If we are supposed to import a module rather than including the header, + // do so now. + if (!SuggestedModule.empty()) { + TheModuleLoader.loadModule(IncludeTok.getLocation(), + Identifiers.get(SuggestedModule), + FilenameTok.getLocation()); + return; + } + // Notify the callback object that we've seen an inclusion directive. if (Callbacks) Callbacks->InclusionDirective(HashLoc, IncludeTok, Filename, isAngled, File, End, SearchPath, RelativePath); if (File == 0) { - Diag(FilenameTok, diag::warn_pp_file_not_found) << Filename; + if (!SuppressIncludeNotFoundError) + Diag(FilenameTok, diag::err_pp_file_not_found) << Filename; return; } @@ -1284,7 +1355,7 @@ void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc, /// closing ), updating MI with what we learn. Return true if an error occurs /// parsing the arg list. bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI) { - llvm::SmallVector<IdentifierInfo*, 32> Arguments; + SmallVector<IdentifierInfo*, 32> Arguments; Token Tok; while (1) { @@ -1298,8 +1369,10 @@ bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI) { Diag(Tok, diag::err_pp_expected_ident_in_arg_list); return true; case tok::ellipsis: // #define X(... -> C99 varargs - // Warn if use of C99 feature in non-C99 mode. - if (!Features.C99) Diag(Tok, diag::ext_variadic_macro); + if (!Features.C99) + Diag(Tok, Features.CPlusPlus0x ? + diag::warn_cxx98_compat_variadic_macro : + diag::ext_variadic_macro); // Lex the token after the identifier. LexUnexpandedToken(Tok); @@ -1423,7 +1496,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { // Read the first token after the arg list for down below. 
LexUnexpandedToken(Tok); - } else if (Features.C99) { + } else if (Features.C99 || Features.CPlusPlus0x) { // C99 requires whitespace between the macro definition and the body. Emit // a diagnostic for something like "#define X+". Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name); @@ -1564,7 +1637,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { // warn-because-unused-macro set. If it gets used it will be removed from set. if (isInPrimaryFile() && // don't warn for include'd macros. Diags->getDiagnosticLevel(diag::pp_macro_not_used, - MI->getDefinitionLoc()) != Diagnostic::Ignored) { + MI->getDefinitionLoc()) != DiagnosticsEngine::Ignored) { MI->setIsWarnIfUnused(true); WarnUnusedMacroLocs.insert(MI->getDefinitionLoc()); } @@ -1765,7 +1838,7 @@ void Preprocessor::HandleElseDirective(Token &Result) { // Finally, skip the rest of the contents of this block. SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true, - /*FoundElse*/true); + /*FoundElse*/true, Result.getLocation()); if (Callbacks) Callbacks->Else(); @@ -1798,7 +1871,8 @@ void Preprocessor::HandleElifDirective(Token &ElifToken) { // Finally, skip the rest of the contents of this block. SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true, - /*FoundElse*/CI.FoundElse); + /*FoundElse*/CI.FoundElse, + ElifToken.getLocation()); if (Callbacks) Callbacks->Elif(SourceRange(ConditionalBegin, ConditionalEnd)); diff --git a/lib/Lex/PPExpressions.cpp b/lib/Lex/PPExpressions.cpp index 8fcfc70a7c67..20f624a0bb12 100644 --- a/lib/Lex/PPExpressions.cpp +++ b/lib/Lex/PPExpressions.cpp @@ -23,6 +23,7 @@ #include "clang/Basic/TargetInfo.h" #include "clang/Lex/LexDiagnostic.h" #include "llvm/ADT/APSInt.h" +#include "llvm/Support/ErrorHandling.h" using namespace clang; namespace { @@ -83,20 +84,21 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT, Result.setBegin(PeekTok.getLocation()); // Get the next token, don't expand it. 
- PP.LexUnexpandedToken(PeekTok); + PP.LexUnexpandedNonComment(PeekTok); // Two options, it can either be a pp-identifier or a (. SourceLocation LParenLoc; if (PeekTok.is(tok::l_paren)) { // Found a paren, remember we saw it and skip it. LParenLoc = PeekTok.getLocation(); - PP.LexUnexpandedToken(PeekTok); + PP.LexUnexpandedNonComment(PeekTok); } if (PeekTok.is(tok::code_completion)) { if (PP.getCodeCompletionHandler()) PP.getCodeCompletionHandler()->CodeCompleteMacroName(false); - PP.LexUnexpandedToken(PeekTok); + PP.setCodeCompletionReached(); + PP.LexUnexpandedNonComment(PeekTok); } // If we don't have a pp-identifier now, this is an error. @@ -115,12 +117,16 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT, PP.markMacroAsUsed(Macro); } - // Consume identifier. - Result.setEnd(PeekTok.getLocation()); - PP.LexUnexpandedToken(PeekTok); + // Invoke the 'defined' callback. + if (PPCallbacks *Callbacks = PP.getPPCallbacks()) + Callbacks->Defined(PeekTok); // If we are in parens, ensure we have a trailing ). if (LParenLoc.isValid()) { + // Consume identifier. + Result.setEnd(PeekTok.getLocation()); + PP.LexUnexpandedNonComment(PeekTok); + if (PeekTok.isNot(tok::r_paren)) { PP.Diag(PeekTok.getLocation(), diag::err_pp_missing_rparen) << "defined"; PP.Diag(LParenLoc, diag::note_matching) << "("; @@ -129,6 +135,10 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT, // Consume the ). Result.setEnd(PeekTok.getLocation()); PP.LexNonComment(PeekTok); + } else { + // Consume identifier. + Result.setEnd(PeekTok.getLocation()); + PP.LexNonComment(PeekTok); } // Success, remember that we saw defined(X). 
@@ -152,7 +162,8 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, if (PeekTok.is(tok::code_completion)) { if (PP.getCodeCompletionHandler()) PP.getCodeCompletionHandler()->CodeCompletePreprocessorExpression(); - PP.LexUnexpandedToken(PeekTok); + PP.setCodeCompletionReached(); + PP.LexNonComment(PeekTok); } // If this token's spelling is a pp-identifier, check to see if it is @@ -188,7 +199,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, case tok::numeric_constant: { llvm::SmallString<64> IntegerBuffer; bool NumberInvalid = false; - llvm::StringRef Spelling = PP.getSpelling(PeekTok, IntegerBuffer, + StringRef Spelling = PP.getSpelling(PeekTok, IntegerBuffer, &NumberInvalid); if (NumberInvalid) return true; // a diagnostic was already reported @@ -205,9 +216,9 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, assert(Literal.isIntegerLiteral() && "Unknown ppnumber"); // long long is a C99 feature. - if (!PP.getLangOptions().C99 && !PP.getLangOptions().CPlusPlus0x - && Literal.isLongLong) - PP.Diag(PeekTok, diag::ext_longlong); + if (!PP.getLangOptions().C99 && Literal.isLongLong) + PP.Diag(PeekTok, PP.getLangOptions().CPlusPlus0x ? + diag::warn_cxx98_compat_longlong : diag::ext_longlong); // Parse the integer literal into Result. 
if (Literal.GetIntegerValue(Result.Val)) { @@ -236,15 +247,18 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, PP.LexNonComment(PeekTok); return false; } - case tok::char_constant: { // 'x' + case tok::char_constant: // 'x' + case tok::wide_char_constant: { // L'x' + case tok::utf16_char_constant: // u'x' + case tok::utf32_char_constant: // U'x' llvm::SmallString<32> CharBuffer; bool CharInvalid = false; - llvm::StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer, &CharInvalid); + StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer, &CharInvalid); if (CharInvalid) return true; CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(), - PeekTok.getLocation(), PP); + PeekTok.getLocation(), PP, PeekTok.getKind()); if (Literal.hadError()) return true; // A diagnostic was already emitted. @@ -255,6 +269,10 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, NumBits = TI.getIntWidth(); else if (Literal.isWide()) NumBits = TI.getWCharWidth(); + else if (Literal.isUTF16()) + NumBits = TI.getChar16Width(); + else if (Literal.isUTF32()) + NumBits = TI.getChar32Width(); else NumBits = TI.getCharWidth(); @@ -262,8 +280,9 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, llvm::APSInt Val(NumBits); // Set the value. Val = Literal.getValue(); - // Set the signedness. - Val.setIsUnsigned(!PP.getLangOptions().CharIsSigned); + // Set the signedness. 
UTF-16 and UTF-32 are always unsigned + if (!Literal.isUTF16() && !Literal.isUTF32()) + Val.setIsUnsigned(!PP.getLangOptions().CharIsSigned); if (Result.Val.getBitWidth() > Val.getBitWidth()) { Result.Val = Val.extend(Result.Val.getBitWidth()); @@ -521,7 +540,7 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec, bool Overflow = false; switch (Operator) { - default: assert(0 && "Unknown operator token!"); + default: llvm_unreachable("Unknown operator token!"); case tok::percent: if (RHS.Val != 0) Res = LHS.Val % RHS.Val; @@ -704,7 +723,7 @@ EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { // Peek ahead one token. Token Tok; - Lex(Tok); + LexNonComment(Tok); // C99 6.10.1p3 - All expressions are evaluated as intmax_t or uintmax_t. unsigned BitWidth = getTargetInfo().getIntMaxTWidth(); @@ -759,4 +778,3 @@ EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective; return ResVal.Val != 0; } - diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp index bf28199b888a..25a98ae47b6e 100644 --- a/lib/Lex/PPLexerChange.cpp +++ b/lib/Lex/PPLexerChange.cpp @@ -89,7 +89,14 @@ void Preprocessor::EnterSourceFile(FileID FID, const DirectoryLookup *CurDir, << std::string(SourceMgr.getBufferName(FileStart)) << ""; return; } - + + if (isCodeCompletionEnabled() && + SourceMgr.getFileEntryForID(FID) == CodeCompletionFile) { + CodeCompletionFileLoc = SourceMgr.getLocForStartOfFile(FID); + CodeCompletionLoc = + CodeCompletionFileLoc.getLocWithOffset(CodeCompletionOffset); + } + EnterSourceFileWithLexer(new Lexer(FID, InputFile, *this), CurDir); return; } @@ -106,7 +113,9 @@ void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer, CurLexer.reset(TheLexer); CurPPLexer = TheLexer; CurDirLookup = CurDir; - + if (CurLexerKind != CLK_LexAfterModuleImport) + CurLexerKind = CLK_Lexer; + // Notify the client, if desired, that we are in a new source file. 
if (Callbacks && !CurLexer->Is_PragmaLexer) { SrcMgr::CharacteristicKind FileType = @@ -128,7 +137,9 @@ void Preprocessor::EnterSourceFileWithPTH(PTHLexer *PL, CurDirLookup = CurDir; CurPTHLexer.reset(PL); CurPPLexer = CurPTHLexer.get(); - + if (CurLexerKind != CLK_LexAfterModuleImport) + CurLexerKind = CLK_PTHLexer; + // Notify the client, if desired, that we are in a new source file. if (Callbacks) { FileID FID = CurPPLexer->getFileID(); @@ -152,6 +163,8 @@ void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd, CurTokenLexer.reset(TokenLexerCache[--NumCachedTokenLexers]); CurTokenLexer->Init(Tok, ILEnd, Args); } + if (CurLexerKind != CLK_LexAfterModuleImport) + CurLexerKind = CLK_TokenLexer; } /// EnterTokenStream - Add a "macro" context to the top of the include stack, @@ -181,6 +194,8 @@ void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks, CurTokenLexer.reset(TokenLexerCache[--NumCachedTokenLexers]); CurTokenLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens); } + if (CurLexerKind != CLK_LexAfterModuleImport) + CurLexerKind = CLK_TokenLexer; } /// HandleEndOfFile - This callback is invoked when the lexer hits the end of @@ -201,9 +216,50 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { } } + // Complain about reaching an EOF within arc_cf_code_audited. + if (PragmaARCCFCodeAuditedLoc.isValid()) { + Diag(PragmaARCCFCodeAuditedLoc, diag::err_pp_eof_in_arc_cf_code_audited); + + // Recover by leaving immediately. + PragmaARCCFCodeAuditedLoc = SourceLocation(); + } + // If this is a #include'd file, pop it off the include stack and continue // lexing the #includer file. if (!IncludeMacroStack.empty()) { + + // If we lexed the code-completion file, act as if we reached EOF. 
+ if (isCodeCompletionEnabled() && CurPPLexer && + SourceMgr.getLocForStartOfFile(CurPPLexer->getFileID()) == + CodeCompletionFileLoc) { + if (CurLexer) { + Result.startToken(); + CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof); + CurLexer.reset(); + } else { + assert(CurPTHLexer && "Got EOF but no current lexer set!"); + CurPTHLexer->getEOF(Result); + CurPTHLexer.reset(); + } + + CurPPLexer = 0; + return true; + } + + if (!isEndOfMacro && CurPPLexer && + SourceMgr.getIncludeLoc(CurPPLexer->getFileID()).isValid()) { + // Notify SourceManager to record the number of FileIDs that were created + // during lexing of the #include'd file. + unsigned NumFIDs = + SourceMgr.local_sloc_entry_size() - + CurPPLexer->getInitialNumSLocEntries() + 1/*#include'd file*/; + SourceMgr.setNumCreatedFIDsForFileID(CurPPLexer->getFileID(), NumFIDs); + } + + FileID ExitedFID; + if (Callbacks && !isEndOfMacro && CurPPLexer) + ExitedFID = CurPPLexer->getFileID(); + // We're done with the #included file. RemoveTopOfLexerStack(); @@ -212,7 +268,7 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { SrcMgr::CharacteristicKind FileType = SourceMgr.getFileCharacteristic(CurPPLexer->getSourceLocation()); Callbacks->FileChanged(CurPPLexer->getSourceLocation(), - PPCallbacks::ExitFile, FileType); + PPCallbacks::ExitFile, FileType, ExitedFID); } // Client should lex another token. 
diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp index ecd4d4cfc68b..e10c95c75f25 100644 --- a/lib/Lex/PPMacroExpansion.cpp +++ b/lib/Lex/PPMacroExpansion.cpp @@ -21,10 +21,12 @@ #include "clang/Lex/LexDiagnostic.h" #include "clang/Lex/CodeCompletionHandler.h" #include "clang/Lex/ExternalPreprocessorSource.h" +#include "clang/Lex/LiteralSupport.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Config/config.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ErrorHandling.h" #include <cstdio> #include <ctime> using namespace clang; @@ -91,9 +93,10 @@ void Preprocessor::RegisterBuiltinMacros() { Ident__has_attribute = RegisterBuiltinMacro(*this, "__has_attribute"); Ident__has_include = RegisterBuiltinMacro(*this, "__has_include"); Ident__has_include_next = RegisterBuiltinMacro(*this, "__has_include_next"); + Ident__has_warning = RegisterBuiltinMacro(*this, "__has_warning"); // Microsoft Extensions. - if (Features.Microsoft) + if (Features.MicrosoftExt) Ident__pragma = RegisterBuiltinMacro(*this, "__pragma"); else Ident__pragma = 0; @@ -185,7 +188,8 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, // If this is a builtin macro, like __LINE__ or _Pragma, handle it specially. if (MI->isBuiltinMacro()) { - if (Callbacks) Callbacks->MacroExpands(Identifier, MI); + if (Callbacks) Callbacks->MacroExpands(Identifier, MI, + Identifier.getLocation()); ExpandBuiltinMacro(Identifier); return false; } @@ -226,13 +230,14 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, // Notice that this macro has been used. markMacroAsUsed(MI); - if (Callbacks) Callbacks->MacroExpands(Identifier, MI); - - // If we started lexing a macro, enter the macro expansion body. - // Remember where the token is expanded. 
SourceLocation ExpandLoc = Identifier.getLocation(); + if (Callbacks) Callbacks->MacroExpands(Identifier, MI, + SourceRange(ExpandLoc, ExpansionEnd)); + + // If we started lexing a macro, enter the macro expansion body. + // If this macro expands to no tokens, don't bother to push it onto the // expansion stack, only to take it right back off. if (MI->getNumTokens() == 0) { @@ -255,7 +260,6 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, if (HadLeadingSpace) Identifier.setFlag(Token::LeadingSpace); } Identifier.setFlag(Token::LeadingEmptyMacro); - LastEmptyMacroExpansionLoc = ExpandLoc; ++NumFastMacroExpanded; return false; @@ -284,8 +288,8 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, // Update the tokens location to include both its expansion and physical // locations. SourceLocation Loc = - SourceMgr.createInstantiationLoc(Identifier.getLocation(), ExpandLoc, - ExpansionEnd,Identifier.getLength()); + SourceMgr.createExpansionLoc(Identifier.getLocation(), ExpandLoc, + ExpansionEnd,Identifier.getLength()); Identifier.setLocation(Loc); // If this is a disabled macro or #define X X, we must mark the result as @@ -333,7 +337,7 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, // ArgTokens - Build up a list of tokens that make up each argument. Each // argument is separated by an EOF token. Use a SmallVector so we can avoid // heap allocations in the common case. - llvm::SmallVector<Token, 64> ArgTokens; + SmallVector<Token, 64> ArgTokens; unsigned NumActuals = 0; while (Tok.isNot(tok::r_paren)) { @@ -352,13 +356,6 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, // an argument value in a macro could expand to ',' or '(' or ')'. 
LexUnexpandedToken(Tok); - if (Tok.is(tok::code_completion)) { - if (CodeComplete) - CodeComplete->CodeCompleteMacroArgument(MacroName.getIdentifierInfo(), - MI, NumActuals); - LexUnexpandedToken(Tok); - } - if (Tok.is(tok::eof) || Tok.is(tok::eod)) { // "#if f(<eof>" & "#if f(\n" Diag(MacroName, diag::err_unterm_macro_invoc); // Do not lose the EOF/EOD. Return it to the client. @@ -393,7 +390,15 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, if (MacroInfo *MI = getMacroInfo(Tok.getIdentifierInfo())) if (!MI->isEnabled()) Tok.setFlag(Token::DisableExpand); + } else if (Tok.is(tok::code_completion)) { + if (CodeComplete) + CodeComplete->CodeCompleteMacroArgument(MacroName.getIdentifierInfo(), + MI, NumActuals); + // Don't mark that we reached the code-completion point because the + // parser is going to handle the token and there will be another + // code-completion callback. } + ArgTokens.push_back(Tok); } @@ -416,8 +421,10 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, // Empty arguments are standard in C99 and C++0x, and are supported as an extension in // other modes. - if (ArgTokens.size() == ArgTokenStart && !Features.C99 && !Features.CPlusPlus0x) - Diag(Tok, diag::ext_empty_fnmacro_arg); + if (ArgTokens.size() == ArgTokenStart && !Features.C99) + Diag(Tok, Features.CPlusPlus0x ? + diag::warn_cxx98_compat_empty_fnmacro_arg : + diag::ext_empty_fnmacro_arg); // Add a marker EOF token to the end of the token list for this argument. Token EOFTok; @@ -487,8 +494,7 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, return 0; } - return MacroArgs::create(MI, ArgTokens.data(), ArgTokens.size(), - isVarargsElided, *this); + return MacroArgs::create(MI, ArgTokens, isVarargsElided, *this); } /// \brief Keeps macro expanded tokens for TokenLexers. 
@@ -497,7 +503,7 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, /// going to lex in the cache and when it finishes the tokens are removed /// from the end of the cache. Token *Preprocessor::cacheMacroExpandedTokens(TokenLexer *tokLexer, - llvm::ArrayRef<Token> tokens) { + ArrayRef<Token> tokens) { assert(tokLexer); if (tokens.empty()) return 0; @@ -597,34 +603,48 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("objc_arc", LangOpts.ObjCAutoRefCount) .Case("objc_arc_weak", LangOpts.ObjCAutoRefCount && LangOpts.ObjCRuntimeHasWeak) + .Case("objc_fixed_enum", LangOpts.ObjC2) + .Case("objc_instancetype", LangOpts.ObjC2) .Case("objc_nonfragile_abi", LangOpts.ObjCNonFragileABI) .Case("objc_weak_class", LangOpts.ObjCNonFragileABI) .Case("ownership_holds", true) .Case("ownership_returns", true) .Case("ownership_takes", true) // C1X features + .Case("c_alignas", LangOpts.C1X) .Case("c_generic_selections", LangOpts.C1X) .Case("c_static_assert", LangOpts.C1X) // C++0x features .Case("cxx_access_control_sfinae", LangOpts.CPlusPlus0x) .Case("cxx_alias_templates", LangOpts.CPlusPlus0x) + .Case("cxx_alignas", LangOpts.CPlusPlus0x) .Case("cxx_attributes", LangOpts.CPlusPlus0x) .Case("cxx_auto_type", LangOpts.CPlusPlus0x) + //.Case("cxx_constexpr", false); .Case("cxx_decltype", LangOpts.CPlusPlus0x) .Case("cxx_default_function_template_args", LangOpts.CPlusPlus0x) .Case("cxx_delegating_constructors", LangOpts.CPlusPlus0x) .Case("cxx_deleted_functions", LangOpts.CPlusPlus0x) + .Case("cxx_explicit_conversions", LangOpts.CPlusPlus0x) + //.Case("cxx_generalized_initializers", LangOpts.CPlusPlus0x) + .Case("cxx_implicit_moves", LangOpts.CPlusPlus0x) + //.Case("cxx_inheriting_constructors", false) .Case("cxx_inline_namespaces", LangOpts.CPlusPlus0x) //.Case("cxx_lambdas", false) + .Case("cxx_nonstatic_member_init", LangOpts.CPlusPlus0x) .Case("cxx_noexcept", LangOpts.CPlusPlus0x) .Case("cxx_nullptr", LangOpts.CPlusPlus0x) 
.Case("cxx_override_control", LangOpts.CPlusPlus0x) .Case("cxx_range_for", LangOpts.CPlusPlus0x) + //.Case("cxx_raw_string_literals", false) .Case("cxx_reference_qualified_functions", LangOpts.CPlusPlus0x) .Case("cxx_rvalue_references", LangOpts.CPlusPlus0x) .Case("cxx_strong_enums", LangOpts.CPlusPlus0x) .Case("cxx_static_assert", LangOpts.CPlusPlus0x) .Case("cxx_trailing_return", LangOpts.CPlusPlus0x) + //.Case("cxx_unicode_literals", false) + //.Case("cxx_unrestricted_unions", false) + //.Case("cxx_user_literals", false) .Case("cxx_variadic_templates", LangOpts.CPlusPlus0x) // Type traits .Case("has_nothrow_assign", LangOpts.CPlusPlus) @@ -639,16 +659,31 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("is_base_of", LangOpts.CPlusPlus) .Case("is_class", LangOpts.CPlusPlus) .Case("is_convertible_to", LangOpts.CPlusPlus) - .Case("is_empty", LangOpts.CPlusPlus) + // __is_empty is available only if the horrible + // "struct __is_empty" parsing hack hasn't been needed in this + // translation unit. If it has, __is_empty reverts to a normal + // identifier and __has_feature(is_empty) evaluates false. + .Case("is_empty", + LangOpts.CPlusPlus && + PP.getIdentifierInfo("__is_empty")->getTokenID() + != tok::identifier) .Case("is_enum", LangOpts.CPlusPlus) .Case("is_literal", LangOpts.CPlusPlus) .Case("is_standard_layout", LangOpts.CPlusPlus) - .Case("is_pod", LangOpts.CPlusPlus) + // __is_pod is available only if the horrible + // "struct __is_pod" parsing hack hasn't been needed in this + // translation unit. If it has, __is_pod reverts to a normal + // identifier and __has_feature(is_pod) evaluates false. 
+ .Case("is_pod", + LangOpts.CPlusPlus && + PP.getIdentifierInfo("__is_pod")->getTokenID() + != tok::identifier) .Case("is_polymorphic", LangOpts.CPlusPlus) .Case("is_trivial", LangOpts.CPlusPlus) .Case("is_trivially_copyable", LangOpts.CPlusPlus) .Case("is_union", LangOpts.CPlusPlus) .Case("tls", PP.getTargetInfo().isTLSSupported()) + .Case("underlying_type", LangOpts.CPlusPlus) .Default(false); } @@ -661,7 +696,8 @@ static bool HasExtension(const Preprocessor &PP, const IdentifierInfo *II) { // If the use of an extension results in an error diagnostic, extensions are // effectively unavailable, so just return false here. - if (PP.getDiagnostics().getExtensionHandlingBehavior()==Diagnostic::Ext_Error) + if (PP.getDiagnostics().getExtensionHandlingBehavior() == + DiagnosticsEngine::Ext_Error) return false; const LangOptions &LangOpts = PP.getLangOptions(); @@ -670,12 +706,16 @@ static bool HasExtension(const Preprocessor &PP, const IdentifierInfo *II) { // must be less restrictive than HasFeature's. return llvm::StringSwitch<bool>(II->getName()) // C1X features supported by other languages as extensions. + .Case("c_alignas", true) .Case("c_generic_selections", true) .Case("c_static_assert", true) // C++0x features supported by other languages as extensions. .Case("cxx_deleted_functions", LangOpts.CPlusPlus) + .Case("cxx_explicit_conversions", LangOpts.CPlusPlus) .Case("cxx_inline_namespaces", LangOpts.CPlusPlus) + .Case("cxx_nonstatic_member_init", LangOpts.CPlusPlus) .Case("cxx_override_control", LangOpts.CPlusPlus) + .Case("cxx_range_for", LangOpts.CPlusPlus) .Case("cxx_reference_qualified_functions", LangOpts.CPlusPlus) .Case("cxx_rvalue_references", LangOpts.CPlusPlus) .Default(false); @@ -714,7 +754,7 @@ static bool EvaluateHasIncludeCommon(Token &Tok, // Reserve a buffer to get the spelling. 
llvm::SmallString<128> FilenameBuffer; - llvm::StringRef Filename; + StringRef Filename; SourceLocation EndLoc; switch (Tok.getKind()) { @@ -753,7 +793,7 @@ static bool EvaluateHasIncludeCommon(Token &Tok, // Search include directories. const DirectoryLookup *CurDir; const FileEntry *File = - PP.LookupFile(Filename, isAngled, LookupFrom, CurDir, NULL, NULL); + PP.LookupFile(Filename, isAngled, LookupFrom, CurDir, NULL, NULL, NULL); // Get the result value. Result = true means the file exists. bool Result = File != 0; @@ -837,7 +877,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { // can matter for a function-like macro that expands to contain __LINE__. // Skip down through expansion points until we find a file loc for the // end of the expansion history. - Loc = SourceMgr.getInstantiationRange(Loc).second; + Loc = SourceMgr.getExpansionRange(Loc).second; PresumedLoc PLoc = SourceMgr.getPresumedLoc(Loc); // __LINE__ expands to a simple numeric value. @@ -874,18 +914,18 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { ComputeDATE_TIME(DATELoc, TIMELoc, *this); Tok.setKind(tok::string_literal); Tok.setLength(strlen("\"Mmm dd yyyy\"")); - Tok.setLocation(SourceMgr.createInstantiationLoc(DATELoc, Tok.getLocation(), - Tok.getLocation(), - Tok.getLength())); + Tok.setLocation(SourceMgr.createExpansionLoc(DATELoc, Tok.getLocation(), + Tok.getLocation(), + Tok.getLength())); return; } else if (II == Ident__TIME__) { if (!TIMELoc.isValid()) ComputeDATE_TIME(DATELoc, TIMELoc, *this); Tok.setKind(tok::string_literal); Tok.setLength(strlen("\"hh:mm:ss\"")); - Tok.setLocation(SourceMgr.createInstantiationLoc(TIMELoc, Tok.getLocation(), - Tok.getLocation(), - Tok.getLength())); + Tok.setLocation(SourceMgr.createExpansionLoc(TIMELoc, Tok.getLocation(), + Tok.getLocation(), + Tok.getLength())); return; } else if (II == Ident__INCLUDE_LEVEL__) { // Compute the presumed include depth of this token. 
This can be affected @@ -923,7 +963,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { Result = "??? ??? ?? ??:??:?? ????\n"; } // Surround the string with " and strip the trailing newline. - OS << '"' << llvm::StringRef(Result, strlen(Result)-1) << '"'; + OS << '"' << StringRef(Result, strlen(Result)-1) << '"'; Tok.setKind(tok::string_literal); } else if (II == Ident__COUNTER__) { // __COUNTER__ expands to a simple numeric value. @@ -983,10 +1023,78 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { Value = EvaluateHasIncludeNext(Tok, II, *this); OS << (int)Value; Tok.setKind(tok::numeric_constant); + } else if (II == Ident__has_warning) { + // The argument should be a parenthesized string literal. + // The argument to these builtins should be a parenthesized identifier. + SourceLocation StartLoc = Tok.getLocation(); + bool IsValid = false; + bool Value = false; + // Read the '('. + Lex(Tok); + do { + if (Tok.is(tok::l_paren)) { + // Read the string. + Lex(Tok); + + // We need at least one string literal. + if (!Tok.is(tok::string_literal)) { + StartLoc = Tok.getLocation(); + IsValid = false; + // Eat tokens until ')'. + do Lex(Tok); while (!(Tok.is(tok::r_paren) || Tok.is(tok::eod))); + break; + } + + // String concatenation allows multiple strings, which can even come + // from macro expansion. + SmallVector<Token, 4> StrToks; + while (Tok.is(tok::string_literal)) { + StrToks.push_back(Tok); + LexUnexpandedToken(Tok); + } + + // Is the end a ')'? + if (!(IsValid = Tok.is(tok::r_paren))) + break; + + // Concatenate and parse the strings. 
+ StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this); + assert(Literal.isAscii() && "Didn't allow wide strings in"); + if (Literal.hadError) + break; + if (Literal.Pascal) { + Diag(Tok, diag::warn_pragma_diagnostic_invalid); + break; + } + + StringRef WarningName(Literal.GetString()); + + if (WarningName.size() < 3 || WarningName[0] != '-' || + WarningName[1] != 'W') { + Diag(StrToks[0].getLocation(), diag::warn_has_warning_invalid_option); + break; + } + + // Finally, check if the warning flags maps to a diagnostic group. + // We construct a SmallVector here to talk to getDiagnosticIDs(). + // Although we don't use the result, this isn't a hot path, and not + // worth special casing. + llvm::SmallVector<diag::kind, 10> Diags; + Value = !getDiagnostics().getDiagnosticIDs()-> + getDiagnosticsInGroup(WarningName.substr(2), Diags); + } + } while (false); + + if (!IsValid) + Diag(StartLoc, diag::err_warning_check_malformed); + + OS << (int)Value; + Tok.setKind(tok::numeric_constant); } else { - assert(0 && "Unknown identifier!"); + llvm_unreachable("Unknown identifier!"); } - CreateString(OS.str().data(), OS.str().size(), Tok, Tok.getLocation()); + CreateString(OS.str().data(), OS.str().size(), Tok, + Tok.getLocation(), Tok.getLocation()); } void Preprocessor::markMacroAsUsed(MacroInfo *MI) { diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp index e5ef0fdf20eb..e0c4cf0c16c8 100644 --- a/lib/Lex/PTHLexer.cpp +++ b/lib/Lex/PTHLexer.cpp @@ -73,7 +73,7 @@ LexNextToken: Tok.setKind(TKind); Tok.setFlag(TFlags); assert(!LexingRawMode); - Tok.setLocation(FileStartLoc.getFileLocWithOffset(FileOffset)); + Tok.setLocation(FileStartLoc.getLocWithOffset(FileOffset)); Tok.setLength(Len); // Handle identifiers. @@ -147,7 +147,7 @@ bool PTHLexer::LexEndOfFile(Token &Result) { // If we are in a #if directive, emit an error. 
while (!ConditionalStack.empty()) { - if (!PP->isCodeCompletionFile(FileStartLoc)) + if (PP->getCodeCompletionFileLoc() != FileStartLoc) PP->Diag(ConditionalStack.back().IfLoc, diag::err_pp_unterminated_conditional); ConditionalStack.pop_back(); @@ -297,7 +297,7 @@ SourceLocation PTHLexer::getSourceLocation() { // NOTE: This is a virtual function; hence it is defined out-of-line. const unsigned char *OffsetPtr = CurPtr + (DISK_TOKEN_SIZE - 4); uint32_t Offset = ReadLE32(OffsetPtr); - return FileStartLoc.getFileLocWithOffset(Offset); + return FileStartLoc.getLocWithOffset(Offset); } //===----------------------------------------------------------------------===// @@ -380,7 +380,7 @@ public: } static unsigned ComputeHash(const internal_key_type& a) { - return llvm::HashString(llvm::StringRef(a.first, a.second)); + return llvm::HashString(StringRef(a.first, a.second)); } // This hopefully will just get inlined and removed by the optimizer. @@ -431,11 +431,12 @@ PTHManager::~PTHManager() { free(PerIDCache); } -static void InvalidPTH(Diagnostic &Diags, const char *Msg) { - Diags.Report(Diags.getCustomDiagID(Diagnostic::Error, Msg)); +static void InvalidPTH(DiagnosticsEngine &Diags, const char *Msg) { + Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Error, Msg)); } -PTHManager *PTHManager::Create(const std::string &file, Diagnostic &Diags) { +PTHManager *PTHManager::Create(const std::string &file, + DiagnosticsEngine &Diags) { // Memory map the PTH file. llvm::OwningPtr<llvm::MemoryBuffer> File; @@ -572,10 +573,10 @@ IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) { return II; } -IdentifierInfo* PTHManager::get(llvm::StringRef Name) { +IdentifierInfo* PTHManager::get(StringRef Name) { PTHStringIdLookup& SL = *((PTHStringIdLookup*)StringIdLookup); // Double check our assumption that the last character isn't '\0'. 
- assert(Name.empty() || Name.data()[Name.size()-1] != '\0'); + assert(Name.empty() || Name.back() != '\0'); PTHStringIdLookup::iterator I = SL.find(std::make_pair(Name.data(), Name.size())); if (I == SL.end()) // No identifier found? diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp index e6b28c13317b..f6532c2175a1 100644 --- a/lib/Lex/Pragma.cpp +++ b/lib/Lex/Pragma.cpp @@ -54,11 +54,11 @@ PragmaNamespace::~PragmaNamespace() { /// specified name. If not, return the handler for the null identifier if it /// exists, otherwise return null. If IgnoreNull is true (the default) then /// the null handler isn't returned on failure to match. -PragmaHandler *PragmaNamespace::FindHandler(llvm::StringRef Name, +PragmaHandler *PragmaNamespace::FindHandler(StringRef Name, bool IgnoreNull) const { if (PragmaHandler *Handler = Handlers.lookup(Name)) return Handler; - return IgnoreNull ? 0 : Handlers.lookup(llvm::StringRef()); + return IgnoreNull ? 0 : Handlers.lookup(StringRef()); } void PragmaNamespace::AddPragma(PragmaHandler *Handler) { @@ -85,7 +85,7 @@ void PragmaNamespace::HandlePragma(Preprocessor &PP, // Get the handler for this token. If there is no handler, ignore the pragma. PragmaHandler *Handler = FindHandler(Tok.getIdentifierInfo() ? Tok.getIdentifierInfo()->getName() - : llvm::StringRef(), + : StringRef(), /*IgnoreNull=*/false); if (Handler == 0) { PP.Diag(Tok, diag::warn_pragma_ignored); @@ -210,7 +210,7 @@ void Preprocessor::HandleMicrosoft__pragma(Token &Tok) { } // Get the tokens enclosed within the __pragma(), as well as the final ')'. - llvm::SmallVector<Token, 32> PragmaToks; + SmallVector<Token, 32> PragmaToks; int NumParens = 0; Lex(Tok); while (Tok.isNot(tok::eof)) { @@ -353,7 +353,7 @@ void Preprocessor::HandlePragmaDependency(Token &DependencyTok) { // Reserve a buffer to get the spelling. 
llvm::SmallString<128> FilenameBuffer; bool Invalid = false; - llvm::StringRef Filename = getSpelling(FilenameTok, FilenameBuffer, &Invalid); + StringRef Filename = getSpelling(FilenameTok, FilenameBuffer, &Invalid); if (Invalid) return; @@ -366,9 +366,11 @@ void Preprocessor::HandlePragmaDependency(Token &DependencyTok) { // Search include directories for this file. const DirectoryLookup *CurDir; - const FileEntry *File = LookupFile(Filename, isAngled, 0, CurDir, NULL, NULL); + const FileEntry *File = LookupFile(Filename, isAngled, 0, CurDir, NULL, NULL, + NULL); if (File == 0) { - Diag(FilenameTok, diag::warn_pp_file_not_found) << Filename; + if (!SuppressIncludeNotFoundError) + Diag(FilenameTok, diag::err_pp_file_not_found) << Filename; return; } @@ -436,7 +438,7 @@ void Preprocessor::HandlePragmaComment(Token &Tok) { // String concatenation allows multiple strings, which can even come from // macro expansion. // "foo " "bar" "Baz" - llvm::SmallVector<Token, 4> StrToks; + SmallVector<Token, 4> StrToks; while (Tok.is(tok::string_literal)) { StrToks.push_back(Tok); Lex(Tok); @@ -444,7 +446,7 @@ void Preprocessor::HandlePragmaComment(Token &Tok) { // Concatenate and parse the strings. StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this); - assert(!Literal.AnyWide && "Didn't allow wide strings in"); + assert(Literal.isAscii() && "Didn't allow wide strings in"); if (Literal.hadError) return; if (Literal.Pascal) { @@ -512,7 +514,7 @@ void Preprocessor::HandlePragmaMessage(Token &Tok) { // String concatenation allows multiple strings, which can even come from // macro expansion. // "foo " "bar" "Baz" - llvm::SmallVector<Token, 4> StrToks; + SmallVector<Token, 4> StrToks; while (Tok.is(tok::string_literal)) { StrToks.push_back(Tok); Lex(Tok); @@ -520,7 +522,7 @@ void Preprocessor::HandlePragmaMessage(Token &Tok) { // Concatenate and parse the strings. 
StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this); - assert(!Literal.AnyWide && "Didn't allow wide strings in"); + assert(Literal.isAscii() && "Didn't allow wide strings in"); if (Literal.hadError) return; if (Literal.Pascal) { @@ -528,7 +530,7 @@ void Preprocessor::HandlePragmaMessage(Token &Tok) { return; } - llvm::StringRef MessageString(Literal.GetString()); + StringRef MessageString(Literal.GetString()); if (ExpectClosingParen) { if (Tok.isNot(tok::r_paren)) { @@ -662,7 +664,7 @@ void Preprocessor::HandlePragmaPopMacro(Token &PopMacroTok) { /// AddPragmaHandler - Add the specified pragma handler to the preprocessor. /// If 'Namespace' is non-null, then it is a token required to exist on the /// pragma line before the pragma string starts, e.g. "STDC" or "GCC". -void Preprocessor::AddPragmaHandler(llvm::StringRef Namespace, +void Preprocessor::AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler) { PragmaNamespace *InsertNS = PragmaHandlers; @@ -693,7 +695,7 @@ void Preprocessor::AddPragmaHandler(llvm::StringRef Namespace, /// preprocessor. If \arg Namespace is non-null, then it should be the /// namespace that \arg Handler was added to. It is an error to remove /// a handler that has not been registered. -void Preprocessor::RemovePragmaHandler(llvm::StringRef Namespace, +void Preprocessor::RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler) { PragmaNamespace *NS = PragmaHandlers; @@ -802,7 +804,7 @@ struct PragmaDebugHandler : public PragmaHandler { IdentifierInfo *II = Tok.getIdentifierInfo(); if (II->isStr("assert")) { - assert(0 && "This is an assertion!"); + llvm_unreachable("This is an assertion!"); } else if (II->isStr("crash")) { *(volatile int*) 0x11 = 0; } else if (II->isStr("llvm_fatal_error")) { @@ -889,7 +891,7 @@ public: // String concatenation allows multiple strings, which can even come from // macro expansion. 
// "foo " "bar" "Baz" - llvm::SmallVector<Token, 4> StrToks; + SmallVector<Token, 4> StrToks; while (Tok.is(tok::string_literal)) { StrToks.push_back(Tok); PP.LexUnexpandedToken(Tok); @@ -902,7 +904,7 @@ public: // Concatenate and parse the strings. StringLiteralParser Literal(&StrToks[0], StrToks.size(), PP); - assert(!Literal.AnyWide && "Didn't allow wide strings in"); + assert(Literal.isAscii() && "Didn't allow wide strings in"); if (Literal.hadError) return; if (Literal.Pascal) { @@ -910,7 +912,7 @@ public: return; } - llvm::StringRef WarningName(Literal.GetString()); + StringRef WarningName(Literal.GetString()); if (WarningName.size() < 3 || WarningName[0] != '-' || WarningName[1] != 'W') { @@ -1003,6 +1005,60 @@ struct PragmaSTDC_UnknownHandler : public PragmaHandler { } }; +/// PragmaARCCFCodeAuditedHandler - +/// #pragma clang arc_cf_code_audited begin/end +struct PragmaARCCFCodeAuditedHandler : public PragmaHandler { + PragmaARCCFCodeAuditedHandler() : PragmaHandler("arc_cf_code_audited") {} + virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + Token &NameTok) { + SourceLocation Loc = NameTok.getLocation(); + bool IsBegin; + + Token Tok; + + // Lex the 'begin' or 'end'. + PP.LexUnexpandedToken(Tok); + const IdentifierInfo *BeginEnd = Tok.getIdentifierInfo(); + if (BeginEnd && BeginEnd->isStr("begin")) { + IsBegin = true; + } else if (BeginEnd && BeginEnd->isStr("end")) { + IsBegin = false; + } else { + PP.Diag(Tok.getLocation(), diag::err_pp_arc_cf_code_audited_syntax); + return; + } + + // Verify that this is followed by EOD. + PP.LexUnexpandedToken(Tok); + if (Tok.isNot(tok::eod)) + PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma"; + + // The start location of the active audit. + SourceLocation BeginLoc = PP.getPragmaARCCFCodeAuditedLoc(); + + // The start location we want after processing this. + SourceLocation NewLoc; + + if (IsBegin) { + // Complain about attempts to re-enter an audit. 
+ if (BeginLoc.isValid()) { + PP.Diag(Loc, diag::err_pp_double_begin_of_arc_cf_code_audited); + PP.Diag(BeginLoc, diag::note_pragma_entered_here); + } + NewLoc = Loc; + } else { + // Complain about attempts to leave an audit that doesn't exist. + if (!BeginLoc.isValid()) { + PP.Diag(Loc, diag::err_pp_unmatched_end_of_arc_cf_code_audited); + return; + } + NewLoc = SourceLocation(); + } + + PP.setPragmaARCCFCodeAuditedLoc(NewLoc); + } +}; + } // end anonymous namespace @@ -1026,13 +1082,14 @@ void Preprocessor::RegisterBuiltinPragmas() { AddPragmaHandler("clang", new PragmaDebugHandler()); AddPragmaHandler("clang", new PragmaDependencyHandler()); AddPragmaHandler("clang", new PragmaDiagnosticHandler("clang")); + AddPragmaHandler("clang", new PragmaARCCFCodeAuditedHandler()); AddPragmaHandler("STDC", new PragmaSTDC_FENV_ACCESSHandler()); AddPragmaHandler("STDC", new PragmaSTDC_CX_LIMITED_RANGEHandler()); AddPragmaHandler("STDC", new PragmaSTDC_UnknownHandler()); // MS extensions. - if (Features.Microsoft) { + if (Features.MicrosoftExt) { AddPragmaHandler(new PragmaCommentHandler()); } } diff --git a/lib/Lex/PreprocessingRecord.cpp b/lib/Lex/PreprocessingRecord.cpp index 9f93ab04502a..2816609d5f8f 100644 --- a/lib/Lex/PreprocessingRecord.cpp +++ b/lib/Lex/PreprocessingRecord.cpp @@ -14,8 +14,8 @@ #include "clang/Lex/PreprocessingRecord.h" #include "clang/Lex/MacroInfo.h" #include "clang/Lex/Token.h" -#include "clang/Basic/IdentifierTable.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Capacity.h" using namespace clang; @@ -24,7 +24,7 @@ ExternalPreprocessingRecordSource::~ExternalPreprocessingRecordSource() { } InclusionDirective::InclusionDirective(PreprocessingRecord &PPRec, InclusionKind Kind, - llvm::StringRef FileName, + StringRef FileName, bool InQuotes, const FileEntry *File, SourceRange Range) : PreprocessingDirective(InclusionDirectiveKind, Range), @@ -34,116 +34,254 @@ InclusionDirective::InclusionDirective(PreprocessingRecord &PPRec, = 
(char*)PPRec.Allocate(FileName.size() + 1, llvm::alignOf<char>()); memcpy(Memory, FileName.data(), FileName.size()); Memory[FileName.size()] = 0; - this->FileName = llvm::StringRef(Memory, FileName.size()); + this->FileName = StringRef(Memory, FileName.size()); } -void PreprocessingRecord::MaybeLoadPreallocatedEntities() const { - if (!ExternalSource || LoadedPreallocatedEntities) - return; - - LoadedPreallocatedEntities = true; - ExternalSource->ReadPreprocessedEntities(); +PreprocessingRecord::PreprocessingRecord(SourceManager &SM, + bool IncludeNestedMacroExpansions) + : SourceMgr(SM), IncludeNestedMacroExpansions(IncludeNestedMacroExpansions), + ExternalSource(0) +{ } -PreprocessingRecord::PreprocessingRecord(bool IncludeNestedMacroExpansions) - : IncludeNestedMacroExpansions(IncludeNestedMacroExpansions), - ExternalSource(0), NumPreallocatedEntities(0), - LoadedPreallocatedEntities(false) -{ +/// \brief Returns a pair of [Begin, End) iterators of preprocessed entities +/// that source range \arg R encompasses. +std::pair<PreprocessingRecord::iterator, PreprocessingRecord::iterator> +PreprocessingRecord::getPreprocessedEntitiesInRange(SourceRange Range) { + if (Range.isInvalid()) + return std::make_pair(iterator(this, 0), iterator(this, 0)); + assert(!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),Range.getBegin())); + + std::pair<unsigned, unsigned> + Local = findLocalPreprocessedEntitiesInRange(Range); + + // Check if range spans local entities. + if (!ExternalSource || SourceMgr.isLocalSourceLocation(Range.getBegin())) + return std::make_pair(iterator(this, Local.first), + iterator(this, Local.second)); + + std::pair<unsigned, unsigned> + Loaded = ExternalSource->findPreprocessedEntitiesInRange(Range); + + // Check if range spans local entities. 
+ if (Loaded.first == Loaded.second) + return std::make_pair(iterator(this, Local.first), + iterator(this, Local.second)); + + unsigned TotalLoaded = LoadedPreprocessedEntities.size(); + + // Check if range spans loaded entities. + if (Local.first == Local.second) + return std::make_pair(iterator(this, int(Loaded.first)-TotalLoaded), + iterator(this, int(Loaded.second)-TotalLoaded)); + + // Range spands loaded and local entities. + return std::make_pair(iterator(this, int(Loaded.first)-TotalLoaded), + iterator(this, Local.second)); } -PreprocessingRecord::iterator -PreprocessingRecord::begin(bool OnlyLocalEntities) { - if (OnlyLocalEntities) - return PreprocessedEntities.begin() + NumPreallocatedEntities; - - MaybeLoadPreallocatedEntities(); - return PreprocessedEntities.begin(); +std::pair<unsigned, unsigned> +PreprocessingRecord::findLocalPreprocessedEntitiesInRange( + SourceRange Range) const { + if (Range.isInvalid()) + return std::make_pair(0,0); + assert(!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),Range.getBegin())); + + unsigned Begin = findBeginLocalPreprocessedEntity(Range.getBegin()); + unsigned End = findEndLocalPreprocessedEntity(Range.getEnd()); + return std::make_pair(Begin, End); } -PreprocessingRecord::iterator PreprocessingRecord::end(bool OnlyLocalEntities) { - if (!OnlyLocalEntities) - MaybeLoadPreallocatedEntities(); - - return PreprocessedEntities.end(); +namespace { + +template <SourceLocation (SourceRange::*getRangeLoc)() const> +struct PPEntityComp { + const SourceManager &SM; + + explicit PPEntityComp(const SourceManager &SM) : SM(SM) { } + + bool operator()(PreprocessedEntity *L, PreprocessedEntity *R) const { + SourceLocation LHS = getLoc(L); + SourceLocation RHS = getLoc(R); + return SM.isBeforeInTranslationUnit(LHS, RHS); + } + + bool operator()(PreprocessedEntity *L, SourceLocation RHS) const { + SourceLocation LHS = getLoc(L); + return SM.isBeforeInTranslationUnit(LHS, RHS); + } + + bool operator()(SourceLocation LHS, 
PreprocessedEntity *R) const { + SourceLocation RHS = getLoc(R); + return SM.isBeforeInTranslationUnit(LHS, RHS); + } + + SourceLocation getLoc(PreprocessedEntity *PPE) const { + SourceRange Range = PPE->getSourceRange(); + return (Range.*getRangeLoc)(); + } +}; + } -PreprocessingRecord::const_iterator -PreprocessingRecord::begin(bool OnlyLocalEntities) const { - if (OnlyLocalEntities) - return PreprocessedEntities.begin() + NumPreallocatedEntities; - - MaybeLoadPreallocatedEntities(); - return PreprocessedEntities.begin(); +unsigned PreprocessingRecord::findBeginLocalPreprocessedEntity( + SourceLocation Loc) const { + if (SourceMgr.isLoadedSourceLocation(Loc)) + return 0; + + size_t Count = PreprocessedEntities.size(); + size_t Half; + std::vector<PreprocessedEntity *>::const_iterator + First = PreprocessedEntities.begin(); + std::vector<PreprocessedEntity *>::const_iterator I; + + // Do a binary search manually instead of using std::lower_bound because + // The end locations of entities may be unordered (when a macro expansion + // is inside another macro argument), but for this case it is not important + // whether we get the first macro expansion or its containing macro. 
+ while (Count > 0) { + Half = Count/2; + I = First; + std::advance(I, Half); + if (SourceMgr.isBeforeInTranslationUnit((*I)->getSourceRange().getEnd(), + Loc)){ + First = I; + ++First; + Count = Count - Half - 1; + } else + Count = Half; + } + + return First - PreprocessedEntities.begin(); } -PreprocessingRecord::const_iterator -PreprocessingRecord::end(bool OnlyLocalEntities) const { - if (!OnlyLocalEntities) - MaybeLoadPreallocatedEntities(); - - return PreprocessedEntities.end(); +unsigned PreprocessingRecord::findEndLocalPreprocessedEntity( + SourceLocation Loc) const { + if (SourceMgr.isLoadedSourceLocation(Loc)) + return 0; + + std::vector<PreprocessedEntity *>::const_iterator + I = std::upper_bound(PreprocessedEntities.begin(), + PreprocessedEntities.end(), + Loc, + PPEntityComp<&SourceRange::getBegin>(SourceMgr)); + return I - PreprocessedEntities.begin(); } void PreprocessingRecord::addPreprocessedEntity(PreprocessedEntity *Entity) { - PreprocessedEntities.push_back(Entity); + assert(Entity); + SourceLocation BeginLoc = Entity->getSourceRange().getBegin(); + + // Check normal case, this entity begin location is after the previous one. + if (PreprocessedEntities.empty() || + !SourceMgr.isBeforeInTranslationUnit(BeginLoc, + PreprocessedEntities.back()->getSourceRange().getBegin())) { + PreprocessedEntities.push_back(Entity); + return; + } + + // The entity's location is not after the previous one; this can happen rarely + // e.g. with "#include MACRO". + // Iterate the entities vector in reverse until we find the right place to + // insert the new entity. 
+ for (std::vector<PreprocessedEntity *>::iterator + RI = PreprocessedEntities.end(), Begin = PreprocessedEntities.begin(); + RI != Begin; --RI) { + std::vector<PreprocessedEntity *>::iterator I = RI; + --I; + if (!SourceMgr.isBeforeInTranslationUnit(BeginLoc, + (*I)->getSourceRange().getBegin())) { + PreprocessedEntities.insert(RI, Entity); + return; + } + } } void PreprocessingRecord::SetExternalSource( - ExternalPreprocessingRecordSource &Source, - unsigned NumPreallocatedEntities) { + ExternalPreprocessingRecordSource &Source) { assert(!ExternalSource && "Preprocessing record already has an external source"); ExternalSource = &Source; - this->NumPreallocatedEntities = NumPreallocatedEntities; - PreprocessedEntities.insert(PreprocessedEntities.begin(), - NumPreallocatedEntities, 0); } -void PreprocessingRecord::SetPreallocatedEntity(unsigned Index, - PreprocessedEntity *Entity) { - assert(Index < NumPreallocatedEntities &&"Out-of-bounds preallocated entity"); - PreprocessedEntities[Index] = Entity; +unsigned PreprocessingRecord::allocateLoadedEntities(unsigned NumEntities) { + unsigned Result = LoadedPreprocessedEntities.size(); + LoadedPreprocessedEntities.resize(LoadedPreprocessedEntities.size() + + NumEntities); + return Result; +} + +void PreprocessingRecord::RegisterMacroDefinition(MacroInfo *Macro, + PPEntityID PPID) { + MacroDefinitions[Macro] = PPID; +} + +/// \brief Retrieve the preprocessed entity at the given ID. 
+PreprocessedEntity *PreprocessingRecord::getPreprocessedEntity(PPEntityID PPID){ + if (PPID < 0) { + assert(unsigned(-PPID-1) < LoadedPreprocessedEntities.size() && + "Out-of bounds loaded preprocessed entity"); + return getLoadedPreprocessedEntity(LoadedPreprocessedEntities.size()+PPID); + } + assert(unsigned(PPID) < PreprocessedEntities.size() && + "Out-of bounds local preprocessed entity"); + return PreprocessedEntities[PPID]; } -void PreprocessingRecord::RegisterMacroDefinition(MacroInfo *Macro, - MacroDefinition *MD) { - MacroDefinitions[Macro] = MD; +/// \brief Retrieve the loaded preprocessed entity at the given index. +PreprocessedEntity * +PreprocessingRecord::getLoadedPreprocessedEntity(unsigned Index) { + assert(Index < LoadedPreprocessedEntities.size() && + "Out-of bounds loaded preprocessed entity"); + assert(ExternalSource && "No external source to load from"); + PreprocessedEntity *&Entity = LoadedPreprocessedEntities[Index]; + if (!Entity) { + Entity = ExternalSource->ReadPreprocessedEntity(Index); + if (!Entity) // Failed to load. 
+ Entity = new (*this) + PreprocessedEntity(PreprocessedEntity::InvalidKind, SourceRange()); + } + return Entity; } MacroDefinition *PreprocessingRecord::findMacroDefinition(const MacroInfo *MI) { - llvm::DenseMap<const MacroInfo *, MacroDefinition *>::iterator Pos + llvm::DenseMap<const MacroInfo *, PPEntityID>::iterator Pos = MacroDefinitions.find(MI); if (Pos == MacroDefinitions.end()) return 0; - return Pos->second; + PreprocessedEntity *Entity = getPreprocessedEntity(Pos->second); + if (Entity->isInvalid()) + return 0; + return cast<MacroDefinition>(Entity); } -void PreprocessingRecord::MacroExpands(const Token &Id, const MacroInfo* MI) { +void PreprocessingRecord::MacroExpands(const Token &Id, const MacroInfo* MI, + SourceRange Range) { if (!IncludeNestedMacroExpansions && Id.getLocation().isMacroID()) return; - if (MacroDefinition *Def = findMacroDefinition(MI)) - PreprocessedEntities.push_back( - new (*this) MacroExpansion(Id.getIdentifierInfo(), - Id.getLocation(), Def)); + if (MI->isBuiltinMacro()) + addPreprocessedEntity( + new (*this) MacroExpansion(Id.getIdentifierInfo(),Range)); + else if (MacroDefinition *Def = findMacroDefinition(MI)) + addPreprocessedEntity( + new (*this) MacroExpansion(Def, Range)); } void PreprocessingRecord::MacroDefined(const Token &Id, const MacroInfo *MI) { SourceRange R(MI->getDefinitionLoc(), MI->getDefinitionEndLoc()); MacroDefinition *Def - = new (*this) MacroDefinition(Id.getIdentifierInfo(), - MI->getDefinitionLoc(), - R); - MacroDefinitions[MI] = Def; - PreprocessedEntities.push_back(Def); + = new (*this) MacroDefinition(Id.getIdentifierInfo(), R); + addPreprocessedEntity(Def); + MacroDefinitions[MI] = getPPEntityID(PreprocessedEntities.size()-1, + /*isLoaded=*/false); } void PreprocessingRecord::MacroUndefined(const Token &Id, const MacroInfo *MI) { - llvm::DenseMap<const MacroInfo *, MacroDefinition *>::iterator Pos + llvm::DenseMap<const MacroInfo *, PPEntityID>::iterator Pos = MacroDefinitions.find(MI); if (Pos != 
MacroDefinitions.end()) MacroDefinitions.erase(Pos); @@ -152,12 +290,12 @@ void PreprocessingRecord::MacroUndefined(const Token &Id, void PreprocessingRecord::InclusionDirective( SourceLocation HashLoc, const clang::Token &IncludeTok, - llvm::StringRef FileName, + StringRef FileName, bool IsAngled, const FileEntry *File, clang::SourceLocation EndLoc, - llvm::StringRef SearchPath, - llvm::StringRef RelativePath) { + StringRef SearchPath, + StringRef RelativePath) { InclusionDirective::InclusionKind Kind = InclusionDirective::Include; switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) { @@ -185,5 +323,12 @@ void PreprocessingRecord::InclusionDirective( clang::InclusionDirective *ID = new (*this) clang::InclusionDirective(*this, Kind, FileName, !IsAngled, File, SourceRange(HashLoc, EndLoc)); - PreprocessedEntities.push_back(ID); + addPreprocessedEntity(ID); +} + +size_t PreprocessingRecord::getTotalMemory() const { + return BumpAlloc.getTotalMemory() + + llvm::capacity_in_bytes(MacroDefinitions) + + llvm::capacity_in_bytes(PreprocessedEntities) + + llvm::capacity_in_bytes(LoadedPreprocessedEntities); } diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp index e7aa286a16bf..31662ad0c116 100644 --- a/lib/Lex/Preprocessor.cpp +++ b/lib/Lex/Preprocessor.cpp @@ -35,6 +35,7 @@ #include "clang/Lex/ScratchBuffer.h" #include "clang/Lex/LexDiagnostic.h" #include "clang/Lex/CodeCompletionHandler.h" +#include "clang/Lex/ModuleLoader.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/TargetInfo.h" @@ -42,27 +43,83 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Capacity.h" using namespace clang; //===----------------------------------------------------------------------===// ExternalPreprocessorSource::~ExternalPreprocessorSource() { } -Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts, - const 
TargetInfo &target, SourceManager &SM, - HeaderSearch &Headers, +Preprocessor::Preprocessor(DiagnosticsEngine &diags, LangOptions &opts, + const TargetInfo *target, SourceManager &SM, + HeaderSearch &Headers, ModuleLoader &TheModuleLoader, IdentifierInfoLookup* IILookup, - bool OwnsHeaders) + bool OwnsHeaders, + bool DelayInitialization) : Diags(&diags), Features(opts), Target(target),FileMgr(Headers.getFileMgr()), - SourceMgr(SM), - HeaderInfo(Headers), ExternalSource(0), - Identifiers(opts, IILookup), BuiltinInfo(Target), CodeComplete(0), - CodeCompletionFile(0), SkipMainFilePreamble(0, true), CurPPLexer(0), - CurDirLookup(0), Callbacks(0), MacroArgCache(0), Record(0), MIChainHead(0), - MICache(0) { - ScratchBuf = new ScratchBuffer(SourceMgr); - CounterValue = 0; // __COUNTER__ starts at 0. + SourceMgr(SM), HeaderInfo(Headers), TheModuleLoader(TheModuleLoader), + ExternalSource(0), + Identifiers(opts, IILookup), CodeComplete(0), + CodeCompletionFile(0), CodeCompletionOffset(0), CodeCompletionReached(0), + SkipMainFilePreamble(0, true), CurPPLexer(0), + CurDirLookup(0), CurLexerKind(CLK_Lexer), Callbacks(0), MacroArgCache(0), + Record(0), MIChainHead(0), MICache(0) +{ OwnsHeaderSearch = OwnsHeaders; + + if (!DelayInitialization) { + assert(Target && "Must provide target information for PP initialization"); + Initialize(*Target); + } +} + +Preprocessor::~Preprocessor() { + assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!"); + assert(((MacroExpandingLexersStack.empty() && MacroExpandedTokens.empty()) || + isCodeCompletionReached()) && + "Preprocessor::HandleEndOfTokenLexer should have cleared those"); + + while (!IncludeMacroStack.empty()) { + delete IncludeMacroStack.back().TheLexer; + delete IncludeMacroStack.back().TheTokenLexer; + IncludeMacroStack.pop_back(); + } + // Free any macro definitions. + for (MacroInfoChain *I = MIChainHead ; I ; I = I->Next) + I->MI.Destroy(); + + // Free any cached macro expanders. 
+ for (unsigned i = 0, e = NumCachedTokenLexers; i != e; ++i) + delete TokenLexerCache[i]; + + // Free any cached MacroArgs. + for (MacroArgs *ArgList = MacroArgCache; ArgList; ) + ArgList = ArgList->deallocate(); + + // Release pragma information. + delete PragmaHandlers; + + // Delete the scratch buffer info. + delete ScratchBuf; + + // Delete the header search info, if we own it. + if (OwnsHeaderSearch) + delete &HeaderInfo; + + delete Callbacks; +} + +void Preprocessor::Initialize(const TargetInfo &Target) { + assert((!this->Target || this->Target == &Target) && + "Invalid override of target information"); + this->Target = &Target; + + // Initialize information about built-ins. + BuiltinInfo.InitializeTarget(Target); + + ScratchBuf = new ScratchBuffer(SourceMgr); + CounterValue = 0; // __COUNTER__ starts at 0. + // Clear stats. NumDirectives = NumDefined = NumUndefined = NumPragma = 0; NumIf = NumElse = NumEndif = 0; @@ -71,33 +128,35 @@ Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts, NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0; MaxIncludeStackDepth = 0; NumSkipped = 0; - + // Default to discarding comments. KeepComments = false; KeepMacroComments = false; - + SuppressIncludeNotFoundError = false; + AutoModuleImport = false; + // Macro expansion is enabled. DisableMacroExpansion = false; InMacroArgs = false; NumCachedTokenLexers = 0; - + CachedLexPos = 0; - + // We haven't read anything from the external source. ReadMacrosFromExternalSource = false; - + // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. // This gets unpoisoned where it is allowed. (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); - + // Initialize the pragma handlers. 
- PragmaHandlers = new PragmaNamespace(llvm::StringRef()); + PragmaHandlers = new PragmaNamespace(StringRef()); RegisterBuiltinPragmas(); - + // Initialize builtin macros like __LINE__ and friends. RegisterBuiltinMacros(); - + if(Features.Borland) { Ident__exception_info = getIdentifierInfo("_exception_info"); Ident___exception_info = getIdentifierInfo("__exception_info"); @@ -112,44 +171,7 @@ Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts, Ident__exception_info = Ident__exception_code = Ident__abnormal_termination = 0; Ident___exception_info = Ident___exception_code = Ident___abnormal_termination = 0; Ident_GetExceptionInfo = Ident_GetExceptionCode = Ident_AbnormalTermination = 0; - } - -} - -Preprocessor::~Preprocessor() { - assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!"); - assert(MacroExpandingLexersStack.empty() && MacroExpandedTokens.empty() && - "Preprocessor::HandleEndOfTokenLexer should have cleared those"); - - while (!IncludeMacroStack.empty()) { - delete IncludeMacroStack.back().TheLexer; - delete IncludeMacroStack.back().TheTokenLexer; - IncludeMacroStack.pop_back(); - } - - // Free any macro definitions. - for (MacroInfoChain *I = MIChainHead ; I ; I = I->Next) - I->MI.Destroy(); - - // Free any cached macro expanders. - for (unsigned i = 0, e = NumCachedTokenLexers; i != e; ++i) - delete TokenLexerCache[i]; - - // Free any cached MacroArgs. - for (MacroArgs *ArgList = MacroArgCache; ArgList; ) - ArgList = ArgList->deallocate(); - - // Release pragma information. - delete PragmaHandlers; - - // Delete the scratch buffer info. - delete ScratchBuf; - - // Delete the header search info, if we own it. 
- if (OwnsHeaderSearch) - delete &HeaderInfo; - - delete Callbacks; + } } void Preprocessor::setPTHManager(PTHManager* pm) { @@ -172,7 +194,7 @@ void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { llvm::errs() << " [ExpandDisabled]"; if (Tok.needsCleaning()) { const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); - llvm::errs() << " [UnClean='" << llvm::StringRef(Start, Tok.getLength()) + llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) << "']"; } @@ -228,7 +250,13 @@ Preprocessor::macro_begin(bool IncludeExternalMacros) const { } size_t Preprocessor::getTotalMemory() const { - return BP.getTotalMemory() + MacroExpandedTokens.capacity()*sizeof(Token); + return BP.getTotalMemory() + + llvm::capacity_in_bytes(MacroExpandedTokens) + + Predefines.capacity() /* Predefines buffer. */ + + llvm::capacity_in_bytes(Macros) + + llvm::capacity_in_bytes(PragmaPushMacroInfo) + + llvm::capacity_in_bytes(PoisonReasons) + + llvm::capacity_in_bytes(CommentHandlers); } Preprocessor::macro_iterator @@ -243,15 +271,13 @@ Preprocessor::macro_end(bool IncludeExternalMacros) const { } bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, - unsigned TruncateAtLine, - unsigned TruncateAtColumn) { - using llvm::MemoryBuffer; - - CodeCompletionFile = File; + unsigned CompleteLine, + unsigned CompleteColumn) { + assert(File); + assert(CompleteLine && CompleteColumn && "Starts from 1:1"); + assert(!CodeCompletionFile && "Already set"); - // Okay to clear out the code-completion point by passing NULL. - if (!CodeCompletionFile) - return false; + using llvm::MemoryBuffer; // Load the actual file's contents. bool Invalid = false; @@ -261,7 +287,7 @@ bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, // Find the byte position of the truncation point. 
const char *Position = Buffer->getBufferStart(); - for (unsigned Line = 1; Line < TruncateAtLine; ++Line) { + for (unsigned Line = 1; Line < CompleteLine; ++Line) { for (; *Position; ++Position) { if (*Position != '\r' && *Position != '\n') continue; @@ -275,38 +301,37 @@ bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, } } - Position += TruncateAtColumn - 1; + Position += CompleteColumn - 1; - // Truncate the buffer. + // Insert '\0' at the code-completion point. if (Position < Buffer->getBufferEnd()) { - llvm::StringRef Data(Buffer->getBufferStart(), - Position-Buffer->getBufferStart()); - MemoryBuffer *TruncatedBuffer - = MemoryBuffer::getMemBufferCopy(Data, Buffer->getBufferIdentifier()); - SourceMgr.overrideFileContents(File, TruncatedBuffer); + CodeCompletionFile = File; + CodeCompletionOffset = Position - Buffer->getBufferStart(); + + MemoryBuffer *NewBuffer = + MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1, + Buffer->getBufferIdentifier()); + char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart()); + char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf); + *NewPos = '\0'; + std::copy(Position, Buffer->getBufferEnd(), NewPos+1); + SourceMgr.overrideFileContents(File, NewBuffer); } return false; } -bool Preprocessor::isCodeCompletionFile(SourceLocation FileLoc) const { - return CodeCompletionFile && FileLoc.isFileID() && - SourceMgr.getFileEntryForID(SourceMgr.getFileID(FileLoc)) - == CodeCompletionFile; -} - void Preprocessor::CodeCompleteNaturalLanguage() { - SetCodeCompletionPoint(0, 0, 0); - getDiagnostics().setSuppressAllDiagnostics(true); if (CodeComplete) CodeComplete->CodeCompleteNaturalLanguage(); + setCodeCompletionReached(); } /// getSpelling - This method is used to get the spelling of a token into a /// SmallVector. Note that the returned StringRef may not point to the /// supplied buffer if a copy can be avoided. 
-llvm::StringRef Preprocessor::getSpelling(const Token &Tok, - llvm::SmallVectorImpl<char> &Buffer, +StringRef Preprocessor::getSpelling(const Token &Tok, + SmallVectorImpl<char> &Buffer, bool *Invalid) const { // NOTE: this has to be checked *before* testing for an IdentifierInfo. if (Tok.isNot(tok::raw_identifier)) { @@ -321,22 +346,23 @@ llvm::StringRef Preprocessor::getSpelling(const Token &Tok, const char *Ptr = Buffer.data(); unsigned Len = getSpelling(Tok, Ptr, Invalid); - return llvm::StringRef(Ptr, Len); + return StringRef(Ptr, Len); } /// CreateString - Plop the specified string into a scratch buffer and return a /// location for it. If specified, the source location provides a source /// location for the token. void Preprocessor::CreateString(const char *Buf, unsigned Len, Token &Tok, - SourceLocation ExpansionLoc) { + SourceLocation ExpansionLocStart, + SourceLocation ExpansionLocEnd) { Tok.setLength(Len); const char *DestPtr; SourceLocation Loc = ScratchBuf->getToken(Buf, Len, DestPtr); - if (ExpansionLoc.isValid()) - Loc = SourceMgr.createInstantiationLoc(Loc, ExpansionLoc, - ExpansionLoc, Len); + if (ExpansionLocStart.isValid()) + Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart, + ExpansionLocEnd, Len); Tok.setLocation(Loc); // If this is a raw identifier or a literal token, set the pointer data. @@ -407,12 +433,12 @@ IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { IdentifierInfo *II; if (!Identifier.needsCleaning()) { // No cleaning needed, just use the characters from the lexed buffer. - II = getIdentifierInfo(llvm::StringRef(Identifier.getRawIdentifierData(), + II = getIdentifierInfo(StringRef(Identifier.getRawIdentifierData(), Identifier.getLength())); } else { // Cleaning needed, alloca a buffer, clean into it, then use the buffer. 
llvm::SmallString<64> IdentifierBuffer; - llvm::StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); + StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); II = getIdentifierInfo(CleanedStr); } @@ -487,6 +513,17 @@ void Preprocessor::HandleIdentifier(Token &Identifier) { } } + // If this identifier is a keyword in C++11, produce a warning. Don't warn if + // we're not considering macro expansion, since this identifier might be the + // name of a macro. + // FIXME: This warning is disabled in cases where it shouldn't be, like + // "#define constexpr constexpr", "int constexpr;" + if (II.isCXX11CompatKeyword() & !DisableMacroExpansion) { + Diag(Identifier, diag::warn_cxx11_keyword) << II.getName(); + // Don't diagnose this keyword again in this translation unit. + II.setIsCXX11CompatKeyword(false); + } + // C++ 2.11p2: If this is an alternative representation of a C++ operator, // then we act as if it is the actual operator and not the textual // representation of it. @@ -499,6 +536,44 @@ void Preprocessor::HandleIdentifier(Token &Identifier) { // like "#define TY typeof", "TY(1) x". if (II.isExtensionToken() && !DisableMacroExpansion) Diag(Identifier, diag::ext_token_used); + + // If this is the '__import_module__' keyword, note that the next token + // indicates a module name. + if (II.getTokenID() == tok::kw___import_module__ && + !InMacroArgs && !DisableMacroExpansion) { + ModuleImportLoc = Identifier.getLocation(); + CurLexerKind = CLK_LexAfterModuleImport; + } +} + +/// \brief Lex a token following the __import_module__ keyword. +void Preprocessor::LexAfterModuleImport(Token &Result) { + // Figure out what kind of lexer we actually have. + if (CurLexer) + CurLexerKind = CLK_Lexer; + else if (CurPTHLexer) + CurLexerKind = CLK_PTHLexer; + else if (CurTokenLexer) + CurLexerKind = CLK_TokenLexer; + else + CurLexerKind = CLK_CachingLexer; + + // Lex the next token. 
+ Lex(Result); + + // The token sequence + // + // __import_module__ identifier + // + // indicates a module import directive. We already saw the __import_module__ + // keyword, so now we're looking for the identifier. + if (Result.getKind() != tok::identifier) + return; + + // Load the module. + (void)TheModuleLoader.loadModule(ModuleImportLoc, + *Result.getIdentifierInfo(), + Result.getLocation()); } void Preprocessor::AddCommentHandler(CommentHandler *Handler) { @@ -529,6 +604,8 @@ bool Preprocessor::HandleComment(Token &result, SourceRange Comment) { return true; } +ModuleLoader::~ModuleLoader() { } + CommentHandler::~CommentHandler() { } CodeCompletionHandler::~CodeCompletionHandler() { } @@ -538,6 +615,7 @@ void Preprocessor::createPreprocessingRecord( if (Record) return; - Record = new PreprocessingRecord(IncludeNestedMacroExpansions); + Record = new PreprocessingRecord(getSourceManager(), + IncludeNestedMacroExpansions); addPPCallbacks(Record); } diff --git a/lib/Lex/PreprocessorLexer.cpp b/lib/Lex/PreprocessorLexer.cpp index 808a81bd5e87..0da9ef5531e7 100644 --- a/lib/Lex/PreprocessorLexer.cpp +++ b/lib/Lex/PreprocessorLexer.cpp @@ -17,6 +17,14 @@ #include "clang/Basic/SourceManager.h" using namespace clang; +PreprocessorLexer::PreprocessorLexer(Preprocessor *pp, FileID fid) + : PP(pp), FID(fid), InitialNumSLocEntries(0), + ParsingPreprocessorDirective(false), + ParsingFilename(false), LexingRawMode(false) { + if (pp) + InitialNumSLocEntries = pp->getSourceManager().local_sloc_entry_size(); +} + /// LexIncludeFilename - After the preprocessor has parsed a #include, lex and /// (potentially) macro expand the filename. 
void PreprocessorLexer::LexIncludeFilename(Token &FilenameTok) { diff --git a/lib/Lex/ScratchBuffer.cpp b/lib/Lex/ScratchBuffer.cpp index 0e98c1751985..3d363fa4b472 100644 --- a/lib/Lex/ScratchBuffer.cpp +++ b/lib/Lex/ScratchBuffer.cpp @@ -53,7 +53,7 @@ SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len, // diagnostic points to one. CurBuffer[BytesUsed-1] = '\0'; - return BufferStartLoc.getFileLocWithOffset(BytesUsed-Len-1); + return BufferStartLoc.getLocWithOffset(BytesUsed-Len-1); } void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) { diff --git a/lib/Lex/TokenConcatenation.cpp b/lib/Lex/TokenConcatenation.cpp index 3e9e8550313c..dc6d686d6cc1 100644 --- a/lib/Lex/TokenConcatenation.cpp +++ b/lib/Lex/TokenConcatenation.cpp @@ -17,42 +17,53 @@ using namespace clang; -/// StartsWithL - Return true if the spelling of this token starts with 'L'. -bool TokenConcatenation::StartsWithL(const Token &Tok) const { - if (!Tok.needsCleaning()) { - SourceManager &SM = PP.getSourceManager(); - return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L'; - } +/// IsStringPrefix - Return true if Str is a string prefix. +/// 'L', 'u', 'U', or 'u8'. Including raw versions. +static bool IsStringPrefix(StringRef Str, bool CPlusPlus0x) { - if (Tok.getLength() < 256) { - char Buffer[256]; - const char *TokPtr = Buffer; - PP.getSpelling(Tok, TokPtr); - return TokPtr[0] == 'L'; + if (Str[0] == 'L' || + (CPlusPlus0x && (Str[0] == 'u' || Str[0] == 'U' || Str[0] == 'R'))) { + + if (Str.size() == 1) + return true; // "L", "u", "U", and "R" + + // Check for raw flavors. Need to make sure the first character wasn't + // already R. Need CPlusPlus0x check for "LR". 
+ if (Str[1] == 'R' && Str[0] != 'R' && Str.size() == 2 && CPlusPlus0x) + return true; // "LR", "uR", "UR" + + // Check for "u8" and "u8R" + if (Str[0] == 'u' && Str[1] == '8') { + if (Str.size() == 2) return true; // "u8" + if (Str.size() == 3 && Str[2] == 'R') return true; // "u8R" + } } - return PP.getSpelling(Tok)[0] == 'L'; + return false; } -/// IsIdentifierL - Return true if the spelling of this token is literally -/// 'L'. -bool TokenConcatenation::IsIdentifierL(const Token &Tok) const { +/// IsIdentifierStringPrefix - Return true if the spelling of the token +/// is literally 'L', 'u', 'U', or 'u8'. Including raw versions. +bool TokenConcatenation::IsIdentifierStringPrefix(const Token &Tok) const { + const LangOptions &LangOpts = PP.getLangOptions(); + if (!Tok.needsCleaning()) { - if (Tok.getLength() != 1) + if (Tok.getLength() < 1 || Tok.getLength() > 3) return false; SourceManager &SM = PP.getSourceManager(); - return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L'; + const char *Ptr = SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())); + return IsStringPrefix(StringRef(Ptr, Tok.getLength()), + LangOpts.CPlusPlus0x); } if (Tok.getLength() < 256) { char Buffer[256]; const char *TokPtr = Buffer; - if (PP.getSpelling(Tok, TokPtr) != 1) - return false; - return TokPtr[0] == 'L'; + unsigned length = PP.getSpelling(Tok, TokPtr); + return IsStringPrefix(StringRef(TokPtr, length), LangOpts.CPlusPlus0x); } - return PP.getSpelling(Tok) == "L"; + return IsStringPrefix(StringRef(PP.getSpelling(Tok)), LangOpts.CPlusPlus0x); } TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) { @@ -132,7 +143,7 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, // source. If they were, it must be okay to stick them together: if there // were an issue, the tokens would have been lexed differently. 
if (PrevTok.getLocation().isFileID() && Tok.getLocation().isFileID() && - PrevTok.getLocation().getFileLocWithOffset(PrevTok.getLength()) == + PrevTok.getLocation().getLocWithOffset(PrevTok.getLength()) == Tok.getLocation()) return false; @@ -179,24 +190,19 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, if (Tok.is(tok::numeric_constant)) return GetFirstChar(PP, Tok) != '.'; - if (Tok.getIdentifierInfo() || Tok.is(tok::wide_string_literal) /* || - Tok.is(tok::wide_char_literal)*/) + if (Tok.getIdentifierInfo() || Tok.is(tok::wide_string_literal) || + Tok.is(tok::utf8_string_literal) || Tok.is(tok::utf16_string_literal) || + Tok.is(tok::utf32_string_literal) || Tok.is(tok::wide_char_constant) || + Tok.is(tok::utf16_char_constant) || Tok.is(tok::utf32_char_constant)) return true; // If this isn't identifier + string, we're done. if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal)) return false; - // FIXME: need a wide_char_constant! - - // If the string was a wide string L"foo" or wide char L'f', it would - // concat with the previous identifier into fooL"bar". Avoid this. - if (StartsWithL(Tok)) - return true; - // Otherwise, this is a narrow character or string. If the *identifier* - // is a literal 'L', avoid pasting L "foo" -> L"foo". - return IsIdentifierL(PrevTok); + // is a literal 'L', 'u8', 'u' or 'U', avoid pasting L "foo" -> L"foo". 
+ return IsIdentifierStringPrefix(PrevTok); case tok::numeric_constant: return isalnum(FirstChar) || Tok.is(tok::numeric_constant) || FirstChar == '+' || FirstChar == '-' || FirstChar == '.'; diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp index 8ff82f160033..a58054490fcd 100644 --- a/lib/Lex/TokenLexer.cpp +++ b/lib/Lex/TokenLexer.cpp @@ -43,7 +43,7 @@ void TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroArgs *Actuals) { MacroExpansionStart = SourceLocation(); SourceManager &SM = PP.getSourceManager(); - MacroStartSLocOffset = SM.getNextOffset(); + MacroStartSLocOffset = SM.getNextLocalOffset(); if (NumTokens > 0) { assert(Tokens[0].getLocation().isValid()); @@ -55,12 +55,12 @@ void TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroArgs *Actuals) { // definition. Tokens that get lexed directly from the definition will // have their locations pointing inside this chunk. This is to avoid // creating separate source location entries for each token. - SourceLocation macroStart = SM.getInstantiationLoc(Tokens[0].getLocation()); - MacroDefStartInfo = SM.getDecomposedLoc(macroStart); - MacroExpansionStart = SM.createInstantiationLoc(macroStart, - ExpandLocStart, - ExpandLocEnd, - Macro->getDefinitionLength(SM)); + MacroDefStart = SM.getExpansionLoc(Tokens[0].getLocation()); + MacroDefLength = Macro->getDefinitionLength(SM); + MacroExpansionStart = SM.createExpansionLoc(MacroDefStart, + ExpandLocStart, + ExpandLocEnd, + MacroDefLength); } // If this is a function-like macro, expand the arguments and change @@ -121,9 +121,8 @@ void TokenLexer::destroy() { /// Expand the arguments of a function-like macro so that we can quickly /// return preexpanded tokens from Tokens. void TokenLexer::ExpandFunctionArguments() { - SourceManager &SM = PP.getSourceManager(); - llvm::SmallVector<Token, 128> ResultToks; + SmallVector<Token, 128> ResultToks; // Loop through 'Tokens', expanding them into ResultToks. Keep // track of whether we change anything. 
If not, no need to keep them. If so, @@ -144,19 +143,22 @@ void TokenLexer::ExpandFunctionArguments() { int ArgNo = Macro->getArgumentNum(Tokens[i+1].getIdentifierInfo()); assert(ArgNo != -1 && "Token following # is not an argument?"); - SourceLocation hashInstLoc; - if(ExpandLocStart.isValid()) { - hashInstLoc = getMacroExpansionLocation(CurTok.getLocation()); - assert(hashInstLoc.isValid() && "Expected '#' to come from definition"); - } + SourceLocation ExpansionLocStart = + getExpansionLocForMacroDefLoc(CurTok.getLocation()); + SourceLocation ExpansionLocEnd = + getExpansionLocForMacroDefLoc(Tokens[i+1].getLocation()); Token Res; if (CurTok.is(tok::hash)) // Stringify - Res = ActualArgs->getStringifiedArgument(ArgNo, PP, hashInstLoc); + Res = ActualArgs->getStringifiedArgument(ArgNo, PP, + ExpansionLocStart, + ExpansionLocEnd); else { // 'charify': don't bother caching these. Res = MacroArgs::StringifyArgument(ActualArgs->getUnexpArgument(ArgNo), - PP, true, hashInstLoc); + PP, true, + ExpansionLocStart, + ExpansionLocEnd); } // The stringified/charified string leading space flag gets set to match @@ -225,16 +227,9 @@ void TokenLexer::ExpandFunctionArguments() { } if(ExpandLocStart.isValid()) { - SourceLocation curInst = - getMacroExpansionLocation(CurTok.getLocation()); - assert(curInst.isValid() && - "Expected arg identifier to come from definition"); - for (unsigned i = FirstResult, e = ResultToks.size(); i != e; ++i) { - Token &Tok = ResultToks[i]; - Tok.setLocation(SM.createMacroArgInstantiationLoc(Tok.getLocation(), - curInst, - Tok.getLength())); - } + updateLocForMacroArgTokens(CurTok.getLocation(), + ResultToks.begin()+FirstResult, + ResultToks.end()); } // If any tokens were substituted from the argument, the whitespace @@ -282,17 +277,8 @@ void TokenLexer::ExpandFunctionArguments() { } if (ExpandLocStart.isValid()) { - SourceLocation curInst = - getMacroExpansionLocation(CurTok.getLocation()); - assert(curInst.isValid() && - "Expected arg identifier 
to come from definition"); - for (unsigned i = ResultToks.size() - NumToks, e = ResultToks.size(); - i != e; ++i) { - Token &Tok = ResultToks[i]; - Tok.setLocation(SM.createMacroArgInstantiationLoc(Tok.getLocation(), - curInst, - Tok.getLength())); - } + updateLocForMacroArgTokens(CurTok.getLocation(), + ResultToks.end()-NumToks, ResultToks.end()); } // If this token (the macro argument) was supposed to get leading @@ -417,18 +403,15 @@ void TokenLexer::Lex(Token &Tok) { // that captures all of this. if (ExpandLocStart.isValid() && // Don't do this for token streams. // Check that the token's location was not already set properly. - SM.isBeforeInSourceLocationOffset(Tok.getLocation(), - MacroStartSLocOffset)) { + SM.isBeforeInSLocAddrSpace(Tok.getLocation(), MacroStartSLocOffset)) { SourceLocation instLoc; if (Tok.is(tok::comment)) { - instLoc = SM.createInstantiationLoc(Tok.getLocation(), - ExpandLocStart, - ExpandLocEnd, - Tok.getLength()); + instLoc = SM.createExpansionLoc(Tok.getLocation(), + ExpandLocStart, + ExpandLocEnd, + Tok.getLength()); } else { - instLoc = getMacroExpansionLocation(Tok.getLocation()); - assert(instLoc.isValid() && - "Location for token not coming from definition was not set!"); + instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation()); } Tok.setLocation(instLoc); @@ -469,6 +452,7 @@ void TokenLexer::Lex(Token &Tok) { bool TokenLexer::PasteTokens(Token &Tok) { llvm::SmallString<128> Buffer; const char *ResultTokStrPtr = 0; + SourceLocation StartLoc = Tok.getLocation(); SourceLocation PasteOpLoc; do { // Consume the ## operator. @@ -562,7 +546,7 @@ bool TokenLexer::PasteTokens(Token &Tok) { if (isInvalid) { // Test for the Microsoft extension of /##/ turning into // here on the // error path. 
- if (PP.getLangOptions().Microsoft && Tok.is(tok::slash) && + if (PP.getLangOptions().MicrosoftExt && Tok.is(tok::slash) && RHS.is(tok::slash)) { HandleMicrosoftCommentPaste(Tok); return true; @@ -574,14 +558,13 @@ bool TokenLexer::PasteTokens(Token &Tok) { // information so that the user knows where it came from. SourceManager &SM = PP.getSourceManager(); SourceLocation Loc = - SM.createInstantiationLoc(PasteOpLoc, ExpandLocStart, - ExpandLocEnd, 2); + SM.createExpansionLoc(PasteOpLoc, ExpandLocStart, ExpandLocEnd, 2); // If we're in microsoft extensions mode, downgrade this from a hard // error to a warning that defaults to an error. This allows // disabling it. PP.Diag(Loc, - PP.getLangOptions().Microsoft ? diag::err_pp_bad_paste_ms - : diag::err_pp_bad_paste) + PP.getLangOptions().MicrosoftExt ? diag::err_pp_bad_paste_ms + : diag::err_pp_bad_paste) << Buffer.str(); } @@ -604,23 +587,20 @@ bool TokenLexer::PasteTokens(Token &Tok) { Tok = Result; } while (!isAtEnd() && Tokens[CurToken].is(tok::hashhash)); + SourceLocation EndLoc = Tokens[CurToken - 1].getLocation(); + // The token's current location indicate where the token was lexed from. We // need this information to compute the spelling of the token, but any // diagnostics for the expanded token should appear as if the token was - // expanded from the (##) operator. Pull this information together into + // expanded from the full ## expression. Pull this information together into // a new SourceLocation that captures all of this. 
- if (ExpandLocStart.isValid()) { - SourceManager &SM = PP.getSourceManager(); - SourceLocation pasteLocInst = - getMacroExpansionLocation(PasteOpLoc); - assert(pasteLocInst.isValid() && - "Expected '##' to come from definition"); - - Tok.setLocation(SM.createInstantiationLoc(Tok.getLocation(), - pasteLocInst, - pasteLocInst, - Tok.getLength())); - } + SourceManager &SM = PP.getSourceManager(); + if (StartLoc.isFileID()) + StartLoc = getExpansionLocForMacroDefLoc(StartLoc); + if (EndLoc.isFileID()) + EndLoc = getExpansionLocForMacroDefLoc(EndLoc); + Tok.setLocation(SM.createExpansionLoc(Tok.getLocation(), StartLoc, EndLoc, + Tok.getLength())); // Now that we got the result token, it will be subject to expansion. Since // token pasting re-lexes the result token in raw mode, identifier information @@ -666,22 +646,111 @@ void TokenLexer::HandleMicrosoftCommentPaste(Token &Tok) { PP.HandleMicrosoftCommentPaste(Tok); } -/// \brief If \arg loc is a FileID and points inside the current macro +/// \brief If \arg loc is a file ID and points inside the current macro /// definition, returns the appropriate source location pointing at the -/// macro expansion source location entry. -SourceLocation TokenLexer::getMacroExpansionLocation(SourceLocation loc) const { +/// macro expansion source location entry, otherwise it returns an invalid +/// SourceLocation. 
+SourceLocation +TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const { assert(ExpandLocStart.isValid() && MacroExpansionStart.isValid() && "Not appropriate for token streams"); - assert(loc.isValid()); + assert(loc.isValid() && loc.isFileID()); SourceManager &SM = PP.getSourceManager(); - unsigned relativeOffset; - if (loc.isFileID() && - SM.isInFileID(loc, - MacroDefStartInfo.first, MacroDefStartInfo.second, - Macro->getDefinitionLength(SM), &relativeOffset)) { - return MacroExpansionStart.getFileLocWithOffset(relativeOffset); + assert(SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength) && + "Expected loc to come from the macro definition"); + + unsigned relativeOffset = 0; + SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength, &relativeOffset); + return MacroExpansionStart.getLocWithOffset(relativeOffset); +} + +/// \brief Finds the leading run of consecutive tokens (nearby offsets in the +/// same SLoc address space), creates a single SLocEntry for them, and assigns +/// each token a SourceLocation that points into that SLocEntry. e.g. for +/// assert(foo == bar); +/// There will be a single SLocEntry for the "foo == bar" chunk and locations +/// for the 'foo', '==', 'bar' tokens will point inside that chunk. +/// +/// \arg begin_tokens will be updated to a position past all the found +/// consecutive tokens. +static void updateConsecutiveMacroArgTokens(SourceManager &SM, + SourceLocation InstLoc, + Token *&begin_tokens, + Token * end_tokens) { + assert(begin_tokens < end_tokens); + + SourceLocation FirstLoc = begin_tokens->getLocation(); + SourceLocation CurLoc = FirstLoc; + + // Compare the source location offset of tokens and group together tokens that + // are close, even if their locations point to different FileIDs. e.g. + // + // |bar | foo | cake | (3 tokens from 3 consecutive FileIDs) + // ^ ^ + // |bar foo cake| (one SLocEntry chunk for all tokens) + // + // we can perform this "merge" since the token's spelling location depends + // on the relative offset. 
+ + Token *NextTok = begin_tokens + 1; + for (; NextTok < end_tokens; ++NextTok) { + int RelOffs; + if (!SM.isInSameSLocAddrSpace(CurLoc, NextTok->getLocation(), &RelOffs)) + break; // Token from different local/loaded location. + // Check that token is not before the previous token or more than 50 + // "characters" away. + if (RelOffs < 0 || RelOffs > 50) + break; + CurLoc = NextTok->getLocation(); } - return SourceLocation(); + // For the consecutive tokens, find the length of the SLocEntry to contain + // all of them. + Token &LastConsecutiveTok = *(NextTok-1); + int LastRelOffs = 0; + SM.isInSameSLocAddrSpace(FirstLoc, LastConsecutiveTok.getLocation(), + &LastRelOffs); + unsigned FullLength = LastRelOffs + LastConsecutiveTok.getLength(); + + // Create a macro expansion SLocEntry that will "contain" all of the tokens. + SourceLocation Expansion = + SM.createMacroArgExpansionLoc(FirstLoc, InstLoc,FullLength); + + // Change the location of the tokens from the spelling location to the new + // expanded location. + for (; begin_tokens < NextTok; ++begin_tokens) { + Token &Tok = *begin_tokens; + int RelOffs = 0; + SM.isInSameSLocAddrSpace(FirstLoc, Tok.getLocation(), &RelOffs); + Tok.setLocation(Expansion.getLocWithOffset(RelOffs)); + } +} + +/// \brief Creates SLocEntries and updates the locations of macro argument +/// tokens to their new expanded locations. +/// +/// \param ArgIdSpellLoc the location of the macro argument id inside the macro +/// definition. +/// \param begin_tokens,end_tokens the macro argument tokens to update. +void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc, + Token *begin_tokens, + Token *end_tokens) { + SourceManager &SM = PP.getSourceManager(); + + SourceLocation InstLoc = + getExpansionLocForMacroDefLoc(ArgIdSpellLoc); + + while (begin_tokens < end_tokens) { + // If there's only one token just create a SLocEntry for it. 
+ if (end_tokens - begin_tokens == 1) { + Token &Tok = *begin_tokens; + Tok.setLocation(SM.createMacroArgExpansionLoc(Tok.getLocation(), + InstLoc, + Tok.getLength())); + return; + } + + updateConsecutiveMacroArgTokens(SM, InstLoc, begin_tokens, end_tokens); + } } |