diff options
author | Roman Divacky <rdivacky@FreeBSD.org> | 2009-10-14 18:03:49 +0000 |
---|---|---|
committer | Roman Divacky <rdivacky@FreeBSD.org> | 2009-10-14 18:03:49 +0000 |
commit | 4c8b24812ddcd1dedaca343a6d4e76f91f398981 (patch) | |
tree | 137ebebcae16fb0ce7ab4af456992bbd8d22fced /lib/Lex | |
parent | 5362a71c02e7d448a8ce98cf00c47e353fba5d04 (diff) |
Notes
Diffstat (limited to 'lib/Lex')
-rw-r--r-- | lib/Lex/CMakeLists.txt | 4 | ||||
-rw-r--r-- | lib/Lex/HeaderMap.cpp | 44 | ||||
-rw-r--r-- | lib/Lex/HeaderSearch.cpp | 112 | ||||
-rw-r--r-- | lib/Lex/Lexer.cpp | 436 | ||||
-rw-r--r-- | lib/Lex/LiteralSupport.cpp | 267 | ||||
-rw-r--r-- | lib/Lex/MacroArgs.cpp | 34 | ||||
-rw-r--r-- | lib/Lex/MacroArgs.h | 26 | ||||
-rw-r--r-- | lib/Lex/MacroInfo.cpp | 12 | ||||
-rw-r--r-- | lib/Lex/PPCaching.cpp | 5 | ||||
-rw-r--r-- | lib/Lex/PPDirectives.cpp | 352 | ||||
-rw-r--r-- | lib/Lex/PPExpressions.cpp | 100 | ||||
-rw-r--r-- | lib/Lex/PPLexerChange.cpp | 68 | ||||
-rw-r--r-- | lib/Lex/PPMacroExpansion.cpp | 169 | ||||
-rw-r--r-- | lib/Lex/PTHLexer.cpp | 220 | ||||
-rw-r--r-- | lib/Lex/Pragma.cpp | 170 | ||||
-rw-r--r-- | lib/Lex/Preprocessor.cpp | 134 | ||||
-rw-r--r-- | lib/Lex/PreprocessorLexer.cpp | 4 | ||||
-rw-r--r-- | lib/Lex/ScratchBuffer.cpp | 10 | ||||
-rw-r--r-- | lib/Lex/TokenConcatenation.cpp | 42 | ||||
-rw-r--r-- | lib/Lex/TokenLexer.cpp | 120 |
20 files changed, 1244 insertions, 1085 deletions
diff --git a/lib/Lex/CMakeLists.txt b/lib/Lex/CMakeLists.txt index a7237a7b76f6c..81a1e01f964d6 100644 --- a/lib/Lex/CMakeLists.txt +++ b/lib/Lex/CMakeLists.txt @@ -14,13 +14,13 @@ add_clang_library(clangLex PPExpressions.cpp PPLexerChange.cpp PPMacroExpansion.cpp + PTHLexer.cpp Pragma.cpp Preprocessor.cpp PreprocessorLexer.cpp - PTHLexer.cpp ScratchBuffer.cpp - TokenLexer.cpp TokenConcatenation.cpp + TokenLexer.cpp ) add_dependencies(clangLex ClangDiagnosticLex) diff --git a/lib/Lex/HeaderMap.cpp b/lib/Lex/HeaderMap.cpp index 4c8b70eb78213..c9a10dc02707c 100644 --- a/lib/Lex/HeaderMap.cpp +++ b/lib/Lex/HeaderMap.cpp @@ -28,8 +28,8 @@ using namespace clang; enum { HMAP_HeaderMagicNumber = ('h' << 24) | ('m' << 16) | ('a' << 8) | 'p', HMAP_HeaderVersion = 1, - - HMAP_EmptyBucketKey = 0 + + HMAP_EmptyBucketKey = 0 }; namespace clang { @@ -58,7 +58,7 @@ struct HMapHeader { /// linear probing based on this function. static inline unsigned HashHMapKey(const char *S, const char *End) { unsigned Result = 0; - + for (; S != End; S++) Result += tolower(*S) * 13; return Result; @@ -78,27 +78,27 @@ const HeaderMap *HeaderMap::Create(const FileEntry *FE) { // If the file is too small to be a header map, ignore it. unsigned FileSize = FE->getSize(); if (FileSize <= sizeof(HMapHeader)) return 0; - - llvm::OwningPtr<const llvm::MemoryBuffer> FileBuffer( + + llvm::OwningPtr<const llvm::MemoryBuffer> FileBuffer( llvm::MemoryBuffer::getFile(FE->getName(), 0, FE->getSize())); if (FileBuffer == 0) return 0; // Unreadable file? const char *FileStart = FileBuffer->getBufferStart(); // We know the file is at least as big as the header, check it now. const HMapHeader *Header = reinterpret_cast<const HMapHeader*>(FileStart); - + // Sniff it to see if it's a headermap by checking the magic number and // version. bool NeedsByteSwap; - if (Header->Magic == HMAP_HeaderMagicNumber && + if (Header->Magic == HMAP_HeaderMagicNumber && Header->Version == HMAP_HeaderVersion) NeedsByteSwap = false; else if (Header->Magic == llvm::ByteSwap_32(HMAP_HeaderMagicNumber) && Header->Version == llvm::ByteSwap_16(HMAP_HeaderVersion)) NeedsByteSwap = true; // Mixed endianness headermap. - else + else return 0; // Not a header map. - + if (Header->Reserved != 0) return 0; // Okay, everything looks good, create the header map. @@ -137,11 +137,11 @@ const HMapHeader &HeaderMap::getHeader() const { HMapBucket HeaderMap::getBucket(unsigned BucketNo) const { HMapBucket Result; Result.Key = HMAP_EmptyBucketKey; - - const HMapBucket *BucketArray = + + const HMapBucket *BucketArray = reinterpret_cast<const HMapBucket*>(FileBuffer->getBufferStart() + sizeof(HMapHeader)); - + const HMapBucket *BucketPtr = BucketArray+BucketNo; if ((char*)(BucketPtr+1) > FileBuffer->getBufferEnd()) { Result.Prefix = 0; @@ -161,11 +161,11 @@ HMapBucket HeaderMap::getBucket(unsigned BucketNo) const { const char *HeaderMap::getString(unsigned StrTabIdx) const { // Add the start of the string table to the idx. StrTabIdx += getEndianAdjustedWord(getHeader().StringsOffset); - + // Check for invalid index. if (StrTabIdx >= FileBuffer->getBufferSize()) return 0; - + // Otherwise, we have a valid pointer into the file. Just return it. We know // that the "string" can not overrun the end of the file, because the buffer // is nul terminated by virtue of being a MemoryBuffer. @@ -191,15 +191,15 @@ static bool StringsEqualWithoutCase(const char *S1, const char *S2, void HeaderMap::dump() const { const HMapHeader &Hdr = getHeader(); unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets); - - fprintf(stderr, "Header Map %s:\n %d buckets, %d entries\n", + + fprintf(stderr, "Header Map %s:\n %d buckets, %d entries\n", getFileName(), NumBuckets, getEndianAdjustedWord(Hdr.NumEntries)); - + for (unsigned i = 0; i != NumBuckets; ++i) { HMapBucket B = getBucket(i); if (B.Key == HMAP_EmptyBucketKey) continue; - + const char *Key = getString(B.Key); const char *Prefix = getString(B.Prefix); const char *Suffix = getString(B.Suffix); @@ -219,22 +219,22 @@ const FileEntry *HeaderMap::LookupFile(const char *FilenameStart, // Don't probe infinitely. if (NumBuckets & (NumBuckets-1)) return 0; - + // Linearly probe the hash table. for (unsigned Bucket = HashHMapKey(FilenameStart, FilenameEnd);; ++Bucket) { HMapBucket B = getBucket(Bucket & (NumBuckets-1)); if (B.Key == HMAP_EmptyBucketKey) return 0; // Hash miss. - + // See if the key matches. If not, probe on. const char *Key = getString(B.Key); unsigned BucketKeyLen = strlen(Key); if (BucketKeyLen != unsigned(FilenameEnd-FilenameStart)) continue; - + // See if the actual strings equal. if (!StringsEqualWithoutCase(FilenameStart, Key, BucketKeyLen)) continue; - + // If so, we have a match in the hash table. Construct the destination // path. llvm::SmallString<1024> DestPath; diff --git a/lib/Lex/HeaderSearch.cpp b/lib/Lex/HeaderSearch.cpp index 9023b11022b1b..2b9b7c977ceb3 100644 --- a/lib/Lex/HeaderSearch.cpp +++ b/lib/Lex/HeaderSearch.cpp @@ -35,7 +35,7 @@ HeaderFileInfo::getControllingMacro(ExternalIdentifierLookup *External) { HeaderSearch::HeaderSearch(FileManager &FM) : FileMgr(FM), FrameworkMap(64) { SystemDirIdx = 0; NoCurDirSearch = false; - + ExternalLookup = 0; NumIncluded = 0; NumMultiIncludeFileOptzn = 0; @@ -47,7 +47,7 @@ HeaderSearch::~HeaderSearch() { for (unsigned i = 0, e = HeaderMaps.size(); i != e; ++i) delete HeaderMaps[i].second; } - + void HeaderSearch::PrintStats() { fprintf(stderr, "\n*** HeaderSearch Stats:\n"); fprintf(stderr, "%d files tracked.\n", (int)FileInfo.size()); @@ -61,11 +61,11 @@ void HeaderSearch::PrintStats() { fprintf(stderr, " %d #import/#pragma once files.\n", NumOnceOnlyFiles); fprintf(stderr, " %d included exactly once.\n", NumSingleIncludedFiles); fprintf(stderr, " %d max times a file is included.\n", MaxNumIncludes); - + fprintf(stderr, " %d #include/#include_next/#import.\n", NumIncluded); fprintf(stderr, " %d #includes skipped due to" " the multi-include optimization.\n", NumMultiIncludeFileOptzn); - + fprintf(stderr, "%d framework lookups.\n", NumFrameworkLookups); fprintf(stderr, "%d subframework lookups.\n", NumSubFrameworkLookups); } @@ -79,15 +79,15 @@ const HeaderMap *HeaderSearch::CreateHeaderMap(const FileEntry *FE) { for (unsigned i = 0, e = HeaderMaps.size(); i != e; ++i) // Pointer equality comparison of FileEntries works because they are // already uniqued by inode. - if (HeaderMaps[i].first == FE) + if (HeaderMaps[i].first == FE) return HeaderMaps[i].second; } - + if (const HeaderMap *HM = HeaderMap::Create(FE)) { HeaderMaps.push_back(std::make_pair(FE, HM)); return HM; } - + return 0; } @@ -121,10 +121,10 @@ const FileEntry *DirectoryLookup::LookupFile(const char *FilenameStart, TmpDir.append(FilenameStart, FilenameEnd); return HS.getFileMgr().getFile(TmpDir.begin(), TmpDir.end()); } - + if (isFramework()) return DoFrameworkLookup(FilenameStart, FilenameEnd, HS); - + assert(isHeaderMap() && "Unknown directory lookup"); return getHeaderMap()->LookupFile(FilenameStart, FilenameEnd,HS.getFileMgr()); } @@ -136,63 +136,63 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup(const char *FilenameStart, const char *FilenameEnd, HeaderSearch &HS) const { FileManager &FileMgr = HS.getFileMgr(); - + // Framework names must have a '/' in the filename. const char *SlashPos = std::find(FilenameStart, FilenameEnd, '/'); if (SlashPos == FilenameEnd) return 0; - + // Find out if this is the home for the specified framework, by checking // HeaderSearch. Possible answer are yes/no and unknown. - const DirectoryEntry *&FrameworkDirCache = + const DirectoryEntry *&FrameworkDirCache = HS.LookupFrameworkCache(FilenameStart, SlashPos); - + // If it is known and in some other directory, fail. if (FrameworkDirCache && FrameworkDirCache != getFrameworkDir()) return 0; - + // Otherwise, construct the path to this framework dir. - + // FrameworkName = "/System/Library/Frameworks/" llvm::SmallString<1024> FrameworkName; FrameworkName += getFrameworkDir()->getName(); if (FrameworkName.empty() || FrameworkName.back() != '/') FrameworkName.push_back('/'); - + // FrameworkName = "/System/Library/Frameworks/Cocoa" FrameworkName.append(FilenameStart, SlashPos); - + // FrameworkName = "/System/Library/Frameworks/Cocoa.framework/" FrameworkName += ".framework/"; - + // If the cache entry is still unresolved, query to see if the cache entry is // still unresolved. If so, check its existence now. if (FrameworkDirCache == 0) { HS.IncrementFrameworkLookupCount(); - + // If the framework dir doesn't exist, we fail. // FIXME: It's probably more efficient to query this with FileMgr.getDir. - if (!llvm::sys::Path(std::string(FrameworkName.begin(), + if (!llvm::sys::Path(std::string(FrameworkName.begin(), FrameworkName.end())).exists()) return 0; - + // Otherwise, if it does, remember that this is the right direntry for this // framework. FrameworkDirCache = getFrameworkDir(); } - + // Check "/System/Library/Frameworks/Cocoa.framework/Headers/file.h" unsigned OrigSize = FrameworkName.size(); - + FrameworkName += "Headers/"; FrameworkName.append(SlashPos+1, FilenameEnd); if (const FileEntry *FE = FileMgr.getFile(FrameworkName.begin(), FrameworkName.end())) { return FE; } - + // Check "/System/Library/Frameworks/Cocoa.framework/PrivateHeaders/file.h" const char *Private = "Private"; - FrameworkName.insert(FrameworkName.begin()+OrigSize, Private, + FrameworkName.insert(FrameworkName.begin()+OrigSize, Private, Private+strlen(Private)); return FileMgr.getFile(FrameworkName.begin(), FrameworkName.end()); } @@ -209,7 +209,7 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup(const char *FilenameStart, /// non-null, indicates where the #including file is, in case a relative search /// is needed. const FileEntry *HeaderSearch::LookupFile(const char *FilenameStart, - const char *FilenameEnd, + const char *FilenameEnd, bool isAngled, const DirectoryLookup *FromDir, const DirectoryLookup *&CurDir, @@ -220,11 +220,11 @@ const FileEntry *HeaderSearch::LookupFile(const char *FilenameStart, // If this was an #include_next "/absolute/file", fail. if (FromDir) return 0; - + // Otherwise, just return the file. return FileMgr.getFile(FilenameStart, FilenameEnd); } - + // Step #0, unless disabled, check to see if the file is in the #includer's // directory. This has to be based on CurFileEnt, not CurDir, because // CurFileEnt could be a #include of a subdirectory (#include "foo/bar.h") and @@ -249,17 +249,17 @@ const FileEntry *HeaderSearch::LookupFile(const char *FilenameStart, return FE; } } - + CurDir = 0; // If this is a system #include, ignore the user #include locs. unsigned i = isAngled ? SystemDirIdx : 0; - + // If this is a #include_next request, start searching after the directory the // file was found in. if (FromDir) i = FromDir-&SearchDirs[0]; - + // Cache all of the lookups performed by this method. Many headers are // multiply included, and the "pragma once" optimization prevents them from // being relex/pp'd, but they would still have to search through a @@ -279,23 +279,23 @@ const FileEntry *HeaderSearch::LookupFile(const char *FilenameStart, // start point value. CacheLookup.first = i+1; } - + // Check each directory in sequence to see if it contains this file. for (; i != SearchDirs.size(); ++i) { - const FileEntry *FE = + const FileEntry *FE = SearchDirs[i].LookupFile(FilenameStart, FilenameEnd, *this); if (!FE) continue; - + CurDir = &SearchDirs[i]; - + // This file is a system header or C++ unfriendly if the dir is. getFileInfo(FE).DirInfo = CurDir->getDirCharacteristic(); - + // Remember this location for the next lookup we do. CacheLookup.second = i; return FE; } - + // Otherwise, didn't find it. Remember we didn't find this. CacheLookup.second = SearchDirs.size(); return 0; @@ -311,20 +311,20 @@ LookupSubframeworkHeader(const char *FilenameStart, const char *FilenameEnd, const FileEntry *ContextFileEnt) { assert(ContextFileEnt && "No context file?"); - + // Framework names must have a '/' in the filename. Find it. const char *SlashPos = std::find(FilenameStart, FilenameEnd, '/'); if (SlashPos == FilenameEnd) return 0; - + // Look up the base framework name of the ContextFileEnt. const char *ContextName = ContextFileEnt->getName(); - + // If the context info wasn't a framework, couldn't be a subframework. const char *FrameworkPos = strstr(ContextName, ".framework/"); if (FrameworkPos == 0) return 0; - - llvm::SmallString<1024> FrameworkName(ContextName, + + llvm::SmallString<1024> FrameworkName(ContextName, FrameworkPos+strlen(".framework/")); // Append Frameworks/HIToolbox.framework/ @@ -334,28 +334,28 @@ LookupSubframeworkHeader(const char *FilenameStart, llvm::StringMapEntry<const DirectoryEntry *> &CacheLookup = FrameworkMap.GetOrCreateValue(FilenameStart, SlashPos); - + // Some other location? if (CacheLookup.getValue() && CacheLookup.getKeyLength() == FrameworkName.size() && memcmp(CacheLookup.getKeyData(), &FrameworkName[0], CacheLookup.getKeyLength()) != 0) return 0; - + // Cache subframework. if (CacheLookup.getValue() == 0) { ++NumSubFrameworkLookups; - + // If the framework dir doesn't exist, we fail. const DirectoryEntry *Dir = FileMgr.getDirectory(FrameworkName.begin(), FrameworkName.end()); if (Dir == 0) return 0; - + // Otherwise, if it does, remember that this is the right direntry for this // framework. CacheLookup.setValue(Dir); } - + const FileEntry *FE = 0; // Check ".../Frameworks/HIToolbox.framework/Headers/HIToolbox.h" @@ -364,7 +364,7 @@ LookupSubframeworkHeader(const char *FilenameStart, HeadersFilename.append(SlashPos+1, FilenameEnd); if (!(FE = FileMgr.getFile(HeadersFilename.begin(), HeadersFilename.end()))) { - + // Check ".../Frameworks/HIToolbox.framework/PrivateHeaders/HIToolbox.h" HeadersFilename = FrameworkName; HeadersFilename += "PrivateHeaders/"; @@ -372,7 +372,7 @@ LookupSubframeworkHeader(const char *FilenameStart, if (!(FE = FileMgr.getFile(HeadersFilename.begin(), HeadersFilename.end()))) return 0; } - + // This file is a system header or C++ unfriendly if the old file is. // // Note that the temporary 'DirInfo' is required here, as either call to @@ -394,7 +394,7 @@ HeaderFileInfo &HeaderSearch::getFileInfo(const FileEntry *FE) { if (FE->getUID() >= FileInfo.size()) FileInfo.resize(FE->getUID()+1); return FileInfo[FE->getUID()]; -} +} void HeaderSearch::setHeaderFileInfoForUID(HeaderFileInfo HFI, unsigned UID) { if (UID >= FileInfo.size()) @@ -410,13 +410,13 @@ bool HeaderSearch::ShouldEnterIncludeFile(const FileEntry *File, bool isImport){ // Get information about this file. HeaderFileInfo &FileInfo = getFileInfo(File); - + // If this is a #import directive, check that we have not already imported // this header. if (isImport) { // If this has already been imported, don't import it again. FileInfo.isImport = true; - + // Has this already been #import'ed or #include'd? if (FileInfo.NumIncludes) return false; } else { @@ -425,19 +425,19 @@ bool HeaderSearch::ShouldEnterIncludeFile(const FileEntry *File, bool isImport){ if (FileInfo.isImport) return false; } - + // Next, check to see if the file is wrapped with #ifndef guards. If so, and // if the macro that guards it is defined, we know the #include has no effect. - if (const IdentifierInfo *ControllingMacro + if (const IdentifierInfo *ControllingMacro = FileInfo.getControllingMacro(ExternalLookup)) if (ControllingMacro->hasMacroDefinition()) { ++NumMultiIncludeFileOptzn; return false; } - + // Increment the number of times this file has been included. ++FileInfo.NumIncludes; - + return true; } diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index 6f1043ae73533..c8b9a5d5420ae 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -39,7 +39,7 @@ static void InitCharacterInfo(); // Token Class Implementation //===----------------------------------------------------------------------===// -/// isObjCAtKeyword - Return true if we have an ObjC keyword identifier. +/// isObjCAtKeyword - Return true if we have an ObjC keyword identifier. bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const { if (IdentifierInfo *II = getIdentifierInfo()) return II->getObjCKeywordID() == objcKey; @@ -57,35 +57,36 @@ tok::ObjCKeywordKind Token::getObjCKeywordID() const { // Lexer Class Implementation //===----------------------------------------------------------------------===// -void Lexer::InitLexer(const char *BufStart, const char *BufPtr, +void Lexer::InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd) { InitCharacterInfo(); - + BufferStart = BufStart; BufferPtr = BufPtr; BufferEnd = BufEnd; - + assert(BufEnd[0] == 0 && "We assume that the input buffer has a null character at the end" " to simplify lexing!"); - - Is_PragmaLexer = false; + Is_PragmaLexer = false; + IsEofCodeCompletion = false; + // Start of the file is a start of line. IsAtStartOfLine = true; - + // We are not after parsing a #. ParsingPreprocessorDirective = false; - + // We are not after parsing #include. ParsingFilename = false; - + // We are not in raw mode. Raw mode disables diagnostics and interpretation // of tokens (e.g. identifiers, thus disabling macro expansion). It is used // to quickly lex the tokens of the buffer, e.g. when handling a "#if 0" block // or otherwise skipping over tokens. LexingRawMode = false; - + // Default to not keeping comments. ExtendedTokenMode = 0; } @@ -98,14 +99,18 @@ Lexer::Lexer(FileID FID, Preprocessor &PP) : PreprocessorLexer(&PP, FID), FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)), Features(PP.getLangOptions()) { - + const llvm::MemoryBuffer *InputFile = PP.getSourceManager().getBuffer(FID); InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(), InputFile->getBufferEnd()); - + // Default to keeping comments if the preprocessor wants them. SetCommentRetentionState(PP.getCommentRetentionState()); + + // If the input file is truncated, the EOF is a code-completion token. + if (PP.getSourceManager().isTruncatedFile(FID)) + IsEofCodeCompletion = true; } /// Lexer constructor - Create a new raw lexer object. This object is only @@ -116,7 +121,7 @@ Lexer::Lexer(SourceLocation fileloc, const LangOptions &features, : FileLoc(fileloc), Features(features) { InitLexer(BufStart, BufPtr, BufEnd); - + // We *are* in raw mode. LexingRawMode = true; } @@ -128,9 +133,9 @@ Lexer::Lexer(FileID FID, const SourceManager &SM, const LangOptions &features) : FileLoc(SM.getLocForStartOfFile(FID)), Features(features) { const llvm::MemoryBuffer *FromFile = SM.getBuffer(FID); - InitLexer(FromFile->getBufferStart(), FromFile->getBufferStart(), + InitLexer(FromFile->getBufferStart(), FromFile->getBufferStart(), FromFile->getBufferEnd()); - + // We *are* in raw mode. LexingRawMode = true; } @@ -150,7 +155,7 @@ Lexer::Lexer(FileID FID, const SourceManager &SM, const LangOptions &features) /// interface that could handle this stuff. This would pull GetMappedTokenLoc /// out of the critical path of the lexer! /// -Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc, +Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation InstantiationLocStart, SourceLocation InstantiationLocEnd, unsigned TokLen, Preprocessor &PP) { @@ -159,12 +164,12 @@ Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc, // Create the lexer as if we were going to lex the file normally. FileID SpellingFID = SM.getFileID(SpellingLoc); Lexer *L = new Lexer(SpellingFID, PP); - + // Now that the lexer is created, change the start/end locations so that we // just lex the subsection of the file that we want. This is lexing from a // scratch buffer. const char *StrData = SM.getCharacterData(SpellingLoc); - + L->BufferPtr = StrData; L->BufferEnd = StrData+TokLen; assert(L->BufferEnd[0] == 0 && "Buffer is not nul terminated!"); @@ -174,11 +179,11 @@ Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc, L->FileLoc = SM.createInstantiationLoc(SM.getLocForStartOfFile(SpellingFID), InstantiationLocStart, InstantiationLocEnd, TokLen); - + // Ensure that the lexer thinks it is inside a directive, so that end \n will // return an EOM token. L->ParsingPreprocessorDirective = true; - + // This lexer really is for _Pragma. L->Is_PragmaLexer = true; return L; @@ -220,7 +225,7 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc, const LangOptions &LangOpts) { // TODO: this could be special cased for common tokens like identifiers, ')', // etc to make this faster, if it mattered. Just look at StrData[0] to handle - // all obviously single-char tokens. This could use + // all obviously single-char tokens. This could use // Lexer::isObviouslySimpleCharacter for example to handle identifiers or // something. @@ -233,6 +238,7 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc, // Create a lexer starting at the beginning of this token. Lexer TheLexer(Loc, LangOpts, Buffer.first, StrData, Buffer.second); + TheLexer.SetCommentRetentionState(true); Token TheTok; TheLexer.LexFromRawLexer(TheTok); return TheTok.getLength(); @@ -242,8 +248,6 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc, // Character information. //===----------------------------------------------------------------------===// -static unsigned char CharInfo[256]; - enum { CHAR_HORZ_WS = 0x01, // ' ', '\t', '\f', '\v'. Note, no '\0' CHAR_VERT_WS = 0x02, // '\r', '\n' @@ -253,25 +257,98 @@ enum { CHAR_PERIOD = 0x20 // . }; +// Statically initialize CharInfo table based on ASCII character set +// Reference: FreeBSD 7.2 /usr/share/misc/ascii +static const unsigned char CharInfo[256] = +{ +// 0 NUL 1 SOH 2 STX 3 ETX +// 4 EOT 5 ENQ 6 ACK 7 BEL + 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , +// 8 BS 9 HT 10 NL 11 VT +//12 NP 13 CR 14 SO 15 SI + 0 , CHAR_HORZ_WS, CHAR_VERT_WS, CHAR_HORZ_WS, + CHAR_HORZ_WS, CHAR_VERT_WS, 0 , 0 , +//16 DLE 17 DC1 18 DC2 19 DC3 +//20 DC4 21 NAK 22 SYN 23 ETB + 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , +//24 CAN 25 EM 26 SUB 27 ESC +//28 FS 29 GS 30 RS 31 US + 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , +//32 SP 33 ! 34 " 35 # +//36 $ 37 % 38 & 39 ' + CHAR_HORZ_WS, 0 , 0 , 0 , + 0 , 0 , 0 , 0 , +//40 ( 41 ) 42 * 43 + +//44 , 45 - 46 . 47 / + 0 , 0 , 0 , 0 , + 0 , 0 , CHAR_PERIOD , 0 , +//48 0 49 1 50 2 51 3 +//52 4 53 5 54 6 55 7 + CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , + CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , +//56 8 57 9 58 : 59 ; +//60 < 61 = 62 > 63 ? + CHAR_NUMBER , CHAR_NUMBER , 0 , 0 , + 0 , 0 , 0 , 0 , +//64 @ 65 A 66 B 67 C +//68 D 69 E 70 F 71 G + 0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , +//72 H 73 I 74 J 75 K +//76 L 77 M 78 N 79 O + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , +//80 P 81 Q 82 R 83 S +//84 T 85 U 86 V 87 W + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , +//88 X 89 Y 90 Z 91 [ +//92 \ 93 ] 94 ^ 95 _ + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0 , + 0 , 0 , 0 , CHAR_UNDER , +//96 ` 97 a 98 b 99 c +//100 d 101 e 102 f 103 g + 0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , +//104 h 105 i 106 j 107 k +//108 l 109 m 110 n 111 o + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , +//112 p 113 q 114 r 115 s +//116 t 117 u 118 v 119 w + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , +//120 x 121 y 122 z 123 { +//124 | 125 } 126 ~ 127 DEL + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0 , + 0 , 0 , 0 , 0 +}; + static void InitCharacterInfo() { static bool isInited = false; if (isInited) return; - isInited = true; - - // Intiialize the CharInfo table. - // TODO: statically initialize this. - CharInfo[(int)' '] = CharInfo[(int)'\t'] = - CharInfo[(int)'\f'] = CharInfo[(int)'\v'] = CHAR_HORZ_WS; - CharInfo[(int)'\n'] = CharInfo[(int)'\r'] = CHAR_VERT_WS; - - CharInfo[(int)'_'] = CHAR_UNDER; - CharInfo[(int)'.'] = CHAR_PERIOD; - for (unsigned i = 'a'; i <= 'z'; ++i) - CharInfo[i] = CharInfo[i+'A'-'a'] = CHAR_LETTER; + // check the statically-initialized CharInfo table + assert(CHAR_HORZ_WS == CharInfo[(int)' ']); + assert(CHAR_HORZ_WS == CharInfo[(int)'\t']); + assert(CHAR_HORZ_WS == CharInfo[(int)'\f']); + assert(CHAR_HORZ_WS == CharInfo[(int)'\v']); + assert(CHAR_VERT_WS == CharInfo[(int)'\n']); + assert(CHAR_VERT_WS == CharInfo[(int)'\r']); + assert(CHAR_UNDER == CharInfo[(int)'_']); + assert(CHAR_PERIOD == CharInfo[(int)'.']); + for (unsigned i = 'a'; i <= 'z'; ++i) { + assert(CHAR_LETTER == CharInfo[i]); + assert(CHAR_LETTER == CharInfo[i+'A'-'a']); + } for (unsigned i = '0'; i <= '9'; ++i) - CharInfo[i] = CHAR_NUMBER; + assert(CHAR_NUMBER == CharInfo[i]); + isInited = true; } + /// isIdentifierBody - Return true if this is the body character of an /// identifier, which is [a-zA-Z0-9_]. static inline bool isIdentifierBody(unsigned char c) { @@ -294,7 +371,7 @@ static inline bool isWhitespace(unsigned char c) { /// isNumberBody - Return true if this is the body character of an /// preprocessing number, which is [a-zA-Z0-9_.]. static inline bool isNumberBody(unsigned char c) { - return (CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD)) ? + return (CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD)) ? true : false; } @@ -315,22 +392,22 @@ static SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen) { assert(FileLoc.isMacroID() && "Must be an instantiation"); - + // Otherwise, we're lexing "mapped tokens". This is used for things like // _Pragma handling. Combine the instantiation location of FileLoc with the // spelling location. SourceManager &SM = PP.getSourceManager(); - + // Create a new SLoc which is expanded from Instantiation(FileLoc) but whose // characters come from spelling(FileLoc)+Offset. SourceLocation SpellingLoc = SM.getSpellingLoc(FileLoc); SpellingLoc = SpellingLoc.getFileLocWithOffset(CharNo); - + // Figure out the expansion loc range, which is the range covered by the // original _Pragma(...) sequence. std::pair<SourceLocation,SourceLocation> II = SM.getImmediateInstantiationRange(FileLoc); - + return SM.createInstantiationLoc(SpellingLoc, II.first, II.second, TokLen); } @@ -346,7 +423,7 @@ SourceLocation Lexer::getSourceLocation(const char *Loc, unsigned CharNo = Loc-BufferStart; if (FileLoc.isFileID()) return FileLoc.getFileLocWithOffset(CharNo); - + // Otherwise, this is the _Pragma lexer case, which pretends that all of the // tokens are lexed from where the _Pragma was defined. assert(PP && "This doesn't work on raw lexers"); @@ -387,13 +464,13 @@ static char GetTrigraphCharForLetter(char Letter) { static char DecodeTrigraphChar(const char *CP, Lexer *L) { char Res = GetTrigraphCharForLetter(*CP); if (!Res || !L) return Res; - + if (!L->getFeatures().Trigraphs) { if (!L->isLexingRawMode()) L->Diag(CP-2, diag::trigraph_ignored); return 0; } - + if (!L->isLexingRawMode()) L->Diag(CP-2, diag::trigraph_converted) << std::string()+Res; return Res; @@ -401,12 +478,12 @@ static char DecodeTrigraphChar(const char *CP, Lexer *L) { /// getEscapedNewLineSize - Return the size of the specified escaped newline, /// or 0 if it is not an escaped newline. P[-1] is known to be a "\" or a -/// trigraph equivalent on entry to this function. +/// trigraph equivalent on entry to this function. unsigned Lexer::getEscapedNewLineSize(const char *Ptr) { unsigned Size = 0; while (isWhitespace(Ptr[Size])) { ++Size; - + if (Ptr[Size-1] != '\n' && Ptr[Size-1] != '\r') continue; @@ -414,10 +491,10 @@ unsigned Lexer::getEscapedNewLineSize(const char *Ptr) { if ((Ptr[Size] == '\r' || Ptr[Size] == '\n') && Ptr[Size-1] != Ptr[Size]) ++Size; - + return Size; - } - + } + // Not an escaped newline, must be a \t or something else. return 0; } @@ -438,7 +515,7 @@ const char *Lexer::SkipEscapedNewLines(const char *P) { } else { return P; } - + unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape); if (NewLineSize == 0) return P; P = AfterEscape+NewLineSize; @@ -472,7 +549,7 @@ char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size, Slash: // Common case, backslash-char where the char is not whitespace. if (!isWhitespace(Ptr[0])) return '\\'; - + // See if we have optional whitespace characters between the slash and // newline. if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) { @@ -482,18 +559,18 @@ Slash: // Warn if there was whitespace between the backslash and newline. if (Ptr[0] != '\n' && Ptr[0] != '\r' && Tok && !isLexingRawMode()) Diag(Ptr, diag::backslash_newline_space); - + // Found backslash<whitespace><newline>. Parse the char after it. Size += EscapedNewLineSize; Ptr += EscapedNewLineSize; // Use slow version to accumulate a correct size field. return getCharAndSizeSlow(Ptr, Size, Tok); } - + // Otherwise, this is not an escaped newline, just return the slash. return '\\'; } - + // If this is a trigraph, process it. if (Ptr[0] == '?' && Ptr[1] == '?') { // If this is actually a legal trigraph (not something like "??x"), emit @@ -508,7 +585,7 @@ Slash: return C; } } - + // If this is neither, return a single character. ++Size; return *Ptr; @@ -530,21 +607,21 @@ char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size, Slash: // Common case, backslash-char where the char is not whitespace. if (!isWhitespace(Ptr[0])) return '\\'; - + // See if we have optional whitespace characters followed by a newline. if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) { // Found backslash<whitespace><newline>. Parse the char after it. Size += EscapedNewLineSize; Ptr += EscapedNewLineSize; - + // Use slow version to accumulate a correct size field. return getCharAndSizeSlowNoWarn(Ptr, Size, Features); } - + // Otherwise, this is not an escaped newline, just return the slash. return '\\'; } - + // If this is a trigraph, process it. if (Features.Trigraphs && Ptr[0] == '?' && Ptr[1] == '?') { // If this is actually a legal trigraph (not something like "??x"), return @@ -556,7 +633,7 @@ Slash: return C; } } - + // If this is neither, return a single character. ++Size; return *Ptr; @@ -582,34 +659,34 @@ void Lexer::LexIdentifier(Token &Result, const char *CurPtr) { FinishIdentifier: const char *IdStart = BufferPtr; FormTokenWithChars(Result, CurPtr, tok::identifier); - + // If we are in raw mode, return this identifier raw. There is no need to // look up identifier information or attempt to macro expand it. if (LexingRawMode) return; - + // Fill in Result.IdentifierInfo, looking up the identifier in the // identifier table. IdentifierInfo *II = PP->LookUpIdentifierInfo(Result, IdStart); - + // Change the kind of this identifier to the appropriate token kind, e.g. // turning "for" into a keyword. Result.setKind(II->getTokenID()); - + // Finally, now that we know we have an identifier, pass this off to the // preprocessor, which may macro expand it or something. if (II->isHandleIdentifierCase()) PP->HandleIdentifier(Result); return; } - + // Otherwise, $,\,? in identifier found. Enter slower path. - + C = getCharAndSize(CurPtr, Size); while (1) { if (C == '$') { // If we hit a $ and they are not supported in identifiers, we are done. if (!Features.DollarIdents) goto FinishIdentifier; - + // Otherwise, emit a diagnostic and continue. if (!isLexingRawMode()) Diag(CurPtr, diag::ext_dollar_in_identifier); @@ -645,7 +722,7 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { PrevCh = C; C = getCharAndSize(CurPtr, Size); } - + // If we fell out, check for a sign, due to 1e+12. If we have one, continue. if ((C == '-' || C == '+') && (PrevCh == 'E' || PrevCh == 'e')) return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result)); @@ -653,7 +730,7 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { // If we have a hex FP constant, continue. if ((C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p')) return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result)); - + // Update the location of token as well as BufferPtr. const char *TokStart = BufferPtr; FormTokenWithChars(Result, CurPtr, tok::numeric_constant); @@ -664,7 +741,7 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { /// either " or L". void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) { const char *NulCharacter = 0; // Does this string contain the \0 character? - + char C = getAndAdvanceChar(CurPtr, Result); while (C != '"') { // Skip escaped characters. @@ -682,7 +759,7 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) { } C = getAndAdvanceChar(CurPtr, Result); } - + // If a nul character existed in the string, warn about it. if (NulCharacter && !isLexingRawMode()) Diag(NulCharacter, diag::null_in_string); @@ -716,11 +793,11 @@ void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { } C = getAndAdvanceChar(CurPtr, Result); } - + // If a nul character existed in the string, warn about it. if (NulCharacter && !isLexingRawMode()) Diag(NulCharacter, diag::null_in_string); - + // Update the location of token as well as BufferPtr. const char *TokStart = BufferPtr; FormTokenWithChars(Result, CurPtr, tok::angle_string_literal); @@ -745,7 +822,7 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr) { // FIXME: UCN's. C = getAndAdvanceChar(CurPtr, Result); } - + if (C && C != '\n' && C != '\r' && CurPtr[0] == '\'') { ++CurPtr; } else { @@ -767,7 +844,7 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr) { C = getAndAdvanceChar(CurPtr, Result); } while (C != '\''); } - + if (NulCharacter && !isLexingRawMode()) Diag(NulCharacter, diag::null_in_char); @@ -789,17 +866,17 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) { // Skip horizontal whitespace very aggressively. while (isHorizontalWhitespace(Char)) Char = *++CurPtr; - + // Otherwise if we have something other than whitespace, we're done. if (Char != '\n' && Char != '\r') break; - + if (ParsingPreprocessorDirective) { // End of preprocessor directive line, let LexTokenInternal handle this. BufferPtr = CurPtr; return false; } - + // ok, but handle newline. // The returned token is at the start of the line. Result.setFlag(Token::StartOfLine); @@ -818,7 +895,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) { FormTokenWithChars(Result, CurPtr, tok::unknown); return true; } - + BufferPtr = CurPtr; return false; } @@ -832,12 +909,12 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) { // extension warning. if (!Features.BCPLComment && !isLexingRawMode()) { Diag(BufferPtr, diag::ext_bcpl_comment); - + // Mark them enabled so we only emit one warning for this translation // unit. Features.BCPLComment = true; } - + // Scan over the body of the comment. The common case, when scanning, is that // the comment contains normal ascii characters with nothing interesting in // them. As such, optimize for this case with the inner loop. @@ -847,7 +924,7 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) { // FIXME: Speedup BCPL comment lexing. Just scan for a \n or \r character. // If we find a \n character, scan backwards, checking to see if it's an // escaped newline, like we do for block comments. - + // Skip over characters in the fast loop. while (C != 0 && // Potentially EOF. C != '\\' && // Potentially escaped newline. @@ -858,7 +935,7 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) { // If this is a newline, we're done. if (C == '\n' || C == '\r') break; // Found the newline? Break out! - + // Otherwise, this is a hard case. Fall back on getAndAdvanceChar to // properly decode the character. Read it in raw mode to avoid emitting // diagnostics about things like trigraphs. If we see an escaped newline, @@ -876,7 +953,7 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) { --CurPtr; C = 'x'; // doesn't matter what this is. } - + // If we read multiple characters, and one of those characters was a \r or // \n, then we had an escaped newline within the comment. Emit diagnostic // unless the next line is also a // comment. @@ -892,21 +969,21 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) { if (ForwardPtr[0] == '/' && ForwardPtr[1] == '/') break; } - + if (!isLexingRawMode()) Diag(OldPtr-1, diag::ext_multi_line_bcpl_comment); break; } } - + if (CurPtr == BufferEnd+1) { --CurPtr; break; } } while (C != '\n' && C != '\r'); // Found but did not consume the newline. if (PP) - PP->HandleComment(SourceRange(getSourceLocation(BufferPtr), + PP->HandleComment(SourceRange(getSourceLocation(BufferPtr), getSourceLocation(CurPtr))); - + // If we are returning comments as tokens, return this comment as a token. if (inKeepCommentMode()) return SaveBCPLComment(Result, CurPtr); @@ -917,14 +994,14 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) { BufferPtr = CurPtr; return false; } - + // Otherwise, eat the \n character. We don't care if this is a \n\r or // \r\n sequence. This is an efficiency hack (because we know the \n can't // contribute to another token), it isn't needed for correctness. Note that // this is ok even in KeepWhitespaceMode, because we would have returned the /// comment above in that mode. ++CurPtr; - + // The next returned token is at the start of the line. Result.setFlag(Token::StartOfLine); // No leading whitespace seen so far. @@ -939,17 +1016,17 @@ bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) { // If we're not in a preprocessor directive, just return the // comment // directly. FormTokenWithChars(Result, CurPtr, tok::comment); - + if (!ParsingPreprocessorDirective) return true; - + // If this BCPL-style comment is in a macro definition, transmogrify it into // a C-style block comment. std::string Spelling = PP->getSpelling(Result); assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not bcpl comment?"); Spelling[1] = '*'; // Change prefix to "/*". Spelling += "*/"; // add suffix. - + Result.setKind(tok::comment); PP->CreateString(&Spelling[0], Spelling.size(), Result, Result.getLocation()); @@ -959,13 +1036,13 @@ bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) { /// isBlockCommentEndOfEscapedNewLine - Return true if the specified newline /// character (either \n or \r) is part of an escaped newline sequence. Issue a /// diagnostic if so. We know that the newline is inside of a block comment. -static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, +static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L) { assert(CurPtr[0] == '\n' || CurPtr[0] == '\r'); - + // Back up off the newline. --CurPtr; - + // If this is a two-character newline sequence, skip the other character. if (CurPtr[0] == '\n' || CurPtr[0] == '\r') { // \n\n or \r\r -> not escaped newline. @@ -974,7 +1051,7 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, // \n\r or \r\n -> skip the newline. --CurPtr; } - + // If we have horizontal whitespace, skip over it. We allow whitespace // between the slash and newline. bool HasSpace = false; @@ -982,7 +1059,7 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, --CurPtr; HasSpace = true; } - + // If we have a slash, we know this is an escaped newline. if (*CurPtr == '\\') { if (CurPtr[-1] != '*') return false; @@ -991,7 +1068,7 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, if (CurPtr[0] != '/' || CurPtr[-1] != '?' || CurPtr[-2] != '?' || CurPtr[-3] != '*') return false; - + // This is the trigraph ending the comment. Emit a stern warning! CurPtr -= 2; @@ -1005,15 +1082,15 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, if (!L->isLexingRawMode()) L->Diag(CurPtr, diag::trigraph_ends_block_comment); } - + // Warn about having an escaped newline between the */ characters. if (!L->isLexingRawMode()) L->Diag(CurPtr, diag::escaped_newline_block_comment_end); - + // If there was space between the backslash and newline, warn about it. if (HasSpace && !L->isLexingRawMode()) L->Diag(CurPtr, diag::backslash_newline_space); - + return true; } @@ -1049,23 +1126,23 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) { if (!isLexingRawMode()) Diag(BufferPtr, diag::err_unterminated_block_comment); --CurPtr; - + // KeepWhitespaceMode should return this broken comment as a token. Since // it isn't a well formed comment, just return it as an 'unknown' token. if (isKeepWhitespaceMode()) { FormTokenWithChars(Result, CurPtr, tok::unknown); return true; } - + BufferPtr = CurPtr; return false; } - + // Check to see if the first character after the '/*' is another /. If so, // then this slash does not end the block comment, it is part of it. if (C == '/') C = *CurPtr++; - + while (1) { // Skip over all non-interesting characters until we find end of buffer or a // (probably ending) '/' character. @@ -1073,7 +1150,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) { // While not aligned to a 16-byte boundary. while (C != '/' && ((intptr_t)CurPtr & 0x0F) != 0) C = *CurPtr++; - + if (C == '/') goto FoundSlash; #ifdef __SSE2__ @@ -1084,13 +1161,13 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) { CurPtr += 16; #elif __ALTIVEC__ __vector unsigned char Slashes = { - '/', '/', '/', '/', '/', '/', '/', '/', + '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/' }; while (CurPtr+16 <= BufferEnd && !vec_any_eq(*(vector unsigned char*)CurPtr, Slashes)) CurPtr += 16; -#else +#else // Scan for '/' quickly. Many block comments are very large. while (CurPtr[0] != '/' && CurPtr[1] != '/' && @@ -1100,20 +1177,20 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) { CurPtr += 4; } #endif - + // It has to be one of the bytes scanned, increment to it and read one. C = *CurPtr++; } - + // Loop to scan the remainder. while (C != '/' && C != '\0') C = *CurPtr++; - + FoundSlash: if (C == '/') { if (CurPtr[-2] == '*') // We found the final */. We're done! break; - + if ((CurPtr[-2] == '\n' || CurPtr[-2] == '\r')) { if (isEndOfBlockCommentWithEscapedNewLine(CurPtr-2, this)) { // We found the final */, though it had an escaped newline between the @@ -1135,22 +1212,22 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) { // after the /*, but this would involve lexing a lot of what really is the // comment, which surely would confuse the parser. --CurPtr; - + // KeepWhitespaceMode should return this broken comment as a token. Since // it isn't a well formed comment, just return it as an 'unknown' token. if (isKeepWhitespaceMode()) { FormTokenWithChars(Result, CurPtr, tok::unknown); return true; } - + BufferPtr = CurPtr; return false; } C = *CurPtr++; } - - if (PP) - PP->HandleComment(SourceRange(getSourceLocation(BufferPtr), + + if (PP) + PP->HandleComment(SourceRange(getSourceLocation(BufferPtr), getSourceLocation(CurPtr))); // If we are returning comments as tokens, return this comment as a token. @@ -1208,11 +1285,11 @@ std::string Lexer::ReadToEndOfLine() { // Okay, we found the end of the line. First, back up past the \0, \r, \n. assert(CurPtr[-1] == Char && "Trigraphs for newline?"); BufferPtr = CurPtr-1; - + // Next, lex the character, which should handle the EOM transition. Lex(Tmp); assert(Tmp.is(tok::eom) && "Unexpected token!"); - + // Finally, we're done, return the string we found. return Result; } @@ -1232,12 +1309,12 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { ParsingPreprocessorDirective = false; // Update the location of token as well as BufferPtr. FormTokenWithChars(Result, CurPtr, tok::eom); - + // Restore comment saving mode, in case it was disabled for directive. SetCommentRetentionState(PP->getCommentRetentionState()); return true; // Have a token. - } - + } + // If we are in raw mode, return this event as an EOF token. Let the caller // that put us in raw mode handle the event. if (isLexingRawMode()) { @@ -1246,23 +1323,44 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { FormTokenWithChars(Result, BufferEnd, tok::eof); return true; } - - // Otherwise, issue diagnostics for unterminated #if and missing newline. + // Otherwise, check if we are code-completing, then issue diagnostics for + // unterminated #if and missing newline. + + if (IsEofCodeCompletion) { + bool isIntendedFile = true; + if (PP && FileLoc.isFileID()) { + SourceManager &SM = PP->getSourceManager(); + isIntendedFile = SM.isTruncatedFile(SM.getFileID(FileLoc)); + } + + if (isIntendedFile) { + // We're at the end of the file, but we've been asked to consider the + // end of the file to be a code-completion token. Return the + // code-completion token. + Result.startToken(); + FormTokenWithChars(Result, CurPtr, tok::code_completion); + + // Only do the eof -> code_completion translation once. + IsEofCodeCompletion = false; + return true; + } + } + // If we are in a #if directive, emit an error. while (!ConditionalStack.empty()) { PP->Diag(ConditionalStack.back().IfLoc, diag::err_pp_unterminated_conditional); ConditionalStack.pop_back(); } - + // C99 5.1.1.2p2: If the file is non-empty and didn't end in a newline, issue // a pedwarn. if (CurPtr != BufferStart && (CurPtr[-1] != '\n' && CurPtr[-1] != '\r')) Diag(BufferEnd, diag::ext_no_newline_eof) << CodeModificationHint::CreateInsertion(getSourceLocation(BufferEnd), "\n"); - + BufferPtr = CurPtr; // Finally, let the preprocessor handle this. @@ -1275,27 +1373,27 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { /// lexer. unsigned Lexer::isNextPPTokenLParen() { assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?"); - + // Switch to 'skipping' mode. This will ensure that we can lex a token // without emitting diagnostics, disables macro expansion, and will cause EOF // to return an EOF token instead of popping the include stack. LexingRawMode = true; - + // Save state that can be changed while lexing so that we can restore it. const char *TmpBufferPtr = BufferPtr; bool inPPDirectiveMode = ParsingPreprocessorDirective; - + Token Tok; Tok.startToken(); LexTokenInternal(Tok); - + // Restore state that may have changed. BufferPtr = TmpBufferPtr; ParsingPreprocessorDirective = inPPDirectiveMode; - + // Restore the lexer back to non-skipping mode. LexingRawMode = false; - + if (Tok.is(tok::eof)) return 2; return Tok.is(tok::l_paren); @@ -1304,17 +1402,15 @@ unsigned Lexer::isNextPPTokenLParen() { /// LexTokenInternal - This implements a simple C family lexer. It is an /// extremely performance critical piece of code. This assumes that the buffer -/// has a null character at the end of the file. Return true if an error -/// occurred and compilation should terminate, false if normal. This returns a -/// preprocessing token, not a normal token, as such, it is an internal -/// interface. It assumes that the Flags of result have been cleared before -/// calling this. +/// has a null character at the end of the file. This returns a preprocessing +/// token, not a normal token, as such, it is an internal interface. It assumes +/// that the Flags of result have been cleared before calling this. void Lexer::LexTokenInternal(Token &Result) { LexNextToken: // New token, can't need cleaning yet. Result.clearFlag(Token::NeedsCleaning); Result.setIdentifierInfo(0); - + // CurPtr - Cache BufferPtr in an automatic variable. const char *CurPtr = BufferPtr; @@ -1323,7 +1419,7 @@ LexNextToken: ++CurPtr; while ((*CurPtr == ' ') || (*CurPtr == '\t')) ++CurPtr; - + // If we are keeping whitespace and other tokens, just return what we just // skipped. The next lexer invocation will return the token after the // whitespace. @@ -1331,17 +1427,17 @@ LexNextToken: FormTokenWithChars(Result, CurPtr, tok::unknown); return; } - + BufferPtr = CurPtr; Result.setFlag(Token::LeadingSpace); } - + unsigned SizeTmp, SizeTmp2; // Temporaries for use in cases below. - + // Read a character, advancing over it. char Char = getAndAdvanceChar(CurPtr, Result); tok::TokenKind Kind; - + switch (Char) { case 0: // Null. // Found end of file? @@ -1354,13 +1450,13 @@ LexNextToken: assert(PPCache && "Raw buffer::LexEndOfFile should return a token"); return PPCache->Lex(Result); } - + if (!isLexingRawMode()) Diag(CurPtr-1, diag::null_in_file); Result.setFlag(Token::LeadingSpace); if (SkipWhitespace(Result, CurPtr)) return; // KeepWhitespaceMode - + goto LexNextToken; // GCC isn't tail call eliminating. case '\n': case '\r': @@ -1369,13 +1465,13 @@ LexNextToken: if (ParsingPreprocessorDirective) { // Done parsing the "line". ParsingPreprocessorDirective = false; - + // Restore comment saving mode, in case it was disabled for directive. SetCommentRetentionState(PP->getCommentRetentionState()); - + // Since we consumed a newline, we are back at the start of a line. IsAtStartOfLine = true; - + Kind = tok::eom; break; } @@ -1383,7 +1479,7 @@ LexNextToken: Result.setFlag(Token::StartOfLine); // No leading whitespace seen so far. Result.clearFlag(Token::LeadingSpace); - + if (SkipWhitespace(Result, CurPtr)) return; // KeepWhitespaceMode goto LexNextToken; // GCC isn't tail call eliminating. @@ -1398,7 +1494,7 @@ LexNextToken: SkipIgnoredUnits: CurPtr = BufferPtr; - + // If the next token is obviously a // or /* */ comment, skip it efficiently // too (without going through the big switch stmt). if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() && @@ -1420,7 +1516,7 @@ LexNextToken: // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); return LexNumericConstant(Result, CurPtr); - + case 'L': // Identifier (Loony) or wide literal (L'x' or L"xyz"). // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); @@ -1435,7 +1531,7 @@ LexNextToken: if (Char == '\'') return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result)); // FALL THROUGH, treating L like the start of an identifier. - + // C99 6.4.2: Identifiers. case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': /*'L'*/case 'M': case 'N': @@ -1458,10 +1554,10 @@ LexNextToken: MIOpt.ReadToken(); return LexIdentifier(Result, CurPtr); } - + Kind = tok::unknown; break; - + // C99 6.4.4: Character Constants. case '\'': // Notify MIOpt that we read a non-whitespace/non-comment token. @@ -1527,7 +1623,7 @@ LexNextToken: Kind = tok::amp; } break; - case '*': + case '*': if (getCharAndSize(CurPtr, SizeTmp) == '=') { Kind = tok::starequal; CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); @@ -1552,7 +1648,7 @@ LexNextToken: if (Char == '-') { // -- CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Kind = tok::minusminus; - } else if (Char == '>' && Features.CPlusPlus && + } else if (Char == '>' && Features.CPlusPlus && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') { // C++ ->* CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result); @@ -1593,20 +1689,20 @@ LexNextToken: getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*') { if (SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result))) return; // KeepCommentMode - + // It is common for the tokens immediately after a // comment to be // whitespace (indentation for the next line). Instead of going through // the big switch, handle it efficiently now. goto SkipIgnoredUnits; } } - + if (Char == '*') { // /**/ comment. if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result))) return; // KeepCommentMode goto LexNextToken; // GCC isn't tail call eliminating. } - + if (Char == '=') { CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Kind = tok::slashequal; @@ -1642,7 +1738,7 @@ LexNextToken: if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer) { FormTokenWithChars(Result, CurPtr, tok::hash); PP->HandleDirective(Result); - + // As an optimization, if the preprocessor didn't switch lexers, tail // recurse. if (PP->isCurrentLexer(this)) { @@ -1655,10 +1751,10 @@ LexNextToken: } goto LexNextToken; // GCC isn't tail call eliminating. } - + return PP->Lex(Result); } - + Kind = tok::hash; } } else { @@ -1695,7 +1791,7 @@ LexNextToken: if (Char == '=') { CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Kind = tok::greaterequal; - } else if (Char == '>' && + } else if (Char == '>' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') { CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), SizeTmp2, Result); @@ -1736,7 +1832,7 @@ LexNextToken: } else if (Features.CPlusPlus && Char == ':') { Kind = tok::coloncolon; CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); - } else { + } else { Kind = tok::colon; } break; @@ -1748,7 +1844,7 @@ LexNextToken: if (Char == '=') { Kind = tok::equalequal; CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); - } else { + } else { Kind = tok::equal; } break; @@ -1773,7 +1869,7 @@ LexNextToken: if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer) { FormTokenWithChars(Result, CurPtr, tok::hash); PP->HandleDirective(Result); - + // As an optimization, if the preprocessor didn't switch lexers, tail // recurse. if (PP->isCurrentLexer(this)) { @@ -1788,7 +1884,7 @@ LexNextToken: } return PP->Lex(Result); } - + Kind = tok::hash; } break; @@ -1800,7 +1896,7 @@ LexNextToken: else Kind = tok::unknown; break; - + case '\\': // FIXME: UCN's. // FALL THROUGH. @@ -1808,7 +1904,7 @@ LexNextToken: Kind = tok::unknown; break; } - + // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp index 37ea52b46f9d8..42dd75e59b94d 100644 --- a/lib/Lex/LiteralSupport.cpp +++ b/lib/Lex/LiteralSupport.cpp @@ -16,6 +16,7 @@ #include "clang/Lex/Preprocessor.h" #include "clang/Lex/LexDiagnostic.h" #include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringExtras.h" using namespace clang; @@ -43,7 +44,7 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf, switch (ResultChar) { // These map to themselves. case '\\': case '\'': case '"': case '?': break; - + // These have fixed mappings. case 'a': // TODO: K&R: the meaning of '\\a' is different in traditional C @@ -82,7 +83,7 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf, HadError = 1; break; } - + // Hex escapes are a maximal series of hex digits. bool Overflow = false; for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) { @@ -95,13 +96,15 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf, } // See if any bits will be truncated when evaluated as a character. - unsigned CharWidth = PP.getTargetInfo().getCharWidth(IsWide); - + unsigned CharWidth = IsWide + ? PP.getTargetInfo().getWCharWidth() + : PP.getTargetInfo().getCharWidth(); + if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) { Overflow = true; ResultChar &= ~0U >> (32-CharWidth); } - + // Check for overflow. if (Overflow) // Too many digits to fit in PP.Diag(Loc, diag::warn_hex_escape_too_large); @@ -122,17 +125,19 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf, ++NumDigits; } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 && ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7'); - + // Check for overflow. Reject '\777', but not L'\777'. - unsigned CharWidth = PP.getTargetInfo().getCharWidth(IsWide); - + unsigned CharWidth = IsWide + ? PP.getTargetInfo().getWCharWidth() + : PP.getTargetInfo().getCharWidth(); + if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) { PP.Diag(Loc, diag::warn_octal_escape_too_large); ResultChar &= ~0U >> (32-CharWidth); } break; } - + // Otherwise, these are not valid escapes. case '(': case '{': case '[': case '%': // GCC accepts these as extensions. We warn about them as such though. @@ -146,7 +151,7 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf, PP.Diag(Loc, diag::ext_unknown_escape) << "x"+llvm::utohexstr(ResultChar); break; } - + return ResultChar; } @@ -154,16 +159,16 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf, /// convert the UTF32 to UTF8. This is a subroutine of StringLiteralParser. /// When we decide to implement UCN's for character constants and identifiers, /// we will likely rework our support for UCN's. -static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, - char *&ResultBuf, bool &HadError, - SourceLocation Loc, bool IsWide, Preprocessor &PP) +static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, + char *&ResultBuf, bool &HadError, + SourceLocation Loc, bool IsWide, Preprocessor &PP) { // FIXME: Add a warning - UCN's are only valid in C++ & C99. // FIXME: Handle wide strings. - + // Save the beginning of the string (for error diagnostics). const char *ThisTokBegin = ThisTokBuf; - + // Skip the '\u' char's. ThisTokBuf += 2; @@ -173,7 +178,7 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, return; } typedef uint32_t UTF32; - + UTF32 UcnVal = 0; unsigned short UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8); for (; ThisTokBuf != ThisTokEnd && UcnLen; ++ThisTokBuf, UcnLen--) { @@ -189,10 +194,10 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, HadError = 1; return; } - // Check UCN constraints (C99 6.4.3p2). + // Check UCN constraints (C99 6.4.3p2). if ((UcnVal < 0xa0 && (UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60 )) // $, @, ` - || (UcnVal >= 0xD800 && UcnVal <= 0xDFFF) + || (UcnVal >= 0xD800 && UcnVal <= 0xDFFF) || (UcnVal > 0x10FFFF)) /* the maximum legal UTF32 value */ { PP.Diag(Loc, diag::err_ucn_escape_invalid); HadError = 1; @@ -201,7 +206,7 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, // Now that we've parsed/checked the UCN, we convert from UTF32->UTF8. // The conversion below was inspired by: // http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c - // First, we determine how many bytes the result will require. + // First, we determine how many bytes the result will require. typedef uint8_t UTF8; unsigned short bytesToWrite = 0; @@ -213,13 +218,13 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, bytesToWrite = 3; else bytesToWrite = 4; - + const unsigned byteMask = 0xBF; const unsigned byteMark = 0x80; - + // Once the bits are split out into bytes of UTF8, this is a mask OR-ed // into the first byte, depending on how many bytes follow. - static const UTF8 firstByteMark[5] = { + static const UTF8 firstByteMark[5] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0 }; // Finally, we write the bytes into ResultBuf. @@ -239,13 +244,13 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, /// decimal-constant integer-suffix /// octal-constant integer-suffix /// hexadecimal-constant integer-suffix -/// decimal-constant: +/// decimal-constant: /// nonzero-digit /// decimal-constant digit -/// octal-constant: +/// octal-constant: /// 0 /// octal-constant octal-digit -/// hexadecimal-constant: +/// hexadecimal-constant: /// hexadecimal-prefix hexadecimal-digit /// hexadecimal-constant hexadecimal-digit /// hexadecimal-prefix: one of @@ -267,7 +272,7 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, /// u U /// long-suffix: one of /// l L -/// long-long-suffix: one of +/// long-long-suffix: one of /// ll LL /// /// floating-constant: [C99 6.4.4.2] @@ -277,14 +282,14 @@ NumericLiteralParser:: NumericLiteralParser(const char *begin, const char *end, SourceLocation TokLoc, Preprocessor &pp) : PP(pp), ThisTokBegin(begin), ThisTokEnd(end) { - + // This routine assumes that the range begin/end matches the regex for integer // and FP constants (specifically, the 'pp-number' regex), and assumes that // the byte at "*end" is both valid and not part of the regex. Because of // this, it doesn't have to check for 'overscan' in various places. assert(!isalnum(*end) && *end != '.' && *end != '_' && "Lexer didn't maximally munch?"); - + s = DigitsBegin = begin; saw_exponent = false; saw_period = false; @@ -293,8 +298,9 @@ NumericLiteralParser(const char *begin, const char *end, isLongLong = false; isFloat = false; isImaginary = false; + isMicrosoftInteger = false; hadError = false; - + if (*s == '0') { // parse radix ParseNumberStartingWithZero(TokLoc); if (hadError) @@ -313,7 +319,7 @@ NumericLiteralParser(const char *begin, const char *end, s++; saw_period = true; s = SkipDigits(s); - } + } if ((*s == 'e' || *s == 'E')) { // exponent const char *Exponent = s; s++; @@ -332,11 +338,11 @@ NumericLiteralParser(const char *begin, const char *end, } SuffixBegin = s; - + // Parse the suffix. At this point we can classify whether we have an FP or // integer constant. bool isFPConstant = isFloatingLiteral(); - + // Loop over all of the characters of the suffix. If we see something bad, // we break out of the loop. for (; s != ThisTokEnd; ++s) { @@ -357,7 +363,7 @@ NumericLiteralParser(const char *begin, const char *end, case 'L': if (isLong || isLongLong) break; // Cannot be repeated. if (isFloat) break; // LF invalid. - + // Check for long long. The L's need to be adjacent and the same case. if (s+1 != ThisTokEnd && s[1] == s[0]) { if (isFPConstant) break; // long long invalid for floats. @@ -370,31 +376,50 @@ NumericLiteralParser(const char *begin, const char *end, case 'i': if (PP.getLangOptions().Microsoft) { // Allow i8, i16, i32, i64, and i128. - if (++s == ThisTokEnd) break; - switch (*s) { - case '8': - s++; // i8 suffix - break; - case '1': - if (++s == ThisTokEnd) break; - if (*s == '6') s++; // i16 suffix - else if (*s == '2') { - if (++s == ThisTokEnd) break; - if (*s == '8') s++; // i128 suffix - } - break; - case '3': - if (++s == ThisTokEnd) break; - if (*s == '2') s++; // i32 suffix - break; - case '6': - if (++s == ThisTokEnd) break; - if (*s == '4') s++; // i64 suffix - break; - default: - break; + if (s + 1 != ThisTokEnd) { + switch (s[1]) { + case '8': + s += 2; // i8 suffix + isMicrosoftInteger = true; + continue; + case '1': + s += 2; + if (s == ThisTokEnd) break; + if (*s == '6') s++; // i16 suffix + else if (*s == '2') { + if (++s == ThisTokEnd) break; + if (*s == '8') s++; // i128 suffix + } + isMicrosoftInteger = true; + continue; + case '3': + s += 2; + if (s == ThisTokEnd) break; + if (*s == '2') s++; // i32 suffix + isMicrosoftInteger = true; + continue; + case '6': + s += 2; + if (s == ThisTokEnd) break; + if (*s == '4') s++; // i64 suffix + isMicrosoftInteger = true; + continue; + case 'f': // FP Suffix for "float" + case 'F': + if (!isFPConstant) break; // Error for integer constant. + if (isFloat || isLong) break; // FF, LF invalid. + isFloat = true; + if (isImaginary) break; // Cannot be repeated. + PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin), + diag::ext_imaginary_constant); + isImaginary = true; + s++; + continue; // Success. + default: + break; + } + break; } - break; } // fall through. case 'I': @@ -409,7 +434,7 @@ NumericLiteralParser(const char *begin, const char *end, // If we reached here, there was an error. break; } - + // Report an error if there are any. if (s != ThisTokEnd) { PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin), @@ -424,12 +449,12 @@ NumericLiteralParser(const char *begin, const char *end, /// ParseNumberStartingWithZero - This method is called when the first character /// of the number is found to be a zero. This means it is either an octal /// number (like '04') or a hex number ('0x123a') a binary number ('0b1010') or -/// a floating point number (01239.123e4). Eat the prefix, determining the +/// a floating point number (01239.123e4). Eat the prefix, determining the /// radix etc. void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { assert(s[0] == '0' && "Invalid method call"); s++; - + // Handle a hex number like 0x1234. if ((*s == 'x' || *s == 'X') && (isxdigit(s[1]) || s[1] == '.')) { s++; @@ -444,7 +469,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { s = SkipHexDigits(s); } // A binary exponent can appear with or with a '.'. If dotted, the - // binary exponent is required. + // binary exponent is required. if (*s == 'p' || *s == 'P') { const char *Exponent = s; s++; @@ -458,7 +483,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { return; } s = first_non_digit; - + if (!PP.getLangOptions().HexFloats) PP.Diag(TokLoc, diag::ext_hexconstant_invalid); } else if (saw_period) { @@ -468,7 +493,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { } return; } - + // Handle simple binary numbers 0b01010 if (*s == 'b' || *s == 'B') { // 0b101010 is a GCC extension. @@ -487,16 +512,16 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { // Other suffixes will be diagnosed by the caller. return; } - + // For now, the radix is set to 8. If we discover that we have a // floating point constant, the radix will change to 10. Octal floating - // point constants are not permitted (only decimal and hexadecimal). + // point constants are not permitted (only decimal and hexadecimal). radix = 8; DigitsBegin = s; s = SkipOctalDigits(s); if (s == ThisTokEnd) return; // Done, simple octal number like 01234 - + // If we have some other non-octal digit that *is* a decimal digit, see if // this is part of a floating point number like 094.123 or 09e1. if (isdigit(*s)) { @@ -506,7 +531,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { radix = 10; } } - + // If we have a hex digit other than 'e' (which denotes a FP exponent) then // the code is using an incorrect base. if (isxdigit(*s) && *s != 'e' && *s != 'E') { @@ -515,7 +540,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { hadError = true; return; } - + if (*s == '.') { s++; radix = 10; @@ -532,7 +557,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { if (first_non_digit != s) { s = first_non_digit; } else { - PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin), + PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin), diag::err_exponent_has_no_digits); hadError = true; return; @@ -552,7 +577,7 @@ bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) { // handles the common cases that matter (small decimal integers and // hex/octal values which don't overflow). unsigned MaxBitsPerDigit = 1; - while ((1U << MaxBitsPerDigit) < radix) + while ((1U << MaxBitsPerDigit) < radix) MaxBitsPerDigit += 1; if ((SuffixBegin - DigitsBegin) * MaxBitsPerDigit <= 64) { uint64_t N = 0; @@ -571,16 +596,16 @@ bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) { llvm::APInt RadixVal(Val.getBitWidth(), radix); llvm::APInt CharVal(Val.getBitWidth(), 0); llvm::APInt OldVal = Val; - + bool OverflowOccurred = false; while (s < SuffixBegin) { unsigned C = HexDigitValue(*s++); - + // If this letter is out of bound for this radix, reject it. assert(C < radix && "NumericLiteralParser ctor should have rejected this"); - + CharVal = C; - + // Add the digit to the value in the appropriate radix. If adding in digits // made the value smaller, then this overflowed. OldVal = Val; @@ -600,21 +625,24 @@ bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) { llvm::APFloat NumericLiteralParser:: GetFloatValue(const llvm::fltSemantics &Format, bool* isExact) { using llvm::APFloat; - + using llvm::StringRef; + llvm::SmallVector<char,256> floatChars; - for (unsigned i = 0, n = ThisTokEnd-ThisTokBegin; i != n; ++i) + unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin); + for (unsigned i = 0; i != n; ++i) floatChars.push_back(ThisTokBegin[i]); - + floatChars.push_back('\0'); - + APFloat V (Format, APFloat::fcZero, false); APFloat::opStatus status; - - status = V.convertFromString(&floatChars[0],APFloat::rmNearestTiesToEven); - + + status = V.convertFromString(StringRef(&floatChars[0], n), + APFloat::rmNearestTiesToEven); + if (isExact) *isExact = status == APFloat::opOK; - + return V; } @@ -623,16 +651,16 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, SourceLocation Loc, Preprocessor &PP) { // At this point we know that the character matches the regex "L?'.*'". HadError = false; - + // Determine if this is a wide character. IsWide = begin[0] == 'L'; if (IsWide) ++begin; - + // Skip over the entry quote. assert(begin[0] == '\'' && "Invalid token lexed"); ++begin; - // FIXME: The "Value" is an uint64_t so we can handle char literals of + // FIXME: The "Value" is an uint64_t so we can handle char literals of // upto 64-bits. // FIXME: This extensively assumes that 'char' is 8-bits. assert(PP.getTargetInfo().getCharWidth() == 8 && @@ -643,9 +671,9 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, assert(PP.getTargetInfo().getWCharWidth() <= 64 && "Assumes sizeof(wchar) on target is <= 64"); - // This is what we will use for overflow detection + // This is what we will use for overflow detection llvm::APInt LitVal(PP.getTargetInfo().getIntWidth(), 0); - + unsigned NumCharsSoFar = 0; while (begin[0] != '\'') { uint64_t ResultChar; @@ -668,7 +696,7 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, LitVal <<= 8; } } - + LitVal = LitVal + ResultChar; ++NumCharsSoFar; } @@ -684,11 +712,12 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, else PP.Diag(Loc, diag::ext_four_char_character_literal); IsMultiChar = true; - } + } else + IsMultiChar = false; // Transfer the value from APInt to uint64_t Value = LitVal.getZExtValue(); - + // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1") // if 'char' is signed for this target (C99 6.4.4.4p10). Note that multiple // character constants are not sign extended in the this implementation: @@ -743,7 +772,7 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks, MaxTokenLength = StringToks[0].getLength(); SizeBound = StringToks[0].getLength()-2; // -2 for "". AnyWide = StringToks[0].is(tok::wide_string_literal); - + hadError = false; // Implement Translation Phase #6: concatenation of string literals @@ -752,20 +781,20 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks, // The string could be shorter than this if it needs cleaning, but this is a // reasonable bound, which is all we need. SizeBound += StringToks[i].getLength()-2; // -2 for "". - + // Remember maximum string piece length. - if (StringToks[i].getLength() > MaxTokenLength) + if (StringToks[i].getLength() > MaxTokenLength) MaxTokenLength = StringToks[i].getLength(); - + // Remember if we see any wide strings. AnyWide |= StringToks[i].is(tok::wide_string_literal); } // Include space for the null terminator. ++SizeBound; - + // TODO: K&R warning: "traditional C rejects string constant concatenation" - + // Get the width in bytes of wchar_t. If no wchar_t strings are used, do not // query the target. As such, wchar_tByteWidth is only valid if AnyWide=true. wchar_tByteWidth = ~0U; @@ -774,25 +803,25 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks, assert((wchar_tByteWidth & 7) == 0 && "Assumes wchar_t is byte multiple!"); wchar_tByteWidth /= 8; } - + // The output buffer size needs to be large enough to hold wide characters. // This is a worst-case assumption which basically corresponds to L"" "long". if (AnyWide) SizeBound *= wchar_tByteWidth; - + // Size the temporary buffer to hold the result string data. ResultBuf.resize(SizeBound); - + // Likewise, but for each string piece. llvm::SmallString<512> TokenBuf; TokenBuf.resize(MaxTokenLength); - + // Loop over all the strings, getting their spelling, and expanding them to // wide strings as appropriate. ResultPtr = &ResultBuf[0]; // Next byte to fill in. - + Pascal = false; - + for (unsigned i = 0, e = NumStringToks; i != e; ++i) { const char *ThisTokBuf = &TokenBuf[0]; // Get the spelling of the token, which eliminates trigraphs, etc. We know @@ -800,23 +829,23 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks, // and 'spelled' tokens can only shrink. unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf); const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote. - + // TODO: Input character set mapping support. - + // Skip L marker for wide strings. bool ThisIsWide = false; if (ThisTokBuf[0] == 'L') { ++ThisTokBuf; ThisIsWide = true; } - + assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?"); ++ThisTokBuf; - + // Check if this is a pascal string if (pp.getLangOptions().PascalStrings && ThisTokBuf + 1 != ThisTokEnd && ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') { - + // If the \p sequence is found in the first token, we have a pascal string // Otherwise, if we already have a pascal string, ignore the first \p if (i == 0) { @@ -825,7 +854,7 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks, } else if (Pascal) ThisTokBuf += 2; } - + while (ThisTokBuf != ThisTokEnd) { // Is this a span of non-escape characters? if (ThisTokBuf[0] != '\\') { @@ -833,7 +862,7 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks, do { ++ThisTokBuf; } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\'); - + // Copy the character span over. unsigned Len = ThisTokBuf-InStart; if (!AnyWide) { @@ -852,7 +881,7 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks, } // Is this a Universal Character Name escape? if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') { - ProcessUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr, + ProcessUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr, hadError, StringToks[i].getLocation(), ThisIsWide, PP); continue; } @@ -860,17 +889,17 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks, unsigned ResultChar = ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError, StringToks[i].getLocation(), ThisIsWide, PP); - + // Note: our internal rep of wide char tokens is always little-endian. *ResultPtr++ = ResultChar & 0xFF; - + if (AnyWide) { for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i) *ResultPtr++ = ResultChar >> i*8; } } } - + if (Pascal) { ResultBuf[0] = ResultPtr-&ResultBuf[0]-1; @@ -895,31 +924,31 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok, // Get the spelling of the token. llvm::SmallString<16> SpellingBuffer; SpellingBuffer.resize(Tok.getLength()); - + const char *SpellingPtr = &SpellingBuffer[0]; unsigned TokLen = PP.getSpelling(Tok, SpellingPtr); assert(SpellingPtr[0] != 'L' && "Doesn't handle wide strings yet"); - + const char *SpellingStart = SpellingPtr; const char *SpellingEnd = SpellingPtr+TokLen; // Skip over the leading quote. assert(SpellingPtr[0] == '"' && "Should be a string literal!"); ++SpellingPtr; - + // Skip over bytes until we find the offset we're looking for. while (ByteNo) { assert(SpellingPtr < SpellingEnd && "Didn't find byte offset!"); - + // Step over non-escapes simply. if (*SpellingPtr != '\\') { ++SpellingPtr; --ByteNo; continue; } - + // Otherwise, this is an escape character. Advance over it. bool HadError = false; ProcessCharEscape(SpellingPtr, SpellingEnd, HadError, @@ -927,6 +956,6 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok, assert(!HadError && "This method isn't valid on erroneous strings"); --ByteNo; } - + return SpellingPtr-SpellingStart; } diff --git a/lib/Lex/MacroArgs.cpp b/lib/Lex/MacroArgs.cpp index cba69b7d79190..c14d7c438d607 100644 --- a/lib/Lex/MacroArgs.cpp +++ b/lib/Lex/MacroArgs.cpp @@ -23,18 +23,18 @@ MacroArgs *MacroArgs::create(const MacroInfo *MI, unsigned NumToks, bool VarargsElided) { assert(MI->isFunctionLike() && "Can't have args for an object-like macro!"); - + // Allocate memory for the MacroArgs object with the lexer tokens at the end. MacroArgs *Result = (MacroArgs*)malloc(sizeof(MacroArgs) + NumToks*sizeof(Token)); // Construct the macroargs object. new (Result) MacroArgs(NumToks, VarargsElided); - + // Copy the actual unexpanded tokens to immediately after the result ptr. if (NumToks) memcpy(const_cast<Token*>(Result->getUnexpArgument(0)), UnexpArgTokens, NumToks*sizeof(Token)); - + return Result; } @@ -98,7 +98,7 @@ bool MacroArgs::ArgNeedsPreexpansion(const Token *ArgTok, const std::vector<Token> & MacroArgs::getPreExpArgument(unsigned Arg, Preprocessor &PP) { assert(Arg < NumUnexpArgTokens && "Invalid argument number!"); - + // If we have already computed this, return it. if (PreExpArgTokens.empty()) PreExpArgTokens.resize(NumUnexpArgTokens); @@ -108,12 +108,12 @@ MacroArgs::getPreExpArgument(unsigned Arg, Preprocessor &PP) { const Token *AT = getUnexpArgument(Arg); unsigned NumToks = getArgLength(AT)+1; // Include the EOF. - + // Otherwise, we have to pre-expand this argument, populating Result. To do // this, we set up a fake TokenLexer to lex from the unexpanded argument // list. With this installed, we lex expanded tokens until we hit the EOF // token at the end of the unexp list. - PP.EnterTokenStream(AT, NumToks, false /*disable expand*/, + PP.EnterTokenStream(AT, NumToks, false /*disable expand*/, false /*owns tokens*/); // Lex all of the macro-expanded tokens into Result. @@ -122,7 +122,7 @@ MacroArgs::getPreExpArgument(unsigned Arg, Preprocessor &PP) { Token &Tok = Result.back(); PP.Lex(Tok); } while (Result.back().isNot(tok::eof)); - + // Pop the token stream off the top of the stack. We know that the internal // pointer inside of it is to the "end" of the token stream, but the stack // will not otherwise be popped until the next token is lexed. The problem is @@ -145,18 +145,18 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks, Tok.setKind(tok::string_literal); const Token *ArgTokStart = ArgToks; - + // Stringify all the tokens. llvm::SmallString<128> Result; Result += "\""; - + bool isFirst = true; for (; ArgToks->isNot(tok::eof); ++ArgToks) { const Token &Tok = *ArgToks; if (!isFirst && (Tok.hasLeadingSpace() || Tok.isAtStartOfLine())) Result += ' '; isFirst = false; - + // If this is a string or character constant, escape the token as specified // by 6.10.3.2p2. if (Tok.is(tok::string_literal) || // "foo" @@ -171,18 +171,18 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks, Result.resize(CurStrLen+Tok.getLength()); const char *BufPtr = &Result[CurStrLen]; unsigned ActualTokLen = PP.getSpelling(Tok, BufPtr); - + // If getSpelling returned a pointer to an already uniqued version of the // string instead of filling in BufPtr, memcpy it onto our string. if (BufPtr != &Result[CurStrLen]) memcpy(&Result[CurStrLen], BufPtr, ActualTokLen); - + // If the token was dirty, the spelling may be shorter than the token. if (ActualTokLen != Tok.getLength()) Result.resize(CurStrLen+ActualTokLen); } } - + // If the last character of the string is a \, and if it isn't escaped, this // is an invalid string literal, diagnose it as specified in C99. if (Result.back() == '\\') { @@ -199,27 +199,27 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks, } } Result += '"'; - + // If this is the charify operation and the result is not a legal character // constant, diagnose it. if (Charify) { // First step, turn double quotes into single quotes: Result[0] = '\''; Result[Result.size()-1] = '\''; - + // Check for bogus character. bool isBad = false; if (Result.size() == 3) isBad = Result[1] == '\''; // ''' is not legal. '\' already fixed above. else isBad = (Result.size() != 4 || Result[1] != '\\'); // Not '\x' - + if (isBad) { PP.Diag(ArgTokStart[0], diag::err_invalid_character_to_charify); Result = "' '"; // Use something arbitrary, but legal. } } - + PP.CreateString(&Result[0], Result.size(), Tok); return Tok; } diff --git a/lib/Lex/MacroArgs.h b/lib/Lex/MacroArgs.h index 4b22fa18aa8b9..8dee5b3bc997b 100644 --- a/lib/Lex/MacroArgs.h +++ b/lib/Lex/MacroArgs.h @@ -20,7 +20,7 @@ namespace clang { class MacroInfo; class Preprocessor; class Token; - + /// MacroArgs - An instance of this class captures information about /// the formal arguments specified to a function-like macro invocation. class MacroArgs { @@ -45,7 +45,7 @@ class MacroArgs { /// if in strict mode and the C99 varargs macro had only a ... argument, this /// is false. bool VarargsElided; - + MacroArgs(unsigned NumToks, bool varargsElided) : NumUnexpArgTokens(NumToks), VarargsElided(varargsElided) {} ~MacroArgs() {} @@ -55,46 +55,46 @@ public: static MacroArgs *create(const MacroInfo *MI, const Token *UnexpArgTokens, unsigned NumArgTokens, bool VarargsElided); - + /// destroy - Destroy and deallocate the memory for this object. /// void destroy(); - + /// ArgNeedsPreexpansion - If we can prove that the argument won't be affected /// by pre-expansion, return false. Otherwise, conservatively return true. bool ArgNeedsPreexpansion(const Token *ArgTok, Preprocessor &PP) const; - + /// getUnexpArgument - Return a pointer to the first token of the unexpanded /// token list for the specified formal. /// const Token *getUnexpArgument(unsigned Arg) const; - + /// getArgLength - Given a pointer to an expanded or unexpanded argument, /// return the number of tokens, not counting the EOF, that make up the /// argument. static unsigned getArgLength(const Token *ArgPtr); - + /// getPreExpArgument - Return the pre-expanded form of the specified /// argument. const std::vector<Token> & - getPreExpArgument(unsigned Arg, Preprocessor &PP); - + getPreExpArgument(unsigned Arg, Preprocessor &PP); + /// getStringifiedArgument - Compute, cache, and return the specified argument /// that has been 'stringified' as required by the # operator. const Token &getStringifiedArgument(unsigned ArgNo, Preprocessor &PP); - + /// getNumArguments - Return the number of arguments passed into this macro /// invocation. unsigned getNumArguments() const { return NumUnexpArgTokens; } - - + + /// isVarargsElidedUse - Return true if this is a C99 style varargs macro /// invocation and there was no argument specified for the "..." argument. If /// the argument was specified (even empty) or this isn't a C99 style varargs /// function, or if in strict mode and the C99 varargs macro had only a ... /// argument, this returns false. bool isVarargsElidedUse() const { return VarargsElided; } - + /// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of /// tokens into the literal string token that should be produced by the C # /// preprocessor operator. If Charify is true, then it should be turned into diff --git a/lib/Lex/MacroInfo.cpp b/lib/Lex/MacroInfo.cpp index df89450f5a555..fda884c4da4c3 100644 --- a/lib/Lex/MacroInfo.cpp +++ b/lib/Lex/MacroInfo.cpp @@ -22,7 +22,7 @@ MacroInfo::MacroInfo(SourceLocation DefLoc) : Location(DefLoc) { IsBuiltinMacro = false; IsDisabled = false; IsUsed = true; - + ArgumentList = 0; NumArguments = 0; } @@ -44,32 +44,32 @@ bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP) const { for (arg_iterator I = arg_begin(), OI = Other.arg_begin(), E = arg_end(); I != E; ++I, ++OI) if (*I != *OI) return false; - + // Check all the tokens. for (unsigned i = 0, e = ReplacementTokens.size(); i != e; ++i) { const Token &A = ReplacementTokens[i]; const Token &B = Other.ReplacementTokens[i]; if (A.getKind() != B.getKind()) return false; - + // If this isn't the first first token, check that the whitespace and // start-of-line characteristics match. if (i != 0 && (A.isAtStartOfLine() != B.isAtStartOfLine() || A.hasLeadingSpace() != B.hasLeadingSpace())) return false; - + // If this is an identifier, it is easy. if (A.getIdentifierInfo() || B.getIdentifierInfo()) { if (A.getIdentifierInfo() != B.getIdentifierInfo()) return false; continue; } - + // Otherwise, check the spelling. if (PP.getSpelling(A) != PP.getSpelling(B)) return false; } - + return true; } diff --git a/lib/Lex/PPCaching.cpp b/lib/Lex/PPCaching.cpp index 53aa09c130409..c3f0eeab58481 100644 --- a/lib/Lex/PPCaching.cpp +++ b/lib/Lex/PPCaching.cpp @@ -36,7 +36,7 @@ void Preprocessor::CommitBacktrackedTokens() { } /// Backtrack - Make Preprocessor re-lex the tokens that were lexed since -/// EnableBacktrackAtThisPos() was previously called. +/// EnableBacktrackAtThisPos() was previously called. void Preprocessor::Backtrack() { assert(!BacktrackPositions.empty() && "EnableBacktrackAtThisPos was not called!"); @@ -102,7 +102,8 @@ void Preprocessor::AnnotatePreviousCachedTokens(const Token &Tok) { assert((BacktrackPositions.empty() || BacktrackPositions.back() < i) && "The backtrack pos points inside the annotated tokens!"); // Replace the cached tokens with the single annotation token. - CachedTokens.erase(AnnotBegin + 1, CachedTokens.begin() + CachedLexPos); + if (i < CachedLexPos) + CachedTokens.erase(AnnotBegin + 1, CachedTokens.begin() + CachedLexPos); *AnnotBegin = Tok; CachedLexPos = i; return; diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp index af59ded275448..196a77f6426ad 100644 --- a/lib/Lex/PPDirectives.cpp +++ b/lib/Lex/PPDirectives.cpp @@ -26,7 +26,7 @@ using namespace clang; MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) { MacroInfo *MI; - + if (!MICache.empty()) { MI = MICache.back(); MICache.pop_back(); @@ -61,13 +61,13 @@ void Preprocessor::DiscardUntilEndOfDirective() { void Preprocessor::ReadMacroName(Token &MacroNameTok, char isDefineUndef) { // Read the token, don't allow macro expansion on it. LexUnexpandedToken(MacroNameTok); - + // Missing macro name? if (MacroNameTok.is(tok::eom)) { Diag(MacroNameTok, diag::err_pp_missing_macro_name); return; } - + IdentifierInfo *II = MacroNameTok.getIdentifierInfo(); if (II == 0) { std::string Spelling = getSpelling(MacroNameTok); @@ -93,7 +93,7 @@ void Preprocessor::ReadMacroName(Token &MacroNameTok, char isDefineUndef) { // Okay, we got a good identifier node. Return it. return; } - + // Invalid macro name, read and discard the rest of the line. Then set the // token kind to tok::eom. MacroNameTok.setKind(tok::eom); @@ -112,12 +112,12 @@ void Preprocessor::CheckEndOfDirective(const char *DirType, bool EnableMacros) { Lex(Tmp); else LexUnexpandedToken(Tmp); - + // There should be no tokens after the directive, but we allow them as an // extension. while (Tmp.is(tok::comment)) // Skip comments in -C mode. LexUnexpandedToken(Tmp); - + if (Tmp.isNot(tok::eom)) { // Add a fixit in GNU/C99/C++ mode. Don't offer a fixit for strict-C89, // because it is more trouble than it is worth to insert /**/ and check that @@ -148,12 +148,12 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, CurPPLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/false, FoundNonSkipPortion, FoundElse); - + if (CurPTHLexer) { PTHSkipExcludedConditionalBlock(); return; } - + // Enter raw mode to disable identifier lookup (and thus macro expansion), // disabling warnings, etc. CurPPLexer->LexingRawMode = true; @@ -163,7 +163,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, CurLexer->Lex(Tok); else CurPTHLexer->Lex(Tok); - + // If this is the end of the buffer, we have an error. if (Tok.is(tok::eof)) { // Emit errors for each unterminated conditional on the stack, including @@ -172,26 +172,26 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, Diag(CurPPLexer->ConditionalStack.back().IfLoc, diag::err_pp_unterminated_conditional); CurPPLexer->ConditionalStack.pop_back(); - } - + } + // Just return and let the caller lex after this #include. break; } - + // If this token is not a preprocessor directive, just skip it. if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine()) continue; - + // We just parsed a # character at the start of a line, so we're in // directive mode. Tell the lexer this so any newlines we see will be // converted into an EOM token (this terminates the macro). CurPPLexer->ParsingPreprocessorDirective = true; if (CurLexer) CurLexer->SetCommentRetentionState(false); - + // Read the next token, the directive flavor. LexUnexpandedToken(Tok); - + // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or // something bogus), skip it. if (Tok.isNot(tok::identifier)) { @@ -208,14 +208,14 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, // other common directives. const char *RawCharData = SourceMgr.getCharacterData(Tok.getLocation()); char FirstChar = RawCharData[0]; - if (FirstChar >= 'a' && FirstChar <= 'z' && + if (FirstChar >= 'a' && FirstChar <= 'z' && FirstChar != 'i' && FirstChar != 'e') { CurPPLexer->ParsingPreprocessorDirective = false; // Restore comment saving mode. if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments); continue; } - + // Get the identifier name without trigraphs or embedded newlines. Note // that we can't use Tok.getIdentifierInfo() because its lookup is disabled // when skipping. @@ -240,7 +240,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, Directive[IdLen] = 0; FirstChar = Directive[0]; } - + if (FirstChar == 'i' && Directive[1] == 'f') { if ((IdLen == 2) || // "if" (IdLen == 5 && !strcmp(Directive+2, "def")) || // "ifdef" @@ -260,7 +260,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, bool InCond = CurPPLexer->popConditionalLevel(CondInfo); InCond = InCond; // Silence warning in no-asserts mode. assert(!InCond && "Can't be skipping if not in a conditional!"); - + // If we popped the outermost skipping block, we're done skipping! if (!CondInfo.WasSkipping) break; @@ -270,13 +270,13 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, // as a non-skipping conditional. DiscardUntilEndOfDirective(); // C99 6.10p4. PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel(); - + // If this is a #else with a #else before it, report the error. if (CondInfo.FoundElse) Diag(Tok, diag::pp_err_else_after_else); - + // Note that we've seen a #else in this conditional. CondInfo.FoundElse = true; - + // If the conditional is at the top level, and the #if block wasn't // entered, enter the #else block now. if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) { @@ -301,10 +301,10 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, ShouldEnter = EvaluateDirectiveExpression(IfNDefMacro); CurPPLexer->LexingRawMode = true; } - + // If this is a #elif with a #else before it, report the error. if (CondInfo.FoundElse) Diag(Tok, diag::pp_err_elif_after_else); - + // If this condition is true, enter it! if (ShouldEnter) { CondInfo.FoundNonSkip = true; @@ -312,7 +312,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, } } } - + CurPPLexer->ParsingPreprocessorDirective = false; // Restore comment saving mode. if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments); @@ -325,11 +325,11 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, } void Preprocessor::PTHSkipExcludedConditionalBlock() { - - while(1) { + + while (1) { assert(CurPTHLexer); assert(CurPTHLexer->LexingRawMode == false); - + // Skip to the next '#else', '#elif', or #endif. if (CurPTHLexer->SkipBlock()) { // We have reached an #endif. Both the '#' and 'endif' tokens @@ -340,12 +340,12 @@ void Preprocessor::PTHSkipExcludedConditionalBlock() { assert(!InCond && "Can't be skipping if not in a conditional!"); break; } - + // We have reached a '#else' or '#elif'. Lex the next token to get // the directive flavor. Token Tok; LexUnexpandedToken(Tok); - + // We can actually look up the IdentifierInfo here since we aren't in // raw mode. tok::PPKeywordKind K = Tok.getIdentifierInfo()->getPPKeywordID(); @@ -357,32 +357,32 @@ void Preprocessor::PTHSkipExcludedConditionalBlock() { PPConditionalInfo &CondInfo = CurPTHLexer->peekConditionalLevel(); // Note that we've seen a #else in this conditional. CondInfo.FoundElse = true; - + // If the #if block wasn't entered then enter the #else block now. if (!CondInfo.FoundNonSkip) { CondInfo.FoundNonSkip = true; - + // Scan until the eom token. CurPTHLexer->ParsingPreprocessorDirective = true; DiscardUntilEndOfDirective(); CurPTHLexer->ParsingPreprocessorDirective = false; - + break; } - + // Otherwise skip this block. continue; } - + assert(K == tok::pp_elif); PPConditionalInfo &CondInfo = CurPTHLexer->peekConditionalLevel(); // If this is a #elif with a #else before it, report the error. if (CondInfo.FoundElse) Diag(Tok, diag::pp_err_elif_after_else); - + // If this is in a skipping block or if we're already handled this #if - // block, don't bother parsing the condition. We just skip this block. + // block, don't bother parsing the condition. We just skip this block. if (CondInfo.FoundNonSkip) continue; @@ -417,7 +417,7 @@ const FileEntry *Preprocessor::LookupFile(const char *FilenameStart, if (!FromDir) { FileID FID = getCurrentFileLexer()->getFileID(); CurFileEnt = SourceMgr.getFileEntryForID(FID); - + // If there is no file entry associated with this file, it must be the // predefines buffer. Any other file is not lexed with a normal lexer, so // it won't be scanned for preprocessor directives. If we have the @@ -429,14 +429,14 @@ const FileEntry *Preprocessor::LookupFile(const char *FilenameStart, CurFileEnt = SourceMgr.getFileEntryForID(FID); } } - + // Do a standard file entry lookup. CurDir = CurDirLookup; const FileEntry *FE = HeaderInfo.LookupFile(FilenameStart, FilenameEnd, isAngled, FromDir, CurDir, CurFileEnt); if (FE) return FE; - + // Otherwise, see if this is a subframework header. If so, this is relative // to one of the headers on the #include stack. Walk the list of the current // headers on the #include stack and pass them to HeaderInfo. @@ -446,18 +446,18 @@ const FileEntry *Preprocessor::LookupFile(const char *FilenameStart, CurFileEnt))) return FE; } - + for (unsigned i = 0, e = IncludeMacroStack.size(); i != e; ++i) { IncludeStackInfo &ISEntry = IncludeMacroStack[e-i-1]; if (IsFileLexer(ISEntry)) { - if ((CurFileEnt = + if ((CurFileEnt = SourceMgr.getFileEntryForID(ISEntry.ThePPLexer->getFileID()))) if ((FE = HeaderInfo.LookupSubframeworkHeader(FilenameStart, FilenameEnd, CurFileEnt))) return FE; } } - + // Otherwise, we really couldn't find the file. return 0; } @@ -468,31 +468,31 @@ const FileEntry *Preprocessor::LookupFile(const char *FilenameStart, //===----------------------------------------------------------------------===// /// HandleDirective - This callback is invoked when the lexer sees a # token -/// at the start of a line. This consumes the directive, modifies the +/// at the start of a line. This consumes the directive, modifies the /// lexer/preprocessor state, and advances the lexer(s) so that the next token /// read is the correct one. void Preprocessor::HandleDirective(Token &Result) { // FIXME: Traditional: # with whitespace before it not recognized by K&R? - + // We just parsed a # character at the start of a line, so we're in directive // mode. Tell the lexer this so any newlines we see will be converted into an // EOM token (which terminates the directive). CurPPLexer->ParsingPreprocessorDirective = true; - + ++NumDirectives; - + // We are about to read a token. For the multiple-include optimization FA to - // work, we have to remember if we had read any tokens *before* this + // work, we have to remember if we had read any tokens *before* this // pp-directive. bool ReadAnyTokensBeforeDirective = CurPPLexer->MIOpt.getHasReadAnyTokensVal(); - + // Save the '#' token in case we need to return it later. Token SavedHash = Result; - + // Read the next token, the directive flavor. This isn't expanded due to // C99 6.10.3p8. LexUnexpandedToken(Result); - + // C99 6.10.3p11: Is this preprocessor directive in macro invocation? e.g.: // #define A(x) #x // A(abc @@ -501,7 +501,7 @@ void Preprocessor::HandleDirective(Token &Result) { // If so, the user is relying on non-portable behavior, emit a diagnostic. if (InMacroArgs) Diag(Result, diag::ext_embedded_directive); - + TryAgain: switch (Result.getKind()) { case tok::eom: @@ -518,7 +518,7 @@ TryAgain: default: IdentifierInfo *II = Result.getIdentifierInfo(); if (II == 0) break; // Not an identifier. - + // Ask what the preprocessor keyword ID is. switch (II->getPPKeywordID()) { default: break; @@ -535,13 +535,13 @@ TryAgain: return HandleElseDirective(Result); case tok::pp_endif: return HandleEndifDirective(Result); - + // C99 6.10.2 - Source File Inclusion. case tok::pp_include: return HandleIncludeDirective(Result); // Handle #include. case tok::pp___include_macros: return HandleIncludeMacrosDirective(Result); // Handle -imacros. - + // C99 6.10.3 - Macro Replacement. case tok::pp_define: return HandleDefineDirective(Result); @@ -551,21 +551,21 @@ TryAgain: // C99 6.10.4 - Line Control. case tok::pp_line: return HandleLineDirective(Result); - + // C99 6.10.5 - Error Directive. case tok::pp_error: return HandleUserDiagnosticDirective(Result, false); - + // C99 6.10.6 - Pragma Directive. case tok::pp_pragma: return HandlePragmaDirective(); - + // GNU Extensions. case tok::pp_import: return HandleImportDirective(Result); case tok::pp_include_next: return HandleIncludeNextDirective(Result); - + case tok::pp_warning: Diag(Result, diag::ext_pp_warning_directive); return HandleUserDiagnosticDirective(Result, true); @@ -582,15 +582,15 @@ TryAgain: } break; } - + // If this is a .S file, treat unknown # directives as non-preprocessor // directives. This is important because # may be a comment or introduce // various pseudo-ops. Just return the # token and push back the following // token to be lexed next time. if (getLangOptions().AsmPreprocessor) { - Token *Toks = new Token[2](); + Token *Toks = new Token[2]; // Return the # and the token after it. - Toks[0] = SavedHash; + Toks[0] = SavedHash; Toks[1] = Result; // Enter this token stream so that we re-lex the tokens. Make sure to // enable macro expansion, in case the token after the # is an identifier @@ -598,13 +598,13 @@ TryAgain: EnterTokenStream(Toks, 2, false, true); return; } - + // If we reached here, the preprocessing token is not valid! Diag(Result, diag::err_pp_invalid_directive); - + // Read the rest of the PP line. DiscardUntilEndOfDirective(); - + // Okay, we're done parsing the directive. } @@ -614,17 +614,17 @@ static bool GetLineValue(Token &DigitTok, unsigned &Val, unsigned DiagID, Preprocessor &PP) { if (DigitTok.isNot(tok::numeric_constant)) { PP.Diag(DigitTok, DiagID); - + if (DigitTok.isNot(tok::eom)) PP.DiscardUntilEndOfDirective(); return true; } - + llvm::SmallString<64> IntegerBuffer; IntegerBuffer.resize(DigitTok.getLength()); const char *DigitTokBegin = &IntegerBuffer[0]; unsigned ActualLength = PP.getSpelling(DigitTok, DigitTokBegin); - + // Verify that we have a simple digit-sequence, and compute the value. This // is always a simple digit string computed in decimal, so we do this manually // here. @@ -636,7 +636,7 @@ static bool GetLineValue(Token &DigitTok, unsigned &Val, PP.DiscardUntilEndOfDirective(); return true; } - + unsigned NextVal = Val*10+(DigitTokBegin[i]-'0'); if (NextVal < Val) { // overflow. PP.Diag(DigitTok, DiagID); @@ -645,21 +645,21 @@ static bool GetLineValue(Token &DigitTok, unsigned &Val, } Val = NextVal; } - - // Reject 0, this is needed both by #line numbers and flags. + + // Reject 0, this is needed both by #line numbers and flags. if (Val == 0) { PP.Diag(DigitTok, DiagID); PP.DiscardUntilEndOfDirective(); return true; } - + if (DigitTokBegin[0] == '0') PP.Diag(DigitTok.getLocation(), diag::warn_pp_line_decimal); - + return false; } -/// HandleLineDirective - Handle #line directive: C99 6.10.4. The two +/// HandleLineDirective - Handle #line directive: C99 6.10.4. The two /// acceptable forms are: /// # line digit-sequence /// # line digit-sequence "s-char-sequence" @@ -679,14 +679,14 @@ void Preprocessor::HandleLineDirective(Token &Tok) { unsigned LineLimit = Features.C99 ? 2147483648U : 32768U; if (LineNo >= LineLimit) Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit; - + int FilenameID = -1; Token StrTok; Lex(StrTok); // If the StrTok is "eom", then it wasn't present. Otherwise, it must be a // string followed by eom. - if (StrTok.is(tok::eom)) + if (StrTok.is(tok::eom)) ; // ok else if (StrTok.isNot(tok::string_literal)) { Diag(StrTok, diag::err_pp_line_invalid_filename); @@ -704,14 +704,14 @@ void Preprocessor::HandleLineDirective(Token &Tok) { } FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString(), Literal.GetStringLength()); - + // Verify that there is nothing after the string, other than EOM. Because // of C99 6.10.4p5, macros that expand to empty tokens are ok. CheckEndOfDirective("line", true); } - + SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID); - + if (Callbacks) Callbacks->FileChanged(DigitTok.getLocation(), PPCallbacks::RenameFile, SrcMgr::C_User); @@ -731,21 +731,21 @@ static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit, if (FlagVal == 1) { IsFileEntry = true; - + PP.Lex(FlagTok); if (FlagTok.is(tok::eom)) return false; if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP)) return true; } else if (FlagVal == 2) { IsFileExit = true; - + SourceManager &SM = PP.getSourceManager(); // If we are leaving the current presumed file, check to make sure the // presumed include stack isn't empty! FileID CurFileID = SM.getDecomposedInstantiationLoc(FlagTok.getLocation()).first; PresumedLoc PLoc = SM.getPresumedLoc(FlagTok.getLocation()); - + // If there is no include loc (main file) or if the include loc is in a // different physical file, then we aren't in a "1" line marker flag region. SourceLocation IncLoc = PLoc.getIncludeLoc(); @@ -755,7 +755,7 @@ static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit, PP.DiscardUntilEndOfDirective(); return true; } - + PP.Lex(FlagTok); if (FlagTok.is(tok::eom)) return false; if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP)) @@ -768,9 +768,9 @@ static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit, PP.DiscardUntilEndOfDirective(); return true; } - + IsSystemHeader = true; - + PP.Lex(FlagTok); if (FlagTok.is(tok::eom)) return false; if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP)) @@ -782,9 +782,9 @@ static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit, PP.DiscardUntilEndOfDirective(); return true; } - + IsExternCHeader = true; - + PP.Lex(FlagTok); if (FlagTok.is(tok::eom)) return false; @@ -798,7 +798,7 @@ static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit, /// one of the following forms: /// /// # 42 -/// # 42 "file" ('1' | '2')? +/// # 42 "file" ('1' | '2')? /// # 42 "file" ('1' | '2')? '3' '4'? /// void Preprocessor::HandleDigitDirective(Token &DigitTok) { @@ -808,17 +808,17 @@ void Preprocessor::HandleDigitDirective(Token &DigitTok) { if (GetLineValue(DigitTok, LineNo, diag::err_pp_linemarker_requires_integer, *this)) return; - + Token StrTok; Lex(StrTok); - + bool IsFileEntry = false, IsFileExit = false; bool IsSystemHeader = false, IsExternCHeader = false; int FilenameID = -1; // If the StrTok is "eom", then it wasn't present. Otherwise, it must be a // string followed by eom. - if (StrTok.is(tok::eom)) + if (StrTok.is(tok::eom)) ; // ok else if (StrTok.isNot(tok::string_literal)) { Diag(StrTok, diag::err_pp_linemarker_invalid_filename); @@ -835,18 +835,18 @@ void Preprocessor::HandleDigitDirective(Token &DigitTok) { } FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString(), Literal.GetStringLength()); - + // If a filename was present, read any flags that are present. - if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, + if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, IsSystemHeader, IsExternCHeader, *this)) return; } - + // Create a line note with this information. SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, - IsFileEntry, IsFileExit, + IsFileEntry, IsFileExit, IsSystemHeader, IsExternCHeader); - + // If the preprocessor has callbacks installed, notify them of the #line // change. This is used so that the line marker comes out in -E mode for // example. @@ -861,7 +861,7 @@ void Preprocessor::HandleDigitDirective(Token &DigitTok) { FileKind = SrcMgr::C_ExternCSystem; else if (IsSystemHeader) FileKind = SrcMgr::C_System; - + Callbacks->FileChanged(DigitTok.getLocation(), Reason, FileKind); } } @@ -869,7 +869,7 @@ void Preprocessor::HandleDigitDirective(Token &DigitTok) { /// HandleUserDiagnosticDirective - Handle a #warning or #error directive. /// -void Preprocessor::HandleUserDiagnosticDirective(Token &Tok, +void Preprocessor::HandleUserDiagnosticDirective(Token &Tok, bool isWarning) { // PTH doesn't emit #warning or #error directives. if (CurPTHLexer) @@ -892,11 +892,11 @@ void Preprocessor::HandleUserDiagnosticDirective(Token &Tok, void Preprocessor::HandleIdentSCCSDirective(Token &Tok) { // Yes, this directive is an extension. Diag(Tok, diag::ext_pp_ident_directive); - + // Read the string argument. Token StrTok; Lex(StrTok); - + // If the token kind isn't a string, it's a malformed directive. if (StrTok.isNot(tok::string_literal) && StrTok.isNot(tok::wide_string_literal)) { @@ -905,7 +905,7 @@ void Preprocessor::HandleIdentSCCSDirective(Token &Tok) { DiscardUntilEndOfDirective(); return; } - + // Verify that there is nothing after the string, other than EOM. CheckEndOfDirective("ident"); @@ -928,7 +928,7 @@ bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc, const char *&BufEnd) { // Get the text form of the filename. assert(BufStart != BufEnd && "Can't have tokens with empty spellings!"); - + // Make sure the filename is <x> or "x". bool isAngled; if (BufStart[0] == '<') { @@ -950,14 +950,14 @@ bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc, BufStart = 0; return true; } - + // Diagnose #include "" as invalid. if (BufEnd-BufStart <= 2) { Diag(Loc, diag::err_pp_empty_filename); BufStart = 0; return ""; } - + // Skip the brackets. ++BufStart; --BufEnd; @@ -977,33 +977,33 @@ bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc, static bool ConcatenateIncludeName(llvm::SmallVector<char, 128> &FilenameBuffer, Preprocessor &PP) { Token CurTok; - + PP.Lex(CurTok); while (CurTok.isNot(tok::eom)) { // Append the spelling of this token to the buffer. If there was a space // before it, add it now. if (CurTok.hasLeadingSpace()) FilenameBuffer.push_back(' '); - + // Get the spelling of the token, directly into FilenameBuffer if possible. unsigned PreAppendSize = FilenameBuffer.size(); FilenameBuffer.resize(PreAppendSize+CurTok.getLength()); - + const char *BufPtr = &FilenameBuffer[PreAppendSize]; unsigned ActualLen = PP.getSpelling(CurTok, BufPtr); - + // If the token was spelled somewhere else, copy it into FilenameBuffer. if (BufPtr != &FilenameBuffer[PreAppendSize]) memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen); - + // Resize FilenameBuffer to the correct size. if (CurTok.getLength() != ActualLen) FilenameBuffer.resize(PreAppendSize+ActualLen); - + // If we found the '>' marker, return success. if (CurTok.is(tok::greater)) return false; - + PP.Lex(CurTok); } @@ -1017,14 +1017,14 @@ static bool ConcatenateIncludeName(llvm::SmallVector<char, 128> &FilenameBuffer, /// file to be included from the lexer, then include it! This is a common /// routine with functionality shared between #include, #include_next and /// #import. LookupFrom is set when this is a #include_next directive, it -/// specifies the file to start searching from. +/// specifies the file to start searching from. void Preprocessor::HandleIncludeDirective(Token &IncludeTok, const DirectoryLookup *LookupFrom, bool isImport) { Token FilenameTok; CurPPLexer->LexIncludeFilename(FilenameTok); - + // Reserve a buffer to get the spelling. llvm::SmallVector<char, 128> FilenameBuffer; const char *FilenameStart, *FilenameEnd; @@ -1033,7 +1033,7 @@ void Preprocessor::HandleIncludeDirective(Token &IncludeTok, case tok::eom: // If the token kind is EOM, the error has already been diagnosed. return; - + case tok::angle_string_literal: case tok::string_literal: { FilenameBuffer.resize(FilenameTok.getLength()); @@ -1042,7 +1042,7 @@ void Preprocessor::HandleIncludeDirective(Token &IncludeTok, FilenameEnd = FilenameStart+Len; break; } - + case tok::less: // This could be a <foo/bar.h> file coming from a macro expansion. In this // case, glue the tokens together into FilenameBuffer and interpret those. @@ -1057,7 +1057,7 @@ void Preprocessor::HandleIncludeDirective(Token &IncludeTok, DiscardUntilEndOfDirective(); return; } - + bool isAngled = GetIncludeFilenameSpelling(FilenameTok.getLocation(), FilenameStart, FilenameEnd); // If GetIncludeFilenameSpelling set the start ptr to null, there was an @@ -1066,7 +1066,7 @@ void Preprocessor::HandleIncludeDirective(Token &IncludeTok, DiscardUntilEndOfDirective(); return; } - + // Verify that there is nothing after the filename, other than EOM. Note that // we allow macros that expand to nothing after the filename, because this // falls into the category of "#include pp-tokens new-line" specified in @@ -1078,7 +1078,7 @@ void Preprocessor::HandleIncludeDirective(Token &IncludeTok, Diag(FilenameTok, diag::err_pp_include_too_deep); return; } - + // Search include directories. const DirectoryLookup *CurDir; const FileEntry *File = LookupFile(FilenameStart, FilenameEnd, @@ -1088,19 +1088,19 @@ void Preprocessor::HandleIncludeDirective(Token &IncludeTok, << std::string(FilenameStart, FilenameEnd); return; } - + // Ask HeaderInfo if we should enter this #include file. If not, #including // this file will have no effect. if (!HeaderInfo.ShouldEnterIncludeFile(File, isImport)) return; - + // The #included file will be considered to be a system header if either it is // in a system include directory, or if the #includer is a system include // header. - SrcMgr::CharacteristicKind FileCharacter = + SrcMgr::CharacteristicKind FileCharacter = std::max(HeaderInfo.getFileDirFlavor(File), SourceMgr.getFileCharacteristic(FilenameTok.getLocation())); - + // Look up the file, create a File ID for it. FileID FID = SourceMgr.createFileID(File, FilenameTok.getLocation(), FileCharacter); @@ -1118,7 +1118,7 @@ void Preprocessor::HandleIncludeDirective(Token &IncludeTok, /// void Preprocessor::HandleIncludeNextDirective(Token &IncludeNextTok) { Diag(IncludeNextTok, diag::ext_pp_include_next_directive); - + // #include_next is like #include, except that we start searching after // the current found directory. If we can't do this, issue a // diagnostic. @@ -1132,7 +1132,7 @@ void Preprocessor::HandleIncludeNextDirective(Token &IncludeNextTok) { // Start looking up in the next directory. ++Lookup; } - + return HandleIncludeDirective(IncludeNextTok, Lookup); } @@ -1141,7 +1141,7 @@ void Preprocessor::HandleIncludeNextDirective(Token &IncludeNextTok) { void Preprocessor::HandleImportDirective(Token &ImportTok) { if (!Features.ObjC1) // #import is standard for ObjC. Diag(ImportTok, diag::ext_pp_import_directive); - + return HandleIncludeDirective(ImportTok, 0, true); } @@ -1159,11 +1159,11 @@ void Preprocessor::HandleIncludeMacrosDirective(Token &IncludeMacrosTok) { DiscardUntilEndOfDirective(); return; } - + // Treat this as a normal #include for checking purposes. If this is // successful, it will push a new lexer onto the include stack. HandleIncludeDirective(IncludeMacrosTok, 0, false); - + Token TmpTok; do { Lex(TmpTok); @@ -1181,7 +1181,7 @@ void Preprocessor::HandleIncludeMacrosDirective(Token &IncludeMacrosTok) { /// parsing the arg list. bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI) { llvm::SmallVector<IdentifierInfo*, 32> Arguments; - + Token Tok; while (1) { LexUnexpandedToken(Tok); @@ -1223,18 +1223,18 @@ bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI) { // If this is already used as an argument, it is used multiple times (e.g. // #define X(A,A. - if (std::find(Arguments.begin(), Arguments.end(), II) != + if (std::find(Arguments.begin(), Arguments.end(), II) != Arguments.end()) { // C99 6.10.3p6 Diag(Tok, diag::err_pp_duplicate_name_in_arg_list) << II; return true; } - + // Add the argument to the macro info. Arguments.push_back(II); - + // Lex the token after the identifier. LexUnexpandedToken(Tok); - + switch (Tok.getKind()) { default: // #define X(A B Diag(Tok, diag::err_pp_expected_comma_in_arg_list); @@ -1247,14 +1247,14 @@ bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI) { case tok::ellipsis: // #define X(A... -> GCC extension // Diagnose extension. Diag(Tok, diag::ext_named_variadic_macro); - + // Lex the token after the identifier. LexUnexpandedToken(Tok); if (Tok.isNot(tok::r_paren)) { Diag(Tok, diag::err_pp_missing_rparen_in_macro_def); return true; } - + MI->setIsGNUVarargs(); MI->setArgumentList(&Arguments[0], Arguments.size(), BP); return false; @@ -1270,7 +1270,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { Token MacroNameTok; ReadMacroName(MacroNameTok, 1); - + // Error reading macro name? If so, diagnostic already issued. if (MacroNameTok.is(tok::eom)) return; @@ -1280,13 +1280,13 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { // If we are supposed to keep comments in #defines, reenable comment saving // mode. if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments); - + // Create the new macro. MacroInfo *MI = AllocateMacroInfo(MacroNameTok.getLocation()); - + Token Tok; LexUnexpandedToken(Tok); - + // If this is a function-like macro definition, parse the argument list, // marking each of the identifiers as being used as macro arguments. Also, // check other constraints on the first token of the macro body. @@ -1310,13 +1310,13 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { // If this is a definition of a variadic C99 function-like macro, not using // the GNU named varargs extension, enabled __VA_ARGS__. - + // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. // This gets unpoisoned where it is allowed. assert(Ident__VA_ARGS__->isPoisoned() && "__VA_ARGS__ should be poisoned!"); if (MI->isC99Varargs()) Ident__VA_ARGS__->setIsPoisoned(false); - + // Read the first token after the arg list for down below. LexUnexpandedToken(Tok); } else if (Features.C99) { @@ -1357,7 +1357,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { // Get the next token of the macro. LexUnexpandedToken(Tok); } - + } else { // Otherwise, read the body of a function-like macro. While we are at it, // check C99 6.10.3.2p1: ensure that # operators are followed by macro @@ -1367,15 +1367,15 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { if (Tok.isNot(tok::hash)) { MI->AddTokenToBody(Tok); - + // Get the next token of the macro. LexUnexpandedToken(Tok); continue; } - + // Get the next token of the macro. LexUnexpandedToken(Tok); - + // Check for a valid macro arg identifier. if (Tok.getIdentifierInfo() == 0 || MI->getArgumentNum(Tok.getIdentifierInfo()) == -1) { @@ -1389,24 +1389,24 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { } else { Diag(Tok, diag::err_pp_stringize_not_parameter); ReleaseMacroInfo(MI); - + // Disable __VA_ARGS__ again. Ident__VA_ARGS__->setIsPoisoned(true); return; } } - + // Things look ok, add the '#' and param name tokens to the macro. MI->AddTokenToBody(LastTok); MI->AddTokenToBody(Tok); LastTok = Tok; - + // Get the next token of the macro. LexUnexpandedToken(Tok); } } - - + + // Disable __VA_ARGS__ again. Ident__VA_ARGS__->setIsPoisoned(true); @@ -1425,14 +1425,14 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { return; } } - + // If this is the primary source file, remember that this macro hasn't been // used yet. if (isInPrimaryFile()) MI->setIsUsed(false); MI->setDefinitionEndLoc(LastTok.getLocation()); - + // Finally, if this identifier already had a macro defined for it, verify that // the macro bodies are identical and free the old definition. if (MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo())) { @@ -1452,12 +1452,12 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { Diag(OtherMI->getDefinitionLoc(), diag::note_previous_definition); } } - + ReleaseMacroInfo(OtherMI); } - + setMacroInfo(MacroNameTok.getIdentifierInfo(), MI); - + // If the callbacks want to know, tell them about the macro definition. if (Callbacks) Callbacks->MacroDefined(MacroNameTok.getIdentifierInfo(), MI); @@ -1470,17 +1470,17 @@ void Preprocessor::HandleUndefDirective(Token &UndefTok) { Token MacroNameTok; ReadMacroName(MacroNameTok, 2); - + // Error reading macro name? If so, diagnostic already issued. if (MacroNameTok.is(tok::eom)) return; - + // Check to see if this is the last token on the #undef line. CheckEndOfDirective("undef"); - + // Okay, we finally have a valid identifier to undef. MacroInfo *MI = getMacroInfo(MacroNameTok.getIdentifierInfo()); - + // If the macro is not defined, this is a noop undef, just return. if (MI == 0) return; @@ -1513,7 +1513,7 @@ void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef, Token MacroNameTok; ReadMacroName(MacroNameTok); - + // Error reading macro name? If so, diagnostic already issued. if (MacroNameTok.is(tok::eom)) { // Skip code until we get to #endif. This helps with recovery by not @@ -1522,7 +1522,7 @@ void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef, /*Foundnonskip*/false, /*FoundElse*/false); return; } - + // Check to see if this is the last token on the #if[n]def line. CheckEndOfDirective(isIfndef ? "ifndef" : "ifdef"); @@ -1541,7 +1541,7 @@ void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef, // If there is a macro, process it. if (MI) // Mark it used. MI->setIsUsed(true); - + // Should we include the stuff contained by this directive? if (!MI == isIfndef) { // Yes, remember that we are inside a conditional, then lex the next token. @@ -1550,7 +1550,7 @@ void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef, } else { // No, skip the contents of this block and return the first token after it. SkipExcludedConditionalBlock(DirectiveTok.getLocation(), - /*Foundnonskip*/false, + /*Foundnonskip*/false, /*FoundElse*/false); } } @@ -1560,11 +1560,11 @@ void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef, void Preprocessor::HandleIfDirective(Token &IfToken, bool ReadAnyTokensBeforeDirective) { ++NumIf; - + // Parse and evaluation the conditional expression. IdentifierInfo *IfNDefMacro = 0; bool ConditionalTrue = EvaluateDirectiveExpression(IfNDefMacro); - + // If this condition is equivalent to #ifndef X, and if this is the first // directive seen, handle it for the multiple-include optimization. @@ -1582,7 +1582,7 @@ void Preprocessor::HandleIfDirective(Token &IfToken, /*foundnonskip*/true, /*foundelse*/false); } else { // No, skip the contents of this block and return the first token after it. - SkipExcludedConditionalBlock(IfToken.getLocation(), /*Foundnonskip*/false, + SkipExcludedConditionalBlock(IfToken.getLocation(), /*Foundnonskip*/false, /*FoundElse*/false); } } @@ -1591,21 +1591,21 @@ void Preprocessor::HandleIfDirective(Token &IfToken, /// void Preprocessor::HandleEndifDirective(Token &EndifToken) { ++NumEndif; - + // Check that this is the whole directive. CheckEndOfDirective("endif"); - + PPConditionalInfo CondInfo; if (CurPPLexer->popConditionalLevel(CondInfo)) { // No conditionals on the stack: this is an #endif without an #if. Diag(EndifToken, diag::err_pp_endif_without_if); return; } - + // If this the end of a top-level #endif, inform MIOpt. if (CurPPLexer->getConditionalStackDepth() == 0) CurPPLexer->MIOpt.ExitTopLevelConditional(); - + assert(!CondInfo.WasSkipping && !CurPPLexer->LexingRawMode && "This code should only be reachable in the non-skipping case!"); } @@ -1613,23 +1613,23 @@ void Preprocessor::HandleEndifDirective(Token &EndifToken) { void Preprocessor::HandleElseDirective(Token &Result) { ++NumElse; - + // #else directive in a non-skipping conditional... start skipping. CheckEndOfDirective("else"); - + PPConditionalInfo CI; if (CurPPLexer->popConditionalLevel(CI)) { Diag(Result, diag::pp_err_else_without_if); return; } - + // If this is a top-level #else, inform the MIOpt. if (CurPPLexer->getConditionalStackDepth() == 0) CurPPLexer->MIOpt.EnterTopLevelConditional(); // If this is a #else with a #else before it, report the error. if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else); - + // Finally, skip the rest of the contents of this block and return the first // token after it. return SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true, @@ -1638,7 +1638,7 @@ void Preprocessor::HandleElseDirective(Token &Result) { void Preprocessor::HandleElifDirective(Token &ElifToken) { ++NumElse; - + // #elif directive in a non-skipping conditional... start skipping. // We don't care what the condition is, because we will always skip it (since // the block immediately before it was included). @@ -1649,11 +1649,11 @@ void Preprocessor::HandleElifDirective(Token &ElifToken) { Diag(ElifToken, diag::pp_err_elif_without_if); return; } - + // If this is a top-level #elif, inform the MIOpt. if (CurPPLexer->getConditionalStackDepth() == 0) CurPPLexer->MIOpt.EnterTopLevelConditional(); - + // If this is a #elif with a #else before it, report the error. if (CI.FoundElse) Diag(ElifToken, diag::pp_err_elif_after_else); diff --git a/lib/Lex/PPExpressions.cpp b/lib/Lex/PPExpressions.cpp index c98acc4deb351..908385c5d3921 100644 --- a/lib/Lex/PPExpressions.cpp +++ b/lib/Lex/PPExpressions.cpp @@ -30,18 +30,18 @@ class PPValue { SourceRange Range; public: llvm::APSInt Val; - + // Default ctor - Construct an 'invalid' PPValue. PPValue(unsigned BitWidth) : Val(BitWidth) {} - + unsigned getBitWidth() const { return Val.getBitWidth(); } bool isUnsigned() const { return Val.isUnsigned(); } - + const SourceRange &getRange() const { return Range; } - + void setRange(SourceLocation L) { Range.setBegin(L); Range.setEnd(L); } void setRange(SourceLocation B, SourceLocation E) { - Range.setBegin(B); Range.setEnd(E); + Range.setBegin(B); Range.setEnd(E); } void setBegin(SourceLocation L) { Range.setBegin(L); } void setEnd(SourceLocation L) { Range.setEnd(L); } @@ -82,7 +82,7 @@ struct DefinedTracker { static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, bool ValueLive, Preprocessor &PP) { DT.State = DefinedTracker::Unknown; - + // If this token's spelling is a pp-identifier, check to see if it is // 'defined' or if it is a macro. Note that we check here because many // keywords are pp-identifiers, so we can't check the kind. @@ -113,13 +113,13 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, LParenLoc = PeekTok.getLocation(); PP.LexUnexpandedToken(PeekTok); } - + // If we don't have a pp-identifier now, this is an error. if ((II = PeekTok.getIdentifierInfo()) == 0) { PP.Diag(PeekTok, diag::err_pp_defined_requires_identifier); return true; } - + // Otherwise, we got an identifier, is it defined to something? Result.Val = II->hasMacroDefinition(); Result.Val.setIsUnsigned(false); // Result is signed intmax_t. @@ -145,13 +145,13 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, Result.setEnd(PeekTok.getLocation()); PP.LexNonComment(PeekTok); } - + // Success, remember that we saw defined(X). DT.State = DefinedTracker::DefinedMacro; DT.TheMacro = II; return false; } - + switch (PeekTok.getKind()) { default: // Non-value token. PP.Diag(PeekTok, diag::err_pp_expr_bad_token_start_expr); @@ -166,11 +166,11 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, IntegerBuffer.resize(PeekTok.getLength()); const char *ThisTokBegin = &IntegerBuffer[0]; unsigned ActualLength = PP.getSpelling(PeekTok, ThisTokBegin); - NumericLiteralParser Literal(ThisTokBegin, ThisTokBegin+ActualLength, + NumericLiteralParser Literal(ThisTokBegin, ThisTokBegin+ActualLength, PeekTok.getLocation(), PP); if (Literal.hadError) return true; // a diagnostic was already reported. - + if (Literal.isFloatingLiteral() || Literal.isImaginary) { PP.Diag(PeekTok, diag::err_pp_illegal_floating_literal); return true; @@ -191,7 +191,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, // Set the signedness of the result to match whether there was a U suffix // or not. Result.Val.setIsUnsigned(Literal.isUnsigned); - + // Detect overflow based on whether the value is signed. If signed // and if the value is too large, emit a warning "integer constant is so // large that it is unsigned" e.g. on 12345678901234567890 where intmax_t @@ -203,7 +203,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, Result.Val.setIsUnsigned(true); } } - + // Consume the token. Result.setRange(PeekTok.getLocation()); PP.LexNonComment(PeekTok); @@ -214,7 +214,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, CharBuffer.resize(PeekTok.getLength()); const char *ThisTokBegin = &CharBuffer[0]; unsigned ActualLength = PP.getSpelling(PeekTok, ThisTokBegin); - CharLiteralParser Literal(ThisTokBegin, ThisTokBegin+ActualLength, + CharLiteralParser Literal(ThisTokBegin, ThisTokBegin+ActualLength, PeekTok.getLocation(), PP); if (Literal.hadError()) return true; // A diagnostic was already emitted. @@ -224,8 +224,10 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, unsigned NumBits; if (Literal.isMultiChar()) NumBits = TI.getIntWidth(); + else if (Literal.isWide()) + NumBits = TI.getWCharWidth(); else - NumBits = TI.getCharWidth(Literal.isWide()); + NumBits = TI.getCharWidth(); // Set the width. llvm::APSInt Val(NumBits); @@ -233,7 +235,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, Val = Literal.getValue(); // Set the signedness. Val.setIsUnsigned(!PP.getLangOptions().CharIsSigned); - + if (Result.Val.getBitWidth() > Val.getBitWidth()) { Result.Val = Val.extend(Result.Val.getBitWidth()); } else { @@ -262,7 +264,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, // Otherwise, we have something like (x+y), and we consumed '(x'. if (EvaluateDirectiveSubExpr(Result, 1, PeekTok, ValueLive, PP)) return true; - + if (PeekTok.isNot(tok::r_paren)) { PP.Diag(PeekTok.getLocation(), diag::err_pp_expected_rparen) << Result.getRange(); @@ -288,21 +290,21 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, PP.LexNonComment(PeekTok); if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true; Result.setBegin(Loc); - + // C99 6.5.3.3p3: The sign of the result matches the sign of the operand. Result.Val = -Result.Val; - + // -MININT is the only thing that overflows. Unsigned never overflows. bool Overflow = !Result.isUnsigned() && Result.Val.isMinSignedValue(); - + // If this operator is live and overflowed, report the issue. if (Overflow && ValueLive) PP.Diag(Loc, diag::warn_pp_expr_overflow) << Result.getRange(); - + DT.State = DefinedTracker::Unknown; return false; } - + case tok::tilde: { SourceLocation Start = PeekTok.getLocation(); PP.LexNonComment(PeekTok); @@ -314,7 +316,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, DT.State = DefinedTracker::Unknown; return false; } - + case tok::exclaim: { SourceLocation Start = PeekTok.getLocation(); PP.LexNonComment(PeekTok); @@ -323,14 +325,14 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, Result.Val = !Result.Val; // C99 6.5.3.3p5: The sign of the result is 'int', aka it is signed. Result.Val.setIsUnsigned(false); - + if (DT.State == DefinedTracker::DefinedMacro) DT.State = DefinedTracker::NotDefinedMacro; else if (DT.State == DefinedTracker::NotDefinedMacro) DT.State = DefinedTracker::DefinedMacro; return false; } - + // FIXME: Handle #assert } } @@ -388,17 +390,17 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec, << LHS.getRange(); return true; } - + while (1) { // If this token has a lower precedence than we are allowed to parse, return // it so that higher levels of the recursion can parse it. if (PeekPrec < MinPrec) return false; - + tok::TokenKind Operator = PeekTok.getKind(); - + // If this is a short-circuiting operator, see if the RHS of the operator is - // dead. Note that this cannot just clobber ValueLive. Consider + // dead. Note that this cannot just clobber ValueLive. Consider // "0 && 1 ? 4 : 1 / 0", which is parsed as "(0 && 1) ? 4 : (1 / 0)". In // this example, the RHS of the && being dead does not make the rest of the // expr dead. @@ -432,7 +434,7 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec, << RHS.getRange(); return true; } - + // Decide whether to include the next binop in this subexpression. For // example, when parsing x+y*z and looking at '*', we want to recursively // handle y*z as a single subexpression. We do this because the precedence @@ -449,16 +451,16 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec, RHSPrec = getPrecedence(tok::comma); else // All others should munch while higher precedence. RHSPrec = ThisPrec+1; - + if (PeekPrec >= RHSPrec) { if (EvaluateDirectiveSubExpr(RHS, RHSPrec, PeekTok, RHSIsLive, PP)) return true; PeekPrec = getPrecedence(PeekTok.getKind()); } assert(PeekPrec <= ThisPrec && "Recursion didn't work!"); - + // Usual arithmetic conversions (C99 6.3.1.8p1): result is unsigned if - // either operand is unsigned. + // either operand is unsigned. llvm::APSInt Res(LHS.getBitWidth()); switch (Operator) { case tok::question: // No UAC for x and y in "x ? y : z". @@ -487,7 +489,7 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec, LHS.Val.setIsUnsigned(Res.isUnsigned()); RHS.Val.setIsUnsigned(Res.isUnsigned()); } - + // FIXME: All of these should detect and report overflow?? bool Overflow = false; switch (Operator) { @@ -512,7 +514,7 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec, return true; } break; - + case tok::star: Res = LHS.Val * RHS.Val; if (Res.isSigned() && LHS.Val != 0 && RHS.Val != 0) @@ -529,7 +531,7 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec, Overflow = ShAmt >= LHS.Val.countLeadingZeros(); else Overflow = ShAmt >= LHS.Val.countLeadingOnes(); - + Res = LHS.Val << ShAmt; break; } @@ -605,7 +607,7 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec, PP.Diag(OpLoc, diag::ext_pp_comma_expr) << LHS.getRange() << RHS.getRange(); Res = RHS.Val; // LHS = LHS,RHS -> RHS. - break; + break; case tok::question: { // Parse the : part of the expression. if (PeekTok.isNot(tok::colon)) { @@ -629,7 +631,7 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec, if (EvaluateDirectiveSubExpr(AfterColonVal, ThisPrec, PeekTok, AfterColonLive, PP)) return true; - + // Now that we have the condition, the LHS and the RHS of the :, evaluate. Res = LHS.Val != 0 ? RHS.Val : AfterColonVal.Val; RHS.setEnd(AfterColonVal.getRange().getEnd()); @@ -637,7 +639,7 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec, // Usual arithmetic conversions (C99 6.3.1.8p1): result is unsigned if // either operand is unsigned. Res.setIsUnsigned(RHS.isUnsigned() | AfterColonVal.isUnsigned()); - + // Figure out the precedence of the token after the : part. PeekPrec = getPrecedence(PeekTok.getKind()); break; @@ -653,12 +655,12 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec, if (Overflow && ValueLive) PP.Diag(OpLoc, diag::warn_pp_expr_overflow) << LHS.getRange() << RHS.getRange(); - + // Put the result back into 'LHS' for our next iteration. LHS.Val = Res; LHS.setEnd(RHS.getRange().getEnd()); } - + return false; } @@ -670,10 +672,10 @@ EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { // Peek ahead one token. Token Tok; Lex(Tok); - + // C99 6.10.1p3 - All expressions are evaluated as intmax_t or uintmax_t. unsigned BitWidth = getTargetInfo().getIntMaxTWidth(); - + PPValue ResVal(BitWidth); DefinedTracker DT; if (EvaluateValue(ResVal, Tok, DT, true, *this)) { @@ -682,7 +684,7 @@ EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { DiscardUntilEndOfDirective(); return false; } - + // If we are at the end of the expression after just parsing a value, there // must be no (unparenthesized) binary operators involved, so we can exit // directly. @@ -691,10 +693,10 @@ EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { // macro in IfNDefMacro. if (DT.State == DefinedTracker::NotDefinedMacro) IfNDefMacro = DT.TheMacro; - + return ResVal.Val != 0; } - + // Otherwise, we must have a binary operator (e.g. "#if 1 < 2"), so parse the // operator and the stuff after it. if (EvaluateDirectiveSubExpr(ResVal, getPrecedence(tok::question), @@ -704,14 +706,14 @@ EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { DiscardUntilEndOfDirective(); return false; } - + // If we aren't at the tok::eom token, something bad happened, like an extra // ')' token. if (Tok.isNot(tok::eom)) { Diag(Tok, diag::err_pp_expected_eol); DiscardUntilEndOfDirective(); } - + return ResVal.Val != 0; } diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp index 2a05ba336fcac..41ed991436b9e 100644 --- a/lib/Lex/PPLexerChange.cpp +++ b/lib/Lex/PPLexerChange.cpp @@ -31,7 +31,7 @@ PPCallbacks::~PPCallbacks() {} bool Preprocessor::isInPrimaryFile() const { if (IsFileLexer()) return IncludeMacroStack.empty(); - + // If there are any stacked lexers, we're in a #include. assert(IsFileLexer(IncludeMacroStack[0]) && "Top level include stack isn't our primary lexer?"); @@ -47,7 +47,7 @@ bool Preprocessor::isInPrimaryFile() const { PreprocessorLexer *Preprocessor::getCurrentFileLexer() const { if (IsFileLexer()) return CurPPLexer; - + // Look for a stacked lexer. for (unsigned i = IncludeMacroStack.size(); i != 0; --i) { const IncludeStackInfo& ISI = IncludeMacroStack[i-1]; @@ -68,7 +68,7 @@ PreprocessorLexer *Preprocessor::getCurrentFileLexer() const { void Preprocessor::EnterSourceFile(FileID FID, const DirectoryLookup *CurDir) { assert(CurTokenLexer == 0 && "Cannot #include a file inside a macro!"); ++NumEnteredSourceFiles; - + if (MaxIncludeStackDepth < IncludeMacroStack.size()) MaxIncludeStackDepth = IncludeMacroStack.size(); @@ -77,13 +77,13 @@ void Preprocessor::EnterSourceFile(FileID FID, const DirectoryLookup *CurDir) { return EnterSourceFileWithPTH(PL, CurDir); } EnterSourceFileWithLexer(new Lexer(FID, *this), CurDir); -} +} /// EnterSourceFileWithLexer - Add a source file to the top of the include stack /// and start lexing tokens from it instead of the current buffer. -void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer, +void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *CurDir) { - + // Add the current lexer to the include stack. if (CurPPLexer || CurTokenLexer) PushIncludeMacroStack(); @@ -91,12 +91,12 @@ void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer, CurLexer.reset(TheLexer); CurPPLexer = TheLexer; CurDirLookup = CurDir; - + // Notify the client, if desired, that we are in a new source file. if (Callbacks && !CurLexer->Is_PragmaLexer) { SrcMgr::CharacteristicKind FileType = SourceMgr.getFileCharacteristic(CurLexer->getFileLoc()); - + Callbacks->FileChanged(CurLexer->getFileLoc(), PPCallbacks::EnterFile, FileType); } @@ -104,9 +104,9 @@ void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer, /// EnterSourceFileWithPTH - Add a source file to the top of the include stack /// and start getting tokens from it using the PTH cache. -void Preprocessor::EnterSourceFileWithPTH(PTHLexer *PL, +void Preprocessor::EnterSourceFileWithPTH(PTHLexer *PL, const DirectoryLookup *CurDir) { - + if (CurPPLexer || CurTokenLexer) PushIncludeMacroStack(); @@ -130,7 +130,7 @@ void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd, MacroArgs *Args) { PushIncludeMacroStack(); CurDirLookup = 0; - + if (NumCachedTokenLexers == 0) { CurTokenLexer.reset(new TokenLexer(Tok, ILEnd, Args, *this)); } else { @@ -174,18 +174,18 @@ void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks, bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { assert(!CurTokenLexer && "Ending a file when currently in a macro!"); - + // See if this file had a controlling macro. if (CurPPLexer) { // Not ending a macro, ignore it. - if (const IdentifierInfo *ControllingMacro = + if (const IdentifierInfo *ControllingMacro = CurPPLexer->MIOpt.GetControllingMacroAtEndOfFile()) { // Okay, this has a controlling macro, remember in HeaderFileInfo. - if (const FileEntry *FE = + if (const FileEntry *FE = SourceMgr.getFileEntryForID(CurPPLexer->getFileID())) HeaderInfo.SetFileControllingMacro(FE, ControllingMacro); } } - + // If this is a #include'd file, pop it off the include stack and continue // lexing the #includer file. if (!IncludeMacroStack.empty()) { @@ -197,7 +197,7 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { SrcMgr::CharacteristicKind FileType = SourceMgr.getFileCharacteristic(CurPPLexer->getSourceLocation()); Callbacks->FileChanged(CurPPLexer->getSourceLocation(), - PPCallbacks::ExitFile, FileType); + PPCallbacks::ExitFile, FileType); } // Client should lex another token. @@ -210,21 +210,21 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { // actually typed, which is goodness. if (CurLexer) { const char *EndPos = CurLexer->BufferEnd; - if (EndPos != CurLexer->BufferStart && + if (EndPos != CurLexer->BufferStart && (EndPos[-1] == '\n' || EndPos[-1] == '\r')) { --EndPos; - + // Handle \n\r and \r\n: - if (EndPos != CurLexer->BufferStart && + if (EndPos != CurLexer->BufferStart && (EndPos[-1] == '\n' || EndPos[-1] == '\r') && EndPos[-1] != EndPos[0]) --EndPos; } - + Result.startToken(); CurLexer->BufferPtr = EndPos; CurLexer->FormTokenWithChars(Result, EndPos, tok::eof); - + // We're done with the #included file. CurLexer.reset(); } else { @@ -232,12 +232,12 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { CurPTHLexer->getEOF(Result); CurPTHLexer.reset(); } - + CurPPLexer = 0; // This is the end of the top-level file. If the diag::pp_macro_not_used // diagnostic is enabled, look for macros that have not been used. - if (getDiagnostics().getDiagnosticLevel(diag::pp_macro_not_used) != + if (getDiagnostics().getDiagnosticLevel(diag::pp_macro_not_used) != Diagnostic::Ignored) { for (macro_iterator I = macro_begin(), E = macro_end(); I != E; ++I) if (!I->second->isUsed()) @@ -267,15 +267,15 @@ bool Preprocessor::HandleEndOfTokenLexer(Token &Result) { /// state of the top-of-stack lexer is unknown. void Preprocessor::RemoveTopOfLexerStack() { assert(!IncludeMacroStack.empty() && "Ran out of stack entries to load"); - + if (CurTokenLexer) { // Delete or cache the now-dead macro expander. if (NumCachedTokenLexers == TokenLexerCacheSize) CurTokenLexer.reset(); else TokenLexerCache[NumCachedTokenLexers++] = CurTokenLexer.take(); - } - + } + PopIncludeMacroStack(); } @@ -285,7 +285,7 @@ void Preprocessor::RemoveTopOfLexerStack() { void Preprocessor::HandleMicrosoftCommentPaste(Token &Tok) { assert(CurTokenLexer && !CurPPLexer && "Pasted comment can only be formed from macro"); - + // We handle this by scanning for the closest real lexer, switching it to // raw mode and preprocessor mode. This will cause it to return \n as an // explicit EOM token. @@ -294,7 +294,7 @@ void Preprocessor::HandleMicrosoftCommentPaste(Token &Tok) { for (unsigned i = 0, e = IncludeMacroStack.size(); i != e; ++i) { IncludeStackInfo &ISI = *(IncludeMacroStack.end()-i-1); if (ISI.ThePPLexer == 0) continue; // Scan for a real lexer. - + // Once we find a real lexer, mark it as raw mode (disabling macro // expansions) and preprocessor mode (return EOM). We know that the lexer // was *not* in raw mode before, because the macro that the comment came @@ -307,12 +307,12 @@ void Preprocessor::HandleMicrosoftCommentPaste(Token &Tok) { FoundLexer->ParsingPreprocessorDirective = true; break; } - + // Okay, we either found and switched over the lexer, or we didn't find a // lexer. In either case, finish off the macro the comment came from, getting // the next token. if (!HandleEndOfTokenLexer(Tok)) Lex(Tok); - + // Discarding comments as long as we don't have EOF or EOM. This 'comments // out' the rest of the line, including any tokens that came from other macros // that were active, as in: @@ -321,22 +321,22 @@ void Preprocessor::HandleMicrosoftCommentPaste(Token &Tok) { // which should lex to 'a' only: 'b' and 'c' should be removed. while (Tok.isNot(tok::eom) && Tok.isNot(tok::eof)) Lex(Tok); - + // If we got an eom token, then we successfully found the end of the line. if (Tok.is(tok::eom)) { assert(FoundLexer && "Can't get end of line without an active lexer"); // Restore the lexer back to normal mode instead of raw mode. FoundLexer->LexingRawMode = false; - + // If the lexer was already in preprocessor mode, just return the EOM token // to finish the preprocessor line. if (LexerWasInPPMode) return; - + // Otherwise, switch out of PP mode and return the next lexed token. FoundLexer->ParsingPreprocessorDirective = false; return Lex(Tok); } - + // If we got an EOF token, then we reached the end of the token stream but // didn't find an explicit \n. This can only happen if there was no lexer // active (an active lexer would return EOM at EOF if there was no \n in diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp index 286705181cced..7ddf215020d01 100644 --- a/lib/Lex/PPMacroExpansion.cpp +++ b/lib/Lex/PPMacroExpansion.cpp @@ -39,7 +39,7 @@ void Preprocessor::setMacroInfo(IdentifierInfo *II, MacroInfo *MI) { static IdentifierInfo *RegisterBuiltinMacro(Preprocessor &PP, const char *Name){ // Get the identifier. IdentifierInfo *Id = PP.getIdentifierInfo(Name); - + // Mark it as being a macro that is builtin. MacroInfo *MI = PP.AllocateMacroInfo(SourceLocation()); MI->setIsBuiltinMacro(); @@ -57,12 +57,12 @@ void Preprocessor::RegisterBuiltinMacros() { Ident__TIME__ = RegisterBuiltinMacro(*this, "__TIME__"); Ident__COUNTER__ = RegisterBuiltinMacro(*this, "__COUNTER__"); Ident_Pragma = RegisterBuiltinMacro(*this, "_Pragma"); - + // GCC Extensions. Ident__BASE_FILE__ = RegisterBuiltinMacro(*this, "__BASE_FILE__"); Ident__INCLUDE_LEVEL__ = RegisterBuiltinMacro(*this, "__INCLUDE_LEVEL__"); Ident__TIMESTAMP__ = RegisterBuiltinMacro(*this, "__TIMESTAMP__"); - + // Clang Extensions. Ident__has_feature = RegisterBuiltinMacro(*this, "__has_feature"); Ident__has_builtin = RegisterBuiltinMacro(*this, "__has_builtin"); @@ -77,14 +77,14 @@ static bool isTrivialSingleTokenExpansion(const MacroInfo *MI, // If the token isn't an identifier, it's always literally expanded. if (II == 0) return true; - + // If the identifier is a macro, and if that macro is enabled, it may be // expanded so it's not a trivial expansion. if (II->hasMacroDefinition() && PP.getMacroInfo(II)->isEnabled() && // Fast expanding "#define X X" is ok, because X would be disabled. II != MacroIdent) return false; - + // If this is an object-like macro invocation, it is safe to trivially expand // it. if (MI->isObjectLike()) return true; @@ -95,7 +95,7 @@ static bool isTrivialSingleTokenExpansion(const MacroInfo *MI, I != E; ++I) if (*I == II) return false; // Identifier is a macro argument. - + return true; } @@ -112,7 +112,7 @@ bool Preprocessor::isNextPPTokenLParen() { Val = CurPTHLexer->isNextPPTokenLParen(); else Val = CurTokenLexer->isNextTokenLParen(); - + if (Val == 2) { // We have run off the end. If it's a source file we don't // examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the @@ -127,10 +127,10 @@ bool Preprocessor::isNextPPTokenLParen() { Val = Entry.ThePTHLexer->isNextPPTokenLParen(); else Val = Entry.TheTokenLexer->isNextTokenLParen(); - + if (Val != 2) break; - + // Ran off the end of a source file? if (Entry.ThePPLexer) return false; @@ -145,72 +145,72 @@ bool Preprocessor::isNextPPTokenLParen() { /// HandleMacroExpandedIdentifier - If an identifier token is read that is to be /// expanded as a macro, handle it and return the next token as 'Identifier'. -bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, +bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, MacroInfo *MI) { if (Callbacks) Callbacks->MacroExpands(Identifier, MI); - + // If this is a macro exapnsion in the "#if !defined(x)" line for the file, // then the macro could expand to different things in other contexts, we need // to disable the optimization in this case. if (CurPPLexer) CurPPLexer->MIOpt.ExpandedMacro(); - + // If this is a builtin macro, like __LINE__ or _Pragma, handle it specially. if (MI->isBuiltinMacro()) { ExpandBuiltinMacro(Identifier); return false; } - + /// Args - If this is a function-like macro expansion, this contains, /// for each macro argument, the list of tokens that were provided to the /// invocation. MacroArgs *Args = 0; - + // Remember where the end of the instantiation occurred. For an object-like // macro, this is the identifier. For a function-like macro, this is the ')'. SourceLocation InstantiationEnd = Identifier.getLocation(); - + // If this is a function-like macro, read the arguments. if (MI->isFunctionLike()) { // C99 6.10.3p10: If the preprocessing token immediately after the the macro // name isn't a '(', this macro should not be expanded. if (!isNextPPTokenLParen()) return true; - + // Remember that we are now parsing the arguments to a macro invocation. // Preprocessor directives used inside macro arguments are not portable, and // this enables the warning. InMacroArgs = true; Args = ReadFunctionLikeMacroArgs(Identifier, MI, InstantiationEnd); - + // Finished parsing args. InMacroArgs = false; - + // If there was an error parsing the arguments, bail out. if (Args == 0) return false; - + ++NumFnMacroExpanded; } else { ++NumMacroExpanded; } - + // Notice that this macro has been used. MI->setIsUsed(true); - + // If we started lexing a macro, enter the macro expansion body. - + // If this macro expands to no tokens, don't bother to push it onto the // expansion stack, only to take it right back off. if (MI->getNumTokens() == 0) { // No need for arg info. if (Args) Args->destroy(); - + // Ignore this macro use, just return the next token in the current // buffer. bool HadLeadingSpace = Identifier.hasLeadingSpace(); bool IsAtStartOfLine = Identifier.isAtStartOfLine(); - + Lex(Identifier); - + // If the identifier isn't on some OTHER line, inherit the leading // whitespace/first-on-a-line property of this token. This handles // stuff like "! XX," -> "! ," and " XX," -> " ,", when XX is @@ -221,12 +221,12 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, } ++NumFastMacroExpanded; return false; - + } else if (MI->getNumTokens() == 1 && isTrivialSingleTokenExpansion(MI, Identifier.getIdentifierInfo(), *this)) { // Otherwise, if this macro expands into a single trivially-expanded - // token: expand it now. This handles common cases like + // token: expand it now. This handles common cases like // "#define VAL 42". // No need for arg info. @@ -236,38 +236,38 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, // identifier to the expanded token. bool isAtStartOfLine = Identifier.isAtStartOfLine(); bool hasLeadingSpace = Identifier.hasLeadingSpace(); - + // Remember where the token is instantiated. SourceLocation InstantiateLoc = Identifier.getLocation(); - + // Replace the result token. Identifier = MI->getReplacementToken(0); - + // Restore the StartOfLine/LeadingSpace markers. Identifier.setFlagValue(Token::StartOfLine , isAtStartOfLine); Identifier.setFlagValue(Token::LeadingSpace, hasLeadingSpace); - + // Update the tokens location to include both its instantiation and physical // locations. SourceLocation Loc = SourceMgr.createInstantiationLoc(Identifier.getLocation(), InstantiateLoc, InstantiationEnd,Identifier.getLength()); Identifier.setLocation(Loc); - + // If this is #define X X, we must mark the result as unexpandible. if (IdentifierInfo *NewII = Identifier.getIdentifierInfo()) if (getMacroInfo(NewII) == MI) Identifier.setFlag(Token::DisableExpand); - + // Since this is not an identifier token, it can't be macro expanded, so // we're done. ++NumFastMacroExpanded; return false; } - + // Start expanding the macro. EnterMacro(Identifier, InstantiationEnd, Args); - + // Now that the macro is at the top of the include stack, ask the // preprocessor to read the next token from it. Lex(Identifier); @@ -284,7 +284,7 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, // The number of fixed arguments to parse. unsigned NumFixedArgsLeft = MI->getNumArgs(); bool isVariadic = MI->isVariadic(); - + // Outer loop, while there are more arguments, keep reading them. Token Tok; @@ -292,7 +292,7 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, // an argument value in a macro could expand to ',' or '(' or ')'. LexUnexpandedToken(Tok); assert(Tok.is(tok::l_paren) && "Error computing l-paren-ness?"); - + // ArgTokens - Build up a list of tokens that make up each argument. Each // argument is separated by an EOF token. Use a SmallVector so we can avoid // heap allocations in the common case. @@ -302,19 +302,19 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, while (Tok.isNot(tok::r_paren)) { assert((Tok.is(tok::l_paren) || Tok.is(tok::comma)) && "only expect argument separators here"); - + unsigned ArgTokenStart = ArgTokens.size(); SourceLocation ArgStartLoc = Tok.getLocation(); - + // C99 6.10.3p11: Keep track of the number of l_parens we have seen. Note // that we already consumed the first one. unsigned NumParens = 0; - + while (1) { // Read arguments as unexpanded tokens. This avoids issues, e.g., where // an argument value in a macro could expand to ',' or '(' or ')'. LexUnexpandedToken(Tok); - + if (Tok.is(tok::eof) || Tok.is(tok::eom)) { // "#if f(<eof>" & "#if f(\n" Diag(MacroName, diag::err_unterm_macro_invoc); // Do not lose the EOF/EOM. Return it to the client. @@ -331,7 +331,7 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, } else if (Tok.is(tok::comma) && NumParens == 0) { // Comma ends this argument if there are more fixed arguments expected. // However, if this is a variadic macro, and this is part of the - // variadic part, then the comma is just an argument token. + // variadic part, then the comma is just an argument token. if (!isVariadic) break; if (NumFixedArgsLeft > 1) break; @@ -344,7 +344,7 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, // expanding from to be popped off the expansion stack. Doing so causes // them to be reenabled for expansion. Here we record whether any // identifiers we lex as macro arguments correspond to disabled macros. - // If so, we mark the token as noexpand. This is a subtle aspect of + // If so, we mark the token as noexpand. This is a subtle aspect of // C99 6.10.3.4p2. if (MacroInfo *MI = getMacroInfo(Tok.getIdentifierInfo())) if (!MI->isEnabled()) @@ -352,7 +352,7 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, } ArgTokens.push_back(Tok); } - + // If this was an empty argument list foo(), don't add this as an empty // argument. if (ArgTokens.empty() && Tok.getKind() == tok::r_paren) @@ -363,18 +363,18 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, if (!isVariadic && NumFixedArgsLeft == 0) { if (ArgTokens.size() != ArgTokenStart) ArgStartLoc = ArgTokens[ArgTokenStart].getLocation(); - + // Emit the diagnostic at the macro name in case there is a missing ). // Emitting it at the , could be far away from the macro name. Diag(ArgStartLoc, diag::err_too_many_args_in_macro_invoc); return 0; } - + // Empty arguments are standard in C99 and supported as an extension in // other modes. if (ArgTokens.size() == ArgTokenStart && !Features.C99) Diag(Tok, diag::ext_empty_fnmacro_arg); - + // Add a marker EOF token to the end of the token list for this argument. Token EOFTok; EOFTok.startToken(); @@ -386,19 +386,19 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, assert(NumFixedArgsLeft != 0 && "Too many arguments parsed"); --NumFixedArgsLeft; } - + // Okay, we either found the r_paren. Check to see if we parsed too few // arguments. unsigned MinArgsExpected = MI->getNumArgs(); - + // See MacroArgs instance var for description of this. bool isVarargsElided = false; - + if (NumActuals < MinArgsExpected) { // There are several cases where too few arguments is ok, handle them now. if (NumActuals == 0 && MinArgsExpected == 1) { // #define A(X) or #define A(...) ---> A() - + // If there is exactly one argument, and that argument is missing, // then we have an empty "()" argument empty list. This is fine, even if // the macro expects one argument (the argument is just empty). @@ -413,9 +413,9 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, // Remember this occurred, allowing us to elide the comma when used for // cases like: - // #define A(x, foo...) blah(a, ## foo) - // #define B(x, ...) blah(a, ## __VA_ARGS__) - // #define C(...) blah(a, ## __VA_ARGS__) + // #define A(x, foo...) blah(a, ## foo) + // #define B(x, ...) blah(a, ## __VA_ARGS__) + // #define C(...) blah(a, ## __VA_ARGS__) // A(x) B(x) C() isVarargsElided = true; } else { @@ -423,7 +423,7 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, Diag(Tok, diag::err_too_few_args_in_macro_invoc); return 0; } - + // Add a marker EOF token to the end of the token list for this argument. SourceLocation EndLoc = Tok.getLocation(); Tok.startToken(); @@ -435,14 +435,14 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, // If we expect two arguments, add both as empty. if (NumActuals == 0 && MinArgsExpected == 2) ArgTokens.push_back(Tok); - + } else if (NumActuals > MinArgsExpected && !MI->isVariadic()) { // Emit the diagnostic at the macro name in case there is a missing ). // Emitting it at the , could be far away from the macro name. Diag(MacroName, diag::err_too_many_args_in_macro_invoc); return 0; } - + return MacroArgs::create(MI, ArgTokens.data(), ArgTokens.size(), isVarargsElided); } @@ -454,15 +454,15 @@ static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc, Preprocessor &PP) { time_t TT = time(0); struct tm *TM = localtime(&TT); - + static const char * const Months[] = { "Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec" }; - + char TmpBuffer[100]; - sprintf(TmpBuffer, "\"%s %2d %4d\"", Months[TM->tm_mon], TM->tm_mday, + sprintf(TmpBuffer, "\"%s %2d %4d\"", Months[TM->tm_mon], TM->tm_mday, TM->tm_year+1900); - + Token TmpTok; TmpTok.startToken(); PP.CreateString(TmpBuffer, strlen(TmpBuffer), TmpTok); @@ -478,12 +478,15 @@ static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc, /// specified by the identifier. static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { const LangOptions &LangOpts = PP.getLangOptions(); - + switch (II->getLength()) { default: return false; case 6: if (II->isStr("blocks")) return LangOpts.Blocks; return false; + case 19: + if (II->isStr("objc_nonfragile_abi")) return LangOpts.ObjCNonFragileABI; + return false; case 22: if (II->isStr("attribute_overloadable")) return true; return false; @@ -507,12 +510,12 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { // Figure out which token this is. IdentifierInfo *II = Tok.getIdentifierInfo(); assert(II && "Can't be a macro without id info!"); - + // If this is an _Pragma directive, expand it, invoke the pragma handler, then // lex the token after it. if (II == Ident_Pragma) return Handle_Pragma(Tok); - + ++NumBuiltinMacroExpanded; char TmpBuffer[100]; @@ -520,17 +523,17 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { // Set up the return result. Tok.setIdentifierInfo(0); Tok.clearFlag(Token::NeedsCleaning); - + if (II == Ident__LINE__) { // C99 6.10.8: "__LINE__: The presumed line number (within the current // source file) of the current source line (an integer constant)". This can // be affected by #line. SourceLocation Loc = Tok.getLocation(); - + // Advance to the location of the first _, this might not be the first byte // of the token if it starts with an escaped newline. Loc = AdvanceToTokenCharacter(Loc, 0); - + // One wrinkle here is that GCC expands __LINE__ to location of the *end* of // a macro instantiation. This doesn't matter for object-like macros, but // can matter for a function-like macro that expands to contain __LINE__. @@ -538,7 +541,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { // end of the instantiation history. Loc = SourceMgr.getInstantiationRange(Loc).second; PresumedLoc PLoc = SourceMgr.getPresumedLoc(Loc); - + // __LINE__ expands to a simple numeric value. sprintf(TmpBuffer, "%u", PLoc.getLine()); Tok.setKind(tok::numeric_constant); @@ -558,7 +561,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { NextLoc = PLoc.getIncludeLoc(); } } - + // Escape this filename. Turn '\' -> '\\' '"' -> '\"' std::string FN = PLoc.getFilename(); FN = '"' + Lexer::Stringify(FN) + '"'; @@ -586,12 +589,12 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { // Compute the presumed include depth of this token. This can be affected // by GNU line markers. unsigned Depth = 0; - + PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation()); PLoc = SourceMgr.getPresumedLoc(PLoc.getIncludeLoc()); for (; PLoc.isValid(); ++Depth) PLoc = SourceMgr.getPresumedLoc(PLoc.getIncludeLoc()); - + // __INCLUDE_LEVEL__ expands to a simple numeric value. sprintf(TmpBuffer, "%u", Depth); Tok.setKind(tok::numeric_constant); @@ -605,10 +608,10 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { // a macro, dig into the include stack. const FileEntry *CurFile = 0; PreprocessorLexer *TheLexer = getCurrentFileLexer(); - + if (TheLexer) CurFile = SourceMgr.getFileEntryForID(TheLexer->getFileID()); - + // If this file is older than the file it depends on, emit a diagnostic. const char *Result; if (CurFile) { @@ -619,14 +622,14 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { Result = "??? ??? ?? ??:??:?? ????\n"; } TmpBuffer[0] = '"'; - strcpy(TmpBuffer+1, Result); - unsigned Len = strlen(TmpBuffer); - TmpBuffer[Len] = '"'; // Replace the newline with a quote. + unsigned Len = strlen(Result); + memcpy(TmpBuffer+1, Result, Len-1); // Copy string without the newline. + TmpBuffer[Len] = '"'; Tok.setKind(tok::string_literal); CreateString(TmpBuffer, Len+1, Tok, Tok.getLocation()); } else if (II == Ident__COUNTER__) { Diag(Tok, diag::ext_pp_counter); - + // __COUNTER__ expands to a simple numeric value. sprintf(TmpBuffer, "%u", CounterValue++); Tok.setKind(tok::numeric_constant); @@ -635,10 +638,10 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { II == Ident__has_builtin) { // The argument to these two builtins should be a parenthesized identifier. SourceLocation StartLoc = Tok.getLocation(); - + bool IsValid = false; IdentifierInfo *FeatureII = 0; - + // Read the '('. Lex(Tok); if (Tok.is(tok::l_paren)) { @@ -646,25 +649,25 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { Lex(Tok); if (Tok.is(tok::identifier)) { FeatureII = Tok.getIdentifierInfo(); - + // Read the ')'. Lex(Tok); if (Tok.is(tok::r_paren)) IsValid = true; } } - + bool Value = false; if (!IsValid) Diag(StartLoc, diag::err_feature_check_malformed); else if (II == Ident__has_builtin) { - // Check for a builtin is trivial. + // Check for a builtin is trivial. Value = FeatureII->getBuiltinID() != 0; } else { assert(II == Ident__has_feature && "Must be feature check"); Value = HasFeature(*this, FeatureII); } - + sprintf(TmpBuffer, "%d", (int)Value); Tok.setKind(tok::numeric_constant); CreateString(TmpBuffer, strlen(TmpBuffer), Tok, Tok.getLocation()); diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp index 916bdefdf2aca..36ace8be7e062 100644 --- a/lib/Lex/PTHLexer.cpp +++ b/lib/Lex/PTHLexer.cpp @@ -37,7 +37,7 @@ PTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D, const unsigned char *ppcond, PTHManager &PM) : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(0), PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) { - + FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID); } @@ -47,25 +47,25 @@ LexNextToken: //===--------------------------------------==// // Read the raw token data. //===--------------------------------------==// - + // Shadow CurPtr into an automatic variable. - const unsigned char *CurPtrShadow = CurPtr; + const unsigned char *CurPtrShadow = CurPtr; // Read in the data for the token. unsigned Word0 = ReadLE32(CurPtrShadow); uint32_t IdentifierID = ReadLE32(CurPtrShadow); uint32_t FileOffset = ReadLE32(CurPtrShadow); - + tok::TokenKind TKind = (tok::TokenKind) (Word0 & 0xFF); Token::TokenFlags TFlags = (Token::TokenFlags) ((Word0 >> 8) & 0xFF); uint32_t Len = Word0 >> 16; CurPtr = CurPtrShadow; - + //===--------------------------------------==// // Construct the token itself. //===--------------------------------------==// - + Tok.startToken(); Tok.setKind(TKind); Tok.setFlag(TFlags); @@ -80,57 +80,57 @@ LexNextToken: else if (IdentifierID) { MIOpt.ReadToken(); IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1); - + Tok.setIdentifierInfo(II); - + // Change the kind of this identifier to the appropriate token kind, e.g. // turning "for" into a keyword. Tok.setKind(II->getTokenID()); - + if (II->isHandleIdentifierCase()) PP->HandleIdentifier(Tok); return; } - + //===--------------------------------------==// // Process the token. //===--------------------------------------==// -#if 0 +#if 0 SourceManager& SM = PP->getSourceManager(); - llvm::cerr << SM.getFileEntryForID(FileID)->getName() + llvm::errs() << SM.getFileEntryForID(FileID)->getName() << ':' << SM.getLogicalLineNumber(Tok.getLocation()) << ':' << SM.getLogicalColumnNumber(Tok.getLocation()) << '\n'; -#endif +#endif if (TKind == tok::eof) { // Save the end-of-file token. EofToken = Tok; - + Preprocessor *PPCache = PP; - + assert(!ParsingPreprocessorDirective); assert(!LexingRawMode); - + // FIXME: Issue diagnostics similar to Lexer. if (PP->HandleEndOfFile(Tok, false)) return; - + assert(PPCache && "Raw buffer::LexEndOfFile should return a token"); return PPCache->Lex(Tok); } - + if (TKind == tok::hash && Tok.isAtStartOfLine()) { LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE; assert(!LexingRawMode); PP->HandleDirective(Tok); - + if (PP->isCurrentLexer(this)) goto LexNextToken; - + return PP->Lex(Tok); } - + if (TKind == tok::eom) { assert(ParsingPreprocessorDirective); ParsingPreprocessorDirective = false; @@ -154,7 +154,7 @@ void PTHLexer::DiscardToEndOfLine() { // We assume that if the preprocessor wishes to discard to the end of // the line that it also means to end the current preprocessor directive. ParsingPreprocessorDirective = false; - + // Skip tokens by only peeking at their token kind and the flags. // We don't need to actually reconstruct full tokens from the token buffer. // This saves some copies and it also reduces IdentifierInfo* lookup. @@ -163,7 +163,7 @@ void PTHLexer::DiscardToEndOfLine() { // Read the token kind. Are we at the end of the file? tok::TokenKind x = (tok::TokenKind) (uint8_t) *p; if (x == tok::eof) break; - + // Read the token flags. Are we at the start of the next line? Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1]; if (y & Token::StartOfLine) break; @@ -171,7 +171,7 @@ void PTHLexer::DiscardToEndOfLine() { // Skip to the next token. p += DISK_TOKEN_SIZE; } - + CurPtr = p; } @@ -179,18 +179,18 @@ void PTHLexer::DiscardToEndOfLine() { bool PTHLexer::SkipBlock() { assert(CurPPCondPtr && "No cached PP conditional information."); assert(LastHashTokPtr && "No known '#' token."); - + const unsigned char* HashEntryI = 0; - uint32_t Offset; + uint32_t Offset; uint32_t TableIdx; - + do { // Read the token offset from the side-table. Offset = ReadLE32(CurPPCondPtr); - - // Read the target table index from the side-table. + + // Read the target table index from the side-table. TableIdx = ReadLE32(CurPPCondPtr); - + // Compute the actual memory address of the '#' token data for this entry. HashEntryI = TokBuf + Offset; @@ -208,7 +208,7 @@ bool PTHLexer::SkipBlock() { // Read where we should jump to. uint32_t TmpOffset = ReadLE32(NextPPCondPtr); const unsigned char* HashEntryJ = TokBuf + TmpOffset; - + if (HashEntryJ <= LastHashTokPtr) { // Jump directly to the next entry in the side table. HashEntryI = HashEntryJ; @@ -218,23 +218,23 @@ bool PTHLexer::SkipBlock() { } } } - while (HashEntryI < LastHashTokPtr); + while (HashEntryI < LastHashTokPtr); assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'"); assert(TableIdx && "No jumping from #endifs."); - + // Update our side-table iterator. const unsigned char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2); assert(NextPPCondPtr >= CurPPCondPtr); CurPPCondPtr = NextPPCondPtr; - + // Read where we should jump to. HashEntryI = TokBuf + ReadLE32(NextPPCondPtr); uint32_t NextIdx = ReadLE32(NextPPCondPtr); - + // By construction NextIdx will be zero if this is a #endif. This is useful // to know to obviate lexing another token. bool isEndif = NextIdx == 0; - + // This case can occur when we see something like this: // // #if ... @@ -243,7 +243,7 @@ bool PTHLexer::SkipBlock() { // // If we are skipping the first #if block it will be the case that CurPtr // already points 'elif'. Just return. - + if (CurPtr > HashEntryI) { assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE); // Did we reach a #endif? If so, go ahead and consume that token as well. @@ -251,13 +251,13 @@ bool PTHLexer::SkipBlock() { CurPtr += DISK_TOKEN_SIZE*2; else LastHashTokPtr = HashEntryI; - + return isEndif; } // Otherwise, we need to advance. Update CurPtr to point to the '#' token. CurPtr = HashEntryI; - + // Update the location of the last observed '#'. This is useful if we // are skipping multiple blocks. LastHashTokPtr = CurPtr; @@ -265,7 +265,7 @@ bool PTHLexer::SkipBlock() { // Skip the '#' token. assert(((tok::TokenKind)*CurPtr) == tok::hash); CurPtr += DISK_TOKEN_SIZE; - + // Did we reach a #endif? If so, go ahead and consume that token as well. if (isEndif) { CurPtr += DISK_TOKEN_SIZE*2; } @@ -297,12 +297,12 @@ class VISIBILITY_HIDDEN PTHFileData { public: PTHFileData(uint32_t tokenOff, uint32_t ppCondOff) : TokenOff(tokenOff), PPCondOff(ppCondOff) {} - - uint32_t getTokenOffset() const { return TokenOff; } - uint32_t getPPCondOffset() const { return PPCondOff; } + + uint32_t getTokenOffset() const { return TokenOff; } + uint32_t getPPCondOffset() const { return PPCondOff; } }; - - + + class VISIBILITY_HIDDEN PTHFileLookupCommonTrait { public: typedef std::pair<unsigned char, const char*> internal_key_type; @@ -310,84 +310,84 @@ public: static unsigned ComputeHash(internal_key_type x) { return BernsteinHash(x.second); } - + static std::pair<unsigned, unsigned> ReadKeyDataLength(const unsigned char*& d) { unsigned keyLen = (unsigned) ReadUnalignedLE16(d); unsigned dataLen = (unsigned) *(d++); return std::make_pair(keyLen, dataLen); } - + static internal_key_type ReadKey(const unsigned char* d, unsigned) { unsigned char k = *(d++); // Read the entry kind. return std::make_pair(k, (const char*) d); } }; - + class VISIBILITY_HIDDEN PTHFileLookupTrait : public PTHFileLookupCommonTrait { public: typedef const FileEntry* external_key_type; typedef PTHFileData data_type; - + static internal_key_type GetInternalKey(const FileEntry* FE) { return std::make_pair((unsigned char) 0x1, FE->getName()); } static bool EqualKey(internal_key_type a, internal_key_type b) { return a.first == b.first && strcmp(a.second, b.second) == 0; - } - - static PTHFileData ReadData(const internal_key_type& k, - const unsigned char* d, unsigned) { + } + + static PTHFileData ReadData(const internal_key_type& k, + const unsigned char* d, unsigned) { assert(k.first == 0x1 && "Only file lookups can match!"); uint32_t x = ::ReadUnalignedLE32(d); uint32_t y = ::ReadUnalignedLE32(d); - return PTHFileData(x, y); + return PTHFileData(x, y); } }; class VISIBILITY_HIDDEN PTHStringLookupTrait { public: - typedef uint32_t + typedef uint32_t data_type; typedef const std::pair<const char*, unsigned> external_key_type; typedef external_key_type internal_key_type; - + static bool EqualKey(const internal_key_type& a, const internal_key_type& b) { return (a.second == b.second) ? memcmp(a.first, b.first, a.second) == 0 : false; } - + static unsigned ComputeHash(const internal_key_type& a) { return BernsteinHash(a.first, a.second); } - + // This hopefully will just get inlined and removed by the optimizer. static const internal_key_type& GetInternalKey(const external_key_type& x) { return x; } - + static std::pair<unsigned, unsigned> ReadKeyDataLength(const unsigned char*& d) { return std::make_pair((unsigned) ReadUnalignedLE16(d), sizeof(uint32_t)); } - + static std::pair<const char*, unsigned> ReadKey(const unsigned char* d, unsigned n) { assert(n >= 2 && d[n-1] == '\0'); return std::make_pair((const char*) d, n-1); } - + static uint32_t ReadData(const internal_key_type& k, const unsigned char* d, unsigned) { return ::ReadUnalignedLE32(d); } }; - -} // end anonymous namespace + +} // end anonymous namespace typedef OnDiskChainedHashTable<PTHFileLookupTrait> PTHFileLookup; typedef OnDiskChainedHashTable<PTHStringLookupTrait> PTHStringIdLookup; @@ -398,7 +398,7 @@ typedef OnDiskChainedHashTable<PTHStringLookupTrait> PTHStringIdLookup; PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup, const unsigned char* idDataTable, - IdentifierInfo** perIDCache, + IdentifierInfo** perIDCache, void* stringIdLookup, unsigned numIds, const unsigned char* spellingBase, const char* originalSourceFile) @@ -416,7 +416,7 @@ PTHManager::~PTHManager() { static void InvalidPTH(Diagnostic *Diags, Diagnostic::Level level, const char* Msg = 0) { - if (!Diags) return; + if (!Diags) return; if (!Msg) Msg = "Invalid or corrupted PTH file"; unsigned DiagID = Diags->getCustomDiagID(level, Msg); Diags->Report(FullSourceLoc(), DiagID); @@ -427,7 +427,7 @@ PTHManager* PTHManager::Create(const std::string& file, Diagnostic* Diags, // Memory map the PTH file. llvm::OwningPtr<llvm::MemoryBuffer> File(llvm::MemoryBuffer::getFile(file.c_str())); - + if (!File) { if (Diags) { unsigned DiagID = Diags->getCustomDiagID(level, @@ -437,7 +437,7 @@ PTHManager* PTHManager::Create(const std::string& file, Diagnostic* Diags, return 0; } - + // Get the buffer ranges and check if there are at least three 32-bit // words at the end of the file. const unsigned char* BufBeg = (unsigned char*)File->getBufferStart(); @@ -449,54 +449,54 @@ PTHManager* PTHManager::Create(const std::string& file, Diagnostic* Diags, InvalidPTH(Diags, level); return 0; } - + // Read the PTH version. const unsigned char *p = BufBeg + (sizeof("cfe-pth") - 1); unsigned Version = ReadLE32(p); - + if (Version != PTHManager::Version) { InvalidPTH(Diags, level, - Version < PTHManager::Version + Version < PTHManager::Version ? "PTH file uses an older PTH format that is no longer supported" : "PTH file uses a newer PTH format that cannot be read"); return 0; } - // Compute the address of the index table at the end of the PTH file. + // Compute the address of the index table at the end of the PTH file. const unsigned char *PrologueOffset = p; - + if (PrologueOffset >= BufEnd) { InvalidPTH(Diags, level); return 0; } - + // Construct the file lookup table. This will be used for mapping from // FileEntry*'s to cached tokens. const unsigned char* FileTableOffset = PrologueOffset + sizeof(uint32_t)*2; const unsigned char* FileTable = BufBeg + ReadLE32(FileTableOffset); - + if (!(FileTable > BufBeg && FileTable < BufEnd)) { InvalidPTH(Diags, level); return 0; // FIXME: Proper error diagnostic? } - + llvm::OwningPtr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg)); - + // Warn if the PTH file is empty. We still want to create a PTHManager // as the PTH could be used with -include-pth. if (FL->isEmpty()) InvalidPTH(Diags, level, "PTH file contains no cached source data"); - + // Get the location of the table mapping from persistent ids to the // data needed to reconstruct identifiers. const unsigned char* IDTableOffset = PrologueOffset + sizeof(uint32_t)*0; const unsigned char* IData = BufBeg + ReadLE32(IDTableOffset); - + if (!(IData >= BufBeg && IData < BufEnd)) { InvalidPTH(Diags, level); return 0; } - + // Get the location of the hashtable mapping between strings and // persistent IDs. const unsigned char* StringIdTableOffset = PrologueOffset + sizeof(uint32_t)*1; @@ -508,7 +508,7 @@ PTHManager* PTHManager::Create(const std::string& file, Diagnostic* Diags, llvm::OwningPtr<PTHStringIdLookup> SL(PTHStringIdLookup::Create(StringIdTable, BufBeg)); - + // Get the location of the spelling cache. const unsigned char* spellingBaseOffset = PrologueOffset + sizeof(uint32_t)*3; const unsigned char* spellingBase = BufBeg + ReadLE32(spellingBaseOffset); @@ -516,19 +516,19 @@ PTHManager* PTHManager::Create(const std::string& file, Diagnostic* Diags, InvalidPTH(Diags, level); return 0; } - + // Get the number of IdentifierInfos and pre-allocate the identifier cache. uint32_t NumIds = ReadLE32(IData); - + // Pre-allocate the peristent ID -> IdentifierInfo* cache. We use calloc() // so that we in the best case only zero out memory once when the OS returns // us new pages. IdentifierInfo** PerIDCache = 0; - + if (NumIds) { - PerIDCache = (IdentifierInfo**)calloc(NumIds, sizeof(*PerIDCache)); + PerIDCache = (IdentifierInfo**)calloc(NumIds, sizeof(*PerIDCache)); if (!PerIDCache) { - InvalidPTH(Diags, level, + InvalidPTH(Diags, level, "Could not allocate memory for processing PTH file"); return 0; } @@ -537,8 +537,8 @@ PTHManager* PTHManager::Create(const std::string& file, Diagnostic* Diags, // Compute the address of the original source file. const unsigned char* originalSourceBase = PrologueOffset + sizeof(uint32_t)*4; unsigned len = ReadUnalignedLE16(originalSourceBase); - if (!len) originalSourceBase = 0; - + if (!len) originalSourceBase = 0; + // Create the new PTHManager. return new PTHManager(File.take(), FL.take(), IData, PerIDCache, SL.take(), NumIds, spellingBase, @@ -551,7 +551,7 @@ IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) { const unsigned char* IDData = (const unsigned char*)Buf->getBufferStart() + ReadLE32(TableEntry); assert(IDData < (const unsigned char*)Buf->getBufferEnd()); - + // Allocate the object. std::pair<IdentifierInfo,const unsigned char*> *Mem = Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >(); @@ -559,7 +559,7 @@ IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) { Mem->second = IDData; assert(IDData[0] != '\0'); IdentifierInfo *II = new ((void*) Mem) IdentifierInfo(); - + // Store the new IdentifierInfo in the cache. PerIDCache[PersistentID] = II; assert(II->getName() && II->getName()[0] != '\0'); @@ -584,18 +584,18 @@ PTHLexer *PTHManager::CreateLexer(FileID FID) { const FileEntry *FE = PP->getSourceManager().getFileEntryForID(FID); if (!FE) return 0; - + // Lookup the FileEntry object in our file lookup data structure. It will // return a variant that indicates whether or not there is an offset within // the PTH file that contains cached tokens. PTHFileLookup& PFL = *((PTHFileLookup*)FileLookup); PTHFileLookup::iterator I = PFL.find(FE); - + if (I == PFL.end()) // No tokens available? return 0; - - const PTHFileData& FileData = *I; - + + const PTHFileData& FileData = *I; + const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart(); // Compute the offset of the token data within the buffer. const unsigned char* data = BufStart + FileData.getTokenOffset(); @@ -604,9 +604,9 @@ PTHLexer *PTHManager::CreateLexer(FileID FID) { const unsigned char* ppcond = BufStart + FileData.getPPCondOffset(); uint32_t Len = ReadLE32(ppcond); if (Len == 0) ppcond = 0; - + assert(PP && "No preprocessor set yet!"); - return new PTHLexer(*PP, FID, data, ppcond, *this); + return new PTHLexer(*PP, FID, data, ppcond, *this); } //===----------------------------------------------------------------------===// @@ -622,19 +622,19 @@ public: const mode_t mode; const time_t mtime; const off_t size; - + PTHStatData(ino_t i, dev_t d, mode_t mo, time_t m, off_t s) - : hasStat(true), ino(i), dev(d), mode(mo), mtime(m), size(s) {} - + : hasStat(true), ino(i), dev(d), mode(mo), mtime(m), size(s) {} + PTHStatData() : hasStat(false), ino(0), dev(0), mode(0), mtime(0), size(0) {} }; - + class VISIBILITY_HIDDEN PTHStatLookupTrait : public PTHFileLookupCommonTrait { public: typedef const char* external_key_type; // const char* typedef PTHStatData data_type; - + static internal_key_type GetInternalKey(const char *path) { // The key 'kind' doesn't matter here because it is ignored in EqualKey. return std::make_pair((unsigned char) 0x0, path); @@ -644,17 +644,17 @@ public: // When doing 'stat' lookups we don't care about the kind of 'a' and 'b', // just the paths. return strcmp(a.second, b.second) == 0; - } - + } + static data_type ReadData(const internal_key_type& k, const unsigned char* d, - unsigned) { - + unsigned) { + if (k.first /* File or Directory */) { if (k.first == 0x1 /* File */) d += 4 * 2; // Skip the first 2 words. ino_t ino = (ino_t) ReadUnalignedLE32(d); dev_t dev = (dev_t) ReadUnalignedLE32(d); mode_t mode = (mode_t) ReadUnalignedLE16(d); - time_t mtime = (time_t) ReadUnalignedLE64(d); + time_t mtime = (time_t) ReadUnalignedLE64(d); return data_type(ino, dev, mode, mtime, (off_t) ReadUnalignedLE64(d)); } @@ -667,22 +667,22 @@ class VISIBILITY_HIDDEN PTHStatCache : public StatSysCallCache { typedef OnDiskChainedHashTable<PTHStatLookupTrait> CacheTy; CacheTy Cache; -public: +public: PTHStatCache(PTHFileLookup &FL) : Cache(FL.getNumBuckets(), FL.getNumEntries(), FL.getBuckets(), FL.getBase()) {} ~PTHStatCache() {} - + int stat(const char *path, struct stat *buf) { // Do the lookup for the file's data in the PTH file. CacheTy::iterator I = Cache.find(path); // If we don't get a hit in the PTH file just forward to 'stat'. if (I == Cache.end()) return ::stat(path, buf); - + const PTHStatData& Data = *I; - + if (!Data.hasStat) return 1; diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp index bb0b71e226823..8b46f716910c9 100644 --- a/lib/Lex/Pragma.cpp +++ b/lib/Lex/Pragma.cpp @@ -44,9 +44,9 @@ PragmaHandler *PragmaNamespace::FindHandler(const IdentifierInfo *Name, bool IgnoreNull) const { PragmaHandler *NullHandler = 0; for (unsigned i = 0, e = Handlers.size(); i != e; ++i) { - if (Handlers[i]->getName() == Name) + if (Handlers[i]->getName() == Name) return Handlers[i]; - + if (Handlers[i]->getName() == 0) NullHandler = Handlers[i]; } @@ -68,14 +68,14 @@ void PragmaNamespace::HandlePragma(Preprocessor &PP, Token &Tok) { // Read the 'namespace' that the directive is in, e.g. STDC. Do not macro // expand it, the user can have a STDC #define, that should not affect this. PP.LexUnexpandedToken(Tok); - + // Get the handler for this token. If there is no handler, ignore the pragma. PragmaHandler *Handler = FindHandler(Tok.getIdentifierInfo(), false); if (Handler == 0) { PP.Diag(Tok, diag::warn_pragma_ignored); return; } - + // Otherwise, pass it down. Handler->HandlePragma(PP, Tok); } @@ -88,11 +88,11 @@ void PragmaNamespace::HandlePragma(Preprocessor &PP, Token &Tok) { /// rest of the pragma, passing it to the registered pragma handlers. void Preprocessor::HandlePragmaDirective() { ++NumPragma; - + // Invoke the first level of pragma handlers which reads the namespace id. Token Tok; PragmaHandlers->HandlePragma(*this, Tok); - + // If the pragma handler didn't read the rest of the line, consume it now. if (CurPPLexer && CurPPLexer->ParsingPreprocessorDirective) DiscardUntilEndOfDirective(); @@ -104,7 +104,7 @@ void Preprocessor::HandlePragmaDirective() { void Preprocessor::Handle_Pragma(Token &Tok) { // Remember the pragma token location. SourceLocation PragmaLoc = Tok.getLocation(); - + // Read the '('. Lex(Tok); if (Tok.isNot(tok::l_paren)) { @@ -118,7 +118,7 @@ void Preprocessor::Handle_Pragma(Token &Tok) { Diag(PragmaLoc, diag::err__Pragma_malformed); return; } - + // Remember the string. std::string StrVal = getSpelling(Tok); @@ -128,9 +128,9 @@ void Preprocessor::Handle_Pragma(Token &Tok) { Diag(PragmaLoc, diag::err__Pragma_malformed); return; } - + SourceLocation RParenLoc = Tok.getLocation(); - + // The _Pragma is lexically sound. Destringize according to C99 6.10.9.1: // "The string literal is destringized by deleting the L prefix, if present, // deleting the leading and trailing double-quotes, replacing each escape @@ -140,14 +140,14 @@ void Preprocessor::Handle_Pragma(Token &Tok) { StrVal.erase(StrVal.begin()); assert(StrVal[0] == '"' && StrVal[StrVal.size()-1] == '"' && "Invalid string token!"); - + // Remove the front quote, replacing it with a space, so that the pragma // contents appear to have a space before them. StrVal[0] = ' '; - + // Replace the terminating quote with a \n. StrVal[StrVal.size()-1] = '\n'; - + // Remove escaped quotes and escapes. for (unsigned i = 0, e = StrVal.size(); i != e-1; ++i) { if (StrVal[i] == '\\' && @@ -157,7 +157,7 @@ void Preprocessor::Handle_Pragma(Token &Tok) { --e; } } - + // Plop the string (including the newline and trailing null) into a buffer // where we can lex it. Token TmpTok; @@ -174,7 +174,7 @@ void Preprocessor::Handle_Pragma(Token &Tok) { // With everything set up, lex this as a #pragma directive. HandlePragmaDirective(); - + // Finally, return whatever came after the pragma directive. return Lex(Tok); } @@ -188,7 +188,7 @@ void Preprocessor::HandlePragmaOnce(Token &OnceTok) { Diag(OnceTok, diag::pp_pragma_once_in_main_file); return; } - + // Get the current file lexer we're looking at. Ignore _Pragma 'files' etc. // Mark the file as a once-only file now. HeaderInfo.MarkFileIncludeOnce(getCurrentFileLexer()->getFileEntry()); @@ -217,27 +217,27 @@ void Preprocessor::HandlePragmaPoison(Token &PoisonTok) { if (CurPPLexer) CurPPLexer->LexingRawMode = true; LexUnexpandedToken(Tok); if (CurPPLexer) CurPPLexer->LexingRawMode = false; - + // If we reached the end of line, we're done. if (Tok.is(tok::eom)) return; - + // Can only poison identifiers. if (Tok.isNot(tok::identifier)) { Diag(Tok, diag::err_pp_invalid_poison); return; } - + // Look up the identifier info for the token. We disabled identifier lookup // by saying we're skipping contents, so we need to do this manually. IdentifierInfo *II = LookUpIdentifierInfo(Tok); - + // Already poisoned. if (II->isPoisoned()) continue; - + // If this is a macro identifier, emit a warning. if (II->hasMacroDefinition()) Diag(Tok, diag::pp_poisoning_existing_macro); - + // Finally, poison it! II->setIsPoisoned(); } @@ -250,25 +250,25 @@ void Preprocessor::HandlePragmaSystemHeader(Token &SysHeaderTok) { Diag(SysHeaderTok, diag::pp_pragma_sysheader_in_main_file); return; } - + // Get the current file lexer we're looking at. Ignore _Pragma 'files' etc. PreprocessorLexer *TheLexer = getCurrentFileLexer(); - + // Mark the file as a system header. HeaderInfo.MarkFileSystemHeader(TheLexer->getFileEntry()); - - + + PresumedLoc PLoc = SourceMgr.getPresumedLoc(SysHeaderTok.getLocation()); unsigned FilenameLen = strlen(PLoc.getFilename()); unsigned FilenameID = SourceMgr.getLineTableFilenameID(PLoc.getFilename(), FilenameLen); - + // Emit a line marker. This will change any source locations from this point // forward to realize they are in a system header. // Create a line note with this information. SourceMgr.AddLineNote(SysHeaderTok.getLocation(), PLoc.getLine(), FilenameID, false, false, true, false); - + // Notify the client, if desired, that we are in a new source file. if (Callbacks) Callbacks->FileChanged(SysHeaderTok.getLocation(), @@ -284,11 +284,11 @@ void Preprocessor::HandlePragmaDependency(Token &DependencyTok) { // If the token kind is EOM, the error has already been diagnosed. if (FilenameTok.is(tok::eom)) return; - + // Reserve a buffer to get the spelling. llvm::SmallVector<char, 128> FilenameBuffer; FilenameBuffer.resize(FilenameTok.getLength()); - + const char *FilenameStart = &FilenameBuffer[0]; unsigned Len = getSpelling(FilenameTok, FilenameStart); const char *FilenameEnd = FilenameStart+Len; @@ -298,7 +298,7 @@ void Preprocessor::HandlePragmaDependency(Token &DependencyTok) { // error. if (FilenameStart == 0) return; - + // Search include directories for this file. const DirectoryLookup *CurDir; const FileEntry *File = LookupFile(FilenameStart, FilenameEnd, @@ -308,7 +308,7 @@ void Preprocessor::HandlePragmaDependency(Token &DependencyTok) { << std::string(FilenameStart, FilenameEnd); return; } - + const FileEntry *CurFile = getCurrentFileLexer()->getFileEntry(); // If this file is older than the file it depends on, emit a diagnostic. @@ -320,7 +320,7 @@ void Preprocessor::HandlePragmaDependency(Token &DependencyTok) { Message += getSpelling(DependencyTok) + " "; Lex(DependencyTok); } - + Message.erase(Message.end()-1); Diag(FilenameTok, diag::pp_out_of_date_dependency) << Message; } @@ -339,23 +339,23 @@ void Preprocessor::HandlePragmaComment(Token &Tok) { Diag(CommentLoc, diag::err_pragma_comment_malformed); return; } - + // Read the identifier. Lex(Tok); if (Tok.isNot(tok::identifier)) { Diag(CommentLoc, diag::err_pragma_comment_malformed); return; } - + // Verify that this is one of the 5 whitelisted options. // FIXME: warn that 'exestr' is deprecated. const IdentifierInfo *II = Tok.getIdentifierInfo(); - if (!II->isStr("compiler") && !II->isStr("exestr") && !II->isStr("lib") && + if (!II->isStr("compiler") && !II->isStr("exestr") && !II->isStr("lib") && !II->isStr("linker") && !II->isStr("user")) { Diag(Tok.getLocation(), diag::err_pragma_comment_unknown_kind); return; } - + // Read the optional string if present. Lex(Tok); std::string ArgumentString; @@ -390,13 +390,13 @@ void Preprocessor::HandlePragmaComment(Token &Tok) { ArgumentString = std::string(Literal.GetString(), Literal.GetString()+Literal.GetStringLength()); } - + // FIXME: If the kind is "compiler" warn if the string is present (it is // ignored). // FIXME: 'lib' requires a comment string. // FIXME: 'linker' requires a comment string, and has a specific list of // things that are allowable. - + if (Tok.isNot(tok::r_paren)) { Diag(Tok.getLocation(), diag::err_pragma_comment_malformed); return; @@ -407,7 +407,7 @@ void Preprocessor::HandlePragmaComment(Token &Tok) { Diag(Tok.getLocation(), diag::err_pragma_comment_malformed); return; } - + // If the pragma is lexically sound, notify any interested PPCallbacks. if (Callbacks) Callbacks->PragmaComment(CommentLoc, II, ArgumentString); @@ -419,14 +419,14 @@ void Preprocessor::HandlePragmaComment(Token &Tok) { /// AddPragmaHandler - Add the specified pragma handler to the preprocessor. /// If 'Namespace' is non-null, then it is a token required to exist on the /// pragma line before the pragma string starts, e.g. "STDC" or "GCC". -void Preprocessor::AddPragmaHandler(const char *Namespace, +void Preprocessor::AddPragmaHandler(const char *Namespace, PragmaHandler *Handler) { PragmaNamespace *InsertNS = PragmaHandlers; - + // If this is specified to be in a namespace, step down into it. if (Namespace) { IdentifierInfo *NSID = getIdentifierInfo(Namespace); - + // If there is already a pragma handler with the name of this namespace, // we either have an error (directive with the same name as a namespace) or // we already have the namespace to insert into. @@ -441,7 +441,7 @@ void Preprocessor::AddPragmaHandler(const char *Namespace, PragmaHandlers->AddPragma(InsertNS); } } - + // Check to make sure we don't already have a pragma for this identifier. assert(!InsertNS->FindHandler(Handler->getName()) && "Pragma handler already exists for this identifier!"); @@ -455,7 +455,7 @@ void Preprocessor::AddPragmaHandler(const char *Namespace, void Preprocessor::RemovePragmaHandler(const char *Namespace, PragmaHandler *Handler) { PragmaNamespace *NS = PragmaHandlers; - + // If this is specified to be in a namespace, step down into it. if (Namespace) { IdentifierInfo *NSID = getIdentifierInfo(Namespace); @@ -467,7 +467,7 @@ void Preprocessor::RemovePragmaHandler(const char *Namespace, } NS->RemovePragmaHandler(Handler); - + // If this is a non-default namespace and it is now empty, remove // it. if (NS != PragmaHandlers && NS->IsEmpty()) @@ -516,19 +516,29 @@ struct PragmaDependencyHandler : public PragmaHandler { PP.HandlePragmaDependency(DepToken); } }; - + /// PragmaDiagnosticHandler - e.g. '#pragma GCC diagnostic ignored "-Wformat"' +/// Since clang's diagnostic supports extended functionality beyond GCC's +/// the constructor takes a clangMode flag to tell it whether or not to allow +/// clang's extended functionality, or whether to reject it. struct PragmaDiagnosticHandler : public PragmaHandler { - PragmaDiagnosticHandler(const IdentifierInfo *ID) : PragmaHandler(ID) {} +private: + const bool ClangMode; +public: + PragmaDiagnosticHandler(const IdentifierInfo *ID, + const bool clangMode) : PragmaHandler(ID), + ClangMode(clangMode) {} virtual void HandlePragma(Preprocessor &PP, Token &DiagToken) { Token Tok; PP.LexUnexpandedToken(Tok); if (Tok.isNot(tok::identifier)) { - PP.Diag(Tok, diag::warn_pragma_diagnostic_invalid); + unsigned Diag = ClangMode ? diag::warn_pragma_diagnostic_clang_invalid + : diag::warn_pragma_diagnostic_gcc_invalid; + PP.Diag(Tok, Diag); return; } IdentifierInfo *II = Tok.getIdentifierInfo(); - + diag::Mapping Map; if (II->isStr("warning")) Map = diag::MAP_WARNING; @@ -538,11 +548,25 @@ struct PragmaDiagnosticHandler : public PragmaHandler { Map = diag::MAP_IGNORE; else if (II->isStr("fatal")) Map = diag::MAP_FATAL; - else { - PP.Diag(Tok, diag::warn_pragma_diagnostic_invalid); + else if (ClangMode) { + if (II->isStr("pop")) { + if (!PP.getDiagnostics().popMappings()) + PP.Diag(Tok, diag::warn_pragma_diagnostic_clang_cannot_ppp); + return; + } + + if (II->isStr("push")) { + PP.getDiagnostics().pushMappings(); + return; + } + + PP.Diag(Tok, diag::warn_pragma_diagnostic_clang_invalid); + return; + } else { + PP.Diag(Tok, diag::warn_pragma_diagnostic_gcc_invalid); return; } - + PP.LexUnexpandedToken(Tok); // We need at least one string. @@ -550,7 +574,7 @@ struct PragmaDiagnosticHandler : public PragmaHandler { PP.Diag(Tok.getLocation(), diag::warn_pragma_diagnostic_invalid_token); return; } - + // String concatenation allows multiple strings, which can even come from // macro expansion. // "foo " "bar" "Baz" @@ -559,22 +583,24 @@ struct PragmaDiagnosticHandler : public PragmaHandler { StrToks.push_back(Tok); PP.LexUnexpandedToken(Tok); } - + if (Tok.isNot(tok::eom)) { PP.Diag(Tok.getLocation(), diag::warn_pragma_diagnostic_invalid_token); return; } - + // Concatenate and parse the strings. StringLiteralParser Literal(&StrToks[0], StrToks.size(), PP); assert(!Literal.AnyWide && "Didn't allow wide strings in"); if (Literal.hadError) return; if (Literal.Pascal) { - PP.Diag(StrToks[0].getLocation(), diag::warn_pragma_diagnostic_invalid); + unsigned Diag = ClangMode ? diag::warn_pragma_diagnostic_clang_invalid + : diag::warn_pragma_diagnostic_gcc_invalid; + PP.Diag(Tok, Diag); return; } - + std::string WarningName(Literal.GetString(), Literal.GetString()+Literal.GetStringLength()); @@ -584,14 +610,14 @@ struct PragmaDiagnosticHandler : public PragmaHandler { diag::warn_pragma_diagnostic_invalid_option); return; } - + if (PP.getDiagnostics().setDiagnosticGroupMapping(WarningName.c_str()+2, Map)) PP.Diag(StrToks[0].getLocation(), diag::warn_pragma_diagnostic_unknown_warning) << WarningName; } }; - + /// PragmaCommentHandler - "#pragma comment ...". struct PragmaCommentHandler : public PragmaHandler { PragmaCommentHandler(const IdentifierInfo *ID) : PragmaHandler(ID) {} @@ -599,13 +625,13 @@ struct PragmaCommentHandler : public PragmaHandler { PP.HandlePragmaComment(CommentTok); } }; - + // Pragma STDC implementations. enum STDCSetting { STDC_ON, STDC_OFF, STDC_DEFAULT, STDC_INVALID }; - + static STDCSetting LexOnOffSwitch(Preprocessor &PP) { Token Tok; PP.LexUnexpandedToken(Tok); @@ -633,7 +659,7 @@ static STDCSetting LexOnOffSwitch(Preprocessor &PP) { PP.Diag(Tok, diag::ext_stdc_pragma_syntax_eom); return Result; } - + /// PragmaSTDC_FP_CONTRACTHandler - "#pragma STDC FP_CONTRACT ...". struct PragmaSTDC_FP_CONTRACTHandler : public PragmaHandler { PragmaSTDC_FP_CONTRACTHandler(const IdentifierInfo *ID) : PragmaHandler(ID) {} @@ -645,7 +671,7 @@ struct PragmaSTDC_FP_CONTRACTHandler : public PragmaHandler { LexOnOffSwitch(PP); } }; - + /// PragmaSTDC_FENV_ACCESSHandler - "#pragma STDC FENV_ACCESS ...". struct PragmaSTDC_FENV_ACCESSHandler : public PragmaHandler { PragmaSTDC_FENV_ACCESSHandler(const IdentifierInfo *ID) : PragmaHandler(ID) {} @@ -654,7 +680,7 @@ struct PragmaSTDC_FENV_ACCESSHandler : public PragmaHandler { PP.Diag(Tok, diag::warn_stdc_fenv_access_not_supported); } }; - + /// PragmaSTDC_CX_LIMITED_RANGEHandler - "#pragma STDC CX_LIMITED_RANGE ...". struct PragmaSTDC_CX_LIMITED_RANGEHandler : public PragmaHandler { PragmaSTDC_CX_LIMITED_RANGEHandler(const IdentifierInfo *ID) @@ -663,7 +689,7 @@ struct PragmaSTDC_CX_LIMITED_RANGEHandler : public PragmaHandler { LexOnOffSwitch(PP); } }; - + /// PragmaSTDC_UnknownHandler - "#pragma STDC ...". struct PragmaSTDC_UnknownHandler : public PragmaHandler { PragmaSTDC_UnknownHandler() : PragmaHandler(0) {} @@ -672,7 +698,7 @@ struct PragmaSTDC_UnknownHandler : public PragmaHandler { PP.Diag(UnknownTok, diag::ext_stdc_pragma_ignored); } }; - + } // end anonymous namespace @@ -681,7 +707,7 @@ struct PragmaSTDC_UnknownHandler : public PragmaHandler { void Preprocessor::RegisterBuiltinPragmas() { AddPragmaHandler(0, new PragmaOnceHandler(getIdentifierInfo("once"))); AddPragmaHandler(0, new PragmaMarkHandler(getIdentifierInfo("mark"))); - + // #pragma GCC ... AddPragmaHandler("GCC", new PragmaPoisonHandler(getIdentifierInfo("poison"))); AddPragmaHandler("GCC", new PragmaSystemHeaderHandler( @@ -689,7 +715,8 @@ void Preprocessor::RegisterBuiltinPragmas() { AddPragmaHandler("GCC", new PragmaDependencyHandler( getIdentifierInfo("dependency"))); AddPragmaHandler("GCC", new PragmaDiagnosticHandler( - getIdentifierInfo("diagnostic"))); + getIdentifierInfo("diagnostic"), + false)); // #pragma clang ... AddPragmaHandler("clang", new PragmaPoisonHandler( getIdentifierInfo("poison"))); @@ -698,7 +725,8 @@ void Preprocessor::RegisterBuiltinPragmas() { AddPragmaHandler("clang", new PragmaDependencyHandler( getIdentifierInfo("dependency"))); AddPragmaHandler("clang", new PragmaDiagnosticHandler( - getIdentifierInfo("diagnostic"))); + getIdentifierInfo("diagnostic"), + true)); AddPragmaHandler("STDC", new PragmaSTDC_FP_CONTRACTHandler( getIdentifierInfo("FP_CONTRACT"))); @@ -707,7 +735,7 @@ void Preprocessor::RegisterBuiltinPragmas() { AddPragmaHandler("STDC", new PragmaSTDC_CX_LIMITED_RANGEHandler( getIdentifierInfo("CX_LIMITED_RANGE"))); AddPragmaHandler("STDC", new PragmaSTDC_UnknownHandler()); - + // MS extensions. if (Features.Microsoft) AddPragmaHandler(0, new PragmaCommentHandler(getIdentifierInfo("comment"))); diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp index 9f0c15f59e49c..bfa090a09e870 100644 --- a/lib/Lex/Preprocessor.cpp +++ b/lib/Lex/Preprocessor.cpp @@ -37,7 +37,7 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include <cstdio> using namespace clang; @@ -46,7 +46,7 @@ using namespace clang; PreprocessorFactory::~PreprocessorFactory() {} Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts, - TargetInfo &target, SourceManager &SM, + TargetInfo &target, SourceManager &SM, HeaderSearch &Headers, IdentifierInfoLookup* IILookup) : Diags(&diags), Features(opts), Target(target),FileMgr(Headers.getFileMgr()), @@ -54,20 +54,20 @@ Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts, BuiltinInfo(Target), CurPPLexer(0), CurDirLookup(0), Callbacks(0) { ScratchBuf = new ScratchBuffer(SourceMgr); CounterValue = 0; // __COUNTER__ starts at 0. - + // Clear stats. NumDirectives = NumDefined = NumUndefined = NumPragma = 0; NumIf = NumElse = NumEndif = 0; NumEnteredSourceFiles = 0; NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0; NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0; - MaxIncludeStackDepth = 0; + MaxIncludeStackDepth = 0; NumSkipped = 0; // Default to discarding comments. KeepComments = false; KeepMacroComments = false; - + // Macro expansion is enabled. DisableMacroExpansion = false; InMacroArgs = false; @@ -78,11 +78,11 @@ Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts, // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. // This gets unpoisoned where it is allowed. (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); - + // Initialize the pragma handlers. PragmaHandlers = new PragmaNamespace(0); RegisterBuiltinPragmas(); - + // Initialize builtin macros like __LINE__ and friends. RegisterBuiltinMacros(); } @@ -106,11 +106,11 @@ Preprocessor::~Preprocessor() { I->second->Destroy(BP); I->first->setHasMacroDefinition(false); } - + // Free any cached macro expanders. for (unsigned i = 0, e = NumCachedTokenLexers; i != e; ++i) delete TokenLexerCache[i]; - + // Release pragma information. delete PragmaHandlers; @@ -126,27 +126,27 @@ void Preprocessor::setPTHManager(PTHManager* pm) { } void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { - llvm::cerr << tok::getTokenName(Tok.getKind()) << " '" - << getSpelling(Tok) << "'"; - + llvm::errs() << tok::getTokenName(Tok.getKind()) << " '" + << getSpelling(Tok) << "'"; + if (!DumpFlags) return; - - llvm::cerr << "\t"; + + llvm::errs() << "\t"; if (Tok.isAtStartOfLine()) - llvm::cerr << " [StartOfLine]"; + llvm::errs() << " [StartOfLine]"; if (Tok.hasLeadingSpace()) - llvm::cerr << " [LeadingSpace]"; + llvm::errs() << " [LeadingSpace]"; if (Tok.isExpandDisabled()) - llvm::cerr << " [ExpandDisabled]"; + llvm::errs() << " [ExpandDisabled]"; if (Tok.needsCleaning()) { const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); - llvm::cerr << " [UnClean='" << std::string(Start, Start+Tok.getLength()) - << "']"; + llvm::errs() << " [UnClean='" << std::string(Start, Start+Tok.getLength()) + << "']"; } - - llvm::cerr << "\tLoc=<"; + + llvm::errs() << "\tLoc=<"; DumpLocation(Tok.getLocation()); - llvm::cerr << ">"; + llvm::errs() << ">"; } void Preprocessor::DumpLocation(SourceLocation Loc) const { @@ -154,32 +154,32 @@ void Preprocessor::DumpLocation(SourceLocation Loc) const { } void Preprocessor::DumpMacro(const MacroInfo &MI) const { - llvm::cerr << "MACRO: "; + llvm::errs() << "MACRO: "; for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) { DumpToken(MI.getReplacementToken(i)); - llvm::cerr << " "; + llvm::errs() << " "; } - llvm::cerr << "\n"; + llvm::errs() << "\n"; } void Preprocessor::PrintStats() { - llvm::cerr << "\n*** Preprocessor Stats:\n"; - llvm::cerr << NumDirectives << " directives found:\n"; - llvm::cerr << " " << NumDefined << " #define.\n"; - llvm::cerr << " " << NumUndefined << " #undef.\n"; - llvm::cerr << " #include/#include_next/#import:\n"; - llvm::cerr << " " << NumEnteredSourceFiles << " source files entered.\n"; - llvm::cerr << " " << MaxIncludeStackDepth << " max include stack depth\n"; - llvm::cerr << " " << NumIf << " #if/#ifndef/#ifdef.\n"; - llvm::cerr << " " << NumElse << " #else/#elif.\n"; - llvm::cerr << " " << NumEndif << " #endif.\n"; - llvm::cerr << " " << NumPragma << " #pragma.\n"; - llvm::cerr << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; - - llvm::cerr << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" + llvm::errs() << "\n*** Preprocessor Stats:\n"; + llvm::errs() << NumDirectives << " directives found:\n"; + llvm::errs() << " " << NumDefined << " #define.\n"; + llvm::errs() << " " << NumUndefined << " #undef.\n"; + llvm::errs() << " #include/#include_next/#import:\n"; + llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n"; + llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n"; + llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n"; + llvm::errs() << " " << NumElse << " #else/#elif.\n"; + llvm::errs() << " " << NumEndif << " #endif.\n"; + llvm::errs() << " " << NumPragma << " #pragma.\n"; + llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; + + llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, " << NumFastMacroExpanded << " on the fast path.\n"; - llvm::cerr << (NumFastTokenPaste+NumTokenPaste) + llvm::errs() << (NumFastTokenPaste+NumTokenPaste) << " token paste (##) operations performed, " << NumFastTokenPaste << " on the fast path.\n"; } @@ -201,10 +201,10 @@ std::string Preprocessor::getSpelling(const Token &Tok) const { const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation()); if (!Tok.needsCleaning()) return std::string(TokStart, TokStart+Tok.getLength()); - + std::string Result; Result.reserve(Tok.getLength()); - + // Otherwise, hard case, relex the characters into the string. for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); Ptr != End; ) { @@ -230,7 +230,7 @@ std::string Preprocessor::getSpelling(const Token &Tok) const { unsigned Preprocessor::getSpelling(const Token &Tok, const char *&Buffer) const { assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); - + // If this token is an identifier, just return the string from the identifier // table, which is very quick. if (const IdentifierInfo *II = Tok.getIdentifierInfo()) { @@ -240,10 +240,10 @@ unsigned Preprocessor::getSpelling(const Token &Tok, // Otherwise, compute the start of the token in the input lexer buffer. const char *TokStart = 0; - + if (Tok.isLiteral()) TokStart = Tok.getLiteralData(); - + if (TokStart == 0) TokStart = SourceMgr.getCharacterData(Tok.getLocation()); @@ -252,7 +252,7 @@ unsigned Preprocessor::getSpelling(const Token &Tok, Buffer = TokStart; return Tok.getLength(); } - + // Otherwise, hard case, relex the characters into the string. char *OutBuf = const_cast<char*>(Buffer); for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); @@ -263,7 +263,7 @@ unsigned Preprocessor::getSpelling(const Token &Tok, } assert(unsigned(OutBuf-Buffer) != Tok.getLength() && "NeedsCleaning flag set on something that didn't need cleaning!"); - + return OutBuf-Buffer; } @@ -273,15 +273,15 @@ unsigned Preprocessor::getSpelling(const Token &Tok, void Preprocessor::CreateString(const char *Buf, unsigned Len, Token &Tok, SourceLocation InstantiationLoc) { Tok.setLength(Len); - + const char *DestPtr; SourceLocation Loc = ScratchBuf->getToken(Buf, Len, DestPtr); - + if (InstantiationLoc.isValid()) Loc = SourceMgr.createInstantiationLoc(Loc, InstantiationLoc, InstantiationLoc, Len); Tok.setLocation(Loc); - + // If this is a literal token, set the pointer data. if (Tok.isLiteral()) Tok.setLiteralData(DestPtr); @@ -290,19 +290,19 @@ void Preprocessor::CreateString(const char *Buf, unsigned Len, Token &Tok, /// AdvanceToTokenCharacter - Given a location that specifies the start of a /// token, return a new location that specifies a character within the token. -SourceLocation Preprocessor::AdvanceToTokenCharacter(SourceLocation TokStart, +SourceLocation Preprocessor::AdvanceToTokenCharacter(SourceLocation TokStart, unsigned CharNo) { // Figure out how many physical characters away the specified instantiation // character is. This needs to take into consideration newlines and // trigraphs. const char *TokPtr = SourceMgr.getCharacterData(TokStart); - + // If they request the first char of the token, we're trivially done. if (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)) return TokStart; - + unsigned PhysOffset = 0; - + // The usual case is that tokens don't contain anything interesting. Skip // over the uninteresting characters. If a token only consists of simple // chars, this method is extremely fast. @@ -311,7 +311,7 @@ SourceLocation Preprocessor::AdvanceToTokenCharacter(SourceLocation TokStart, return TokStart.getFileLocWithOffset(PhysOffset); ++TokPtr, --CharNo, ++PhysOffset; } - + // If we have a character that may be a trigraph or escaped newline, use a // lexer to parse it correctly. for (; CharNo; --CharNo) { @@ -320,14 +320,14 @@ SourceLocation Preprocessor::AdvanceToTokenCharacter(SourceLocation TokStart, TokPtr += Size; PhysOffset += Size; } - + // Final detail: if we end up on an escaped newline, we want to return the // location of the actual byte of the token. For example foo\<newline>bar // advanced by 3 should return the location of b, not of \\. One compounding // detail of this is that the escape may be made by a trigraph. if (!Lexer::isObviouslySimpleCharacter(*TokPtr)) PhysOffset = Lexer::SkipEscapedNewLines(TokPtr)-TokPtr; - + return TokStart.getFileLocWithOffset(PhysOffset); } @@ -364,33 +364,33 @@ void Preprocessor::EnterMainSourceFile() { // information) and predefined macros aren't guaranteed to be set properly. assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); FileID MainFileID = SourceMgr.getMainFileID(); - + // Enter the main file source buffer. EnterSourceFile(MainFileID, 0); - + // Tell the header info that the main file was entered. If the file is later // #imported, it won't be re-entered. if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) HeaderInfo.IncrementIncludeCount(FE); - + std::vector<char> PrologFile; PrologFile.reserve(4080); - + // FIXME: Don't make a copy. PrologFile.insert(PrologFile.end(), Predefines.begin(), Predefines.end()); - + // Memory buffer must end with a null byte! PrologFile.push_back(0); // Now that we have emitted the predefined macros, #includes, etc into // PrologFile, preprocess it to populate the initial preprocessor state. - llvm::MemoryBuffer *SB = + llvm::MemoryBuffer *SB = llvm::MemoryBuffer::getMemBufferCopy(&PrologFile.front(),&PrologFile.back(), "<built-in>"); assert(SB && "Cannot fail to create predefined source buffer"); FileID FID = SourceMgr.createFileIDForMemBuffer(SB); assert(!FID.isInvalid() && "Could not create FileID for predefines?"); - + // Start parsing the predefines. EnterSourceFile(FID, 0); } @@ -406,7 +406,7 @@ IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier, const char *BufPtr) { assert(Identifier.is(tok::identifier) && "Not an identifier!"); assert(Identifier.getIdentifierInfo() == 0 && "Identinfo already exists!"); - + // Look up this token, see if it is a macro, or if it is a language keyword. IdentifierInfo *II; if (BufPtr && !Identifier.needsCleaning()) { @@ -436,7 +436,7 @@ IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier, void Preprocessor::HandleIdentifier(Token &Identifier) { assert(Identifier.getIdentifierInfo() && "Can't handle identifiers without identifier info!"); - + IdentifierInfo &II = *Identifier.getIdentifierInfo(); // If this identifier was poisoned, and if it was not produced from a macro @@ -447,7 +447,7 @@ void Preprocessor::HandleIdentifier(Token &Identifier) { else Diag(Identifier, diag::ext_pp_bad_vaargs_use); } - + // If this is a macro to be expanded, do it. if (MacroInfo *MI = getMacroInfo(&II)) { if (!DisableMacroExpansion && !Identifier.isExpandDisabled()) { diff --git a/lib/Lex/PreprocessorLexer.cpp b/lib/Lex/PreprocessorLexer.cpp index f9dfad9c808ec..e005c494763cb 100644 --- a/lib/Lex/PreprocessorLexer.cpp +++ b/lib/Lex/PreprocessorLexer.cpp @@ -26,13 +26,13 @@ void PreprocessorLexer::LexIncludeFilename(Token &FilenameTok) { // We are now parsing a filename! ParsingFilename = true; - + // Lex the filename. IndirectLex(FilenameTok); // We should have obtained the filename now. ParsingFilename = false; - + // No filename? if (FilenameTok.is(tok::eom)) PP->Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); diff --git a/lib/Lex/ScratchBuffer.cpp b/lib/Lex/ScratchBuffer.cpp index 28f3d7ff45b2e..0e98c17519855 100644 --- a/lib/Lex/ScratchBuffer.cpp +++ b/lib/Lex/ScratchBuffer.cpp @@ -38,16 +38,16 @@ SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len, // Prefix the token with a \n, so that it looks like it is the first thing on // its own virtual line in caret diagnostics. CurBuffer[BytesUsed++] = '\n'; - + // Return a pointer to the character data. DestPtr = CurBuffer+BytesUsed; - + // Copy the token data into the buffer. memcpy(CurBuffer+BytesUsed, Buf, Len); // Remember that we used these bytes. BytesUsed += Len+1; - + // Add a NUL terminator to the token. This keeps the tokens separated, in // case they get relexed, and puts them on their own virtual lines in case a // diagnostic points to one. @@ -62,8 +62,8 @@ void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) { // support gigantic tokens, which almost certainly won't happen. :) if (RequestLen < ScratchBufSize) RequestLen = ScratchBufSize; - - llvm::MemoryBuffer *Buf = + + llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getNewMemBuffer(RequestLen, "<scratch space>"); FileID FID = SourceMgr.createFileIDForMemBuffer(Buf); BufferStartLoc = SourceMgr.getLocForStartOfFile(FID); diff --git a/lib/Lex/TokenConcatenation.cpp b/lib/Lex/TokenConcatenation.cpp index be13b274574a6..ade7f8516ea7b 100644 --- a/lib/Lex/TokenConcatenation.cpp +++ b/lib/Lex/TokenConcatenation.cpp @@ -13,7 +13,7 @@ #include "clang/Lex/TokenConcatenation.h" #include "clang/Lex/Preprocessor.h" -using namespace clang; +using namespace clang; /// StartsWithL - Return true if the spelling of this token starts with 'L'. @@ -22,14 +22,14 @@ bool TokenConcatenation::StartsWithL(const Token &Tok) const { SourceManager &SM = PP.getSourceManager(); return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L'; } - + if (Tok.getLength() < 256) { char Buffer[256]; const char *TokPtr = Buffer; PP.getSpelling(Tok, TokPtr); return TokPtr[0] == 'L'; } - + return PP.getSpelling(Tok)[0] == 'L'; } @@ -42,21 +42,21 @@ bool TokenConcatenation::IsIdentifierL(const Token &Tok) const { SourceManager &SM = PP.getSourceManager(); return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L'; } - + if (Tok.getLength() < 256) { char Buffer[256]; const char *TokPtr = Buffer; - if (PP.getSpelling(Tok, TokPtr) != 1) + if (PP.getSpelling(Tok, TokPtr) != 1) return false; return TokPtr[0] == 'L'; } - + return PP.getSpelling(Tok) == "L"; } TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) { memset(TokenInfo, 0, sizeof(TokenInfo)); - + // These tokens have custom code in AvoidConcat. TokenInfo[tok::identifier ] |= aci_custom; TokenInfo[tok::numeric_constant] |= aci_custom_firstchar; @@ -72,7 +72,7 @@ TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) { TokenInfo[tok::colon ] |= aci_custom_firstchar; TokenInfo[tok::hash ] |= aci_custom_firstchar; TokenInfo[tok::arrow ] |= aci_custom_firstchar; - + // These tokens change behavior if followed by an '='. TokenInfo[tok::amp ] |= aci_avoid_equal; // &= TokenInfo[tok::plus ] |= aci_avoid_equal; // += @@ -130,29 +130,29 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevTok, // source. If they were, it must be okay to stick them together: if there // were an issue, the tokens would have been lexed differently. if (PrevTok.getLocation().isFileID() && Tok.getLocation().isFileID() && - PrevTok.getLocation().getFileLocWithOffset(PrevTok.getLength()) == + PrevTok.getLocation().getFileLocWithOffset(PrevTok.getLength()) == Tok.getLocation()) return false; - + tok::TokenKind PrevKind = PrevTok.getKind(); if (PrevTok.getIdentifierInfo()) // Language keyword or named operator. PrevKind = tok::identifier; - + // Look up information on when we should avoid concatenation with prevtok. unsigned ConcatInfo = TokenInfo[PrevKind]; - + // If prevtok never causes a problem for anything after it, return quickly. if (ConcatInfo == 0) return false; - + if (ConcatInfo & aci_avoid_equal) { // If the next token is '=' or '==', avoid concatenation. if (Tok.is(tok::equal) || Tok.is(tok::equalequal)) return true; ConcatInfo &= ~aci_avoid_equal; } - + if (ConcatInfo == 0) return false; - + // Basic algorithm: we look at the first character of the second token, and // determine whether it, if appended to the first token, would form (or // would contribute) to a larger token if concatenated. @@ -162,10 +162,10 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevTok, } else { FirstChar = GetFirstChar(PP, Tok); } - + switch (PrevKind) { default: assert(0 && "InitAvoidConcatTokenInfo built wrong"); - case tok::identifier: // id+id or id+number or id+L"foo". + case tok::identifier: // id+id or id+number or id+L"foo". // id+'.'... will not append. if (Tok.is(tok::numeric_constant)) return GetFirstChar(PP, Tok) != '.'; @@ -173,18 +173,18 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevTok, if (Tok.getIdentifierInfo() || Tok.is(tok::wide_string_literal) /* || Tok.is(tok::wide_char_literal)*/) return true; - + // If this isn't identifier + string, we're done. if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal)) return false; - + // FIXME: need a wide_char_constant! - + // If the string was a wide string L"foo" or wide char L'f', it would // concat with the previous identifier into fooL"bar". Avoid this. if (StartsWithL(Tok)) return true; - + // Otherwise, this is a narrow character or string. If the *identifier* // is a literal 'L', avoid pasting L "foo" -> L"foo". return IsIdentifierL(PrevTok); diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp index f9f93867c8532..f006f5ae55bb0 100644 --- a/lib/Lex/TokenLexer.cpp +++ b/lib/Lex/TokenLexer.cpp @@ -27,11 +27,11 @@ void TokenLexer::Init(Token &Tok, SourceLocation ILEnd, MacroArgs *Actuals) { // If the client is reusing a TokenLexer, make sure to free any memory // associated with it. destroy(); - + Macro = PP.getMacroInfo(Tok.getIdentifierInfo()); ActualArgs = Actuals; CurToken = 0; - + InstantiateLocStart = Tok.getLocation(); InstantiateLocEnd = ILEnd; AtStartOfLine = Tok.isAtStartOfLine(); @@ -45,7 +45,7 @@ void TokenLexer::Init(Token &Tok, SourceLocation ILEnd, MacroArgs *Actuals) { // Tokens to point to the expanded tokens. if (Macro->isFunctionLike() && Macro->getNumArgs()) ExpandFunctionArguments(); - + // Mark the macro as currently disabled, so that it is not recursively // expanded. The macro must be disabled only after argument pre-expansion of // function-like macro arguments occurs. @@ -61,7 +61,7 @@ void TokenLexer::Init(const Token *TokArray, unsigned NumToks, // If the client is reusing a TokenLexer, make sure to free any memory // associated with it. destroy(); - + Macro = 0; ActualArgs = 0; Tokens = TokArray; @@ -72,7 +72,7 @@ void TokenLexer::Init(const Token *TokArray, unsigned NumToks, InstantiateLocStart = InstantiateLocEnd = SourceLocation(); AtStartOfLine = false; HasLeadingSpace = false; - + // Set HasLeadingSpace/AtStartOfLine so that the first token will be // returned unmodified. if (NumToks != 0) { @@ -90,7 +90,7 @@ void TokenLexer::destroy() { Tokens = 0; OwnsTokens = false; } - + // TokenLexer owns its formal arguments. if (ActualArgs) ActualArgs->destroy(); } @@ -99,17 +99,17 @@ void TokenLexer::destroy() { /// return preexpanded tokens from Tokens. void TokenLexer::ExpandFunctionArguments() { llvm::SmallVector<Token, 128> ResultToks; - + // Loop through 'Tokens', expanding them into ResultToks. Keep // track of whether we change anything. If not, no need to keep them. If so, // we install the newly expanded sequence as the new 'Tokens' list. bool MadeChange = false; - + // NextTokGetsSpace - When this is true, the next token appended to the // output list will get a leading space, regardless of whether it had one to // begin with or not. This is used for placemarker support. bool NextTokGetsSpace = false; - + for (unsigned i = 0, e = NumTokens; i != e; ++i) { // If we found the stringify operator, get the argument stringified. The // preprocessor already verified that the following token is a macro name @@ -118,7 +118,7 @@ void TokenLexer::ExpandFunctionArguments() { if (CurTok.is(tok::hash) || CurTok.is(tok::hashat)) { int ArgNo = Macro->getArgumentNum(Tokens[i+1].getIdentifierInfo()); assert(ArgNo != -1 && "Token following # is not an argument?"); - + Token Res; if (CurTok.is(tok::hash)) // Stringify Res = ActualArgs->getStringifiedArgument(ArgNo, PP); @@ -127,19 +127,19 @@ void TokenLexer::ExpandFunctionArguments() { Res = MacroArgs::StringifyArgument(ActualArgs->getUnexpArgument(ArgNo), PP, true); } - + // The stringified/charified string leading space flag gets set to match // the #/#@ operator. if (CurTok.hasLeadingSpace() || NextTokGetsSpace) Res.setFlag(Token::LeadingSpace); - + ResultToks.push_back(Res); MadeChange = true; ++i; // Skip arg name. NextTokGetsSpace = false; continue; } - + // Otherwise, if this is not an argument token, just add the token to the // output buffer. IdentifierInfo *II = CurTok.getIdentifierInfo(); @@ -154,17 +154,17 @@ void TokenLexer::ExpandFunctionArguments() { } continue; } - + // An argument is expanded somehow, the result is different than the // input. MadeChange = true; // Otherwise, this is a use of the argument. Find out if there is a paste // (##) operator before or after the argument. - bool PasteBefore = + bool PasteBefore = !ResultToks.empty() && ResultToks.back().is(tok::hashhash); bool PasteAfter = i+1 != e && Tokens[i+1].is(tok::hashhash); - + // If it is not the LHS/RHS of a ## operator, we must pre-expand the // argument and substitute the expanded tokens into the result. This is // C99 6.10.3.1p1. @@ -178,13 +178,13 @@ void TokenLexer::ExpandFunctionArguments() { ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, PP)[0]; else ResultArgToks = ArgTok; // Use non-preexpanded tokens. - + // If the arg token expanded into anything, append it. if (ResultArgToks->isNot(tok::eof)) { unsigned FirstResult = ResultToks.size(); unsigned NumToks = MacroArgs::getArgLength(ResultArgToks); ResultToks.append(ResultArgToks, ResultArgToks+NumToks); - + // If any tokens were substituted from the argument, the whitespace // before the first token should match the whitespace of the arg // identifier. @@ -199,7 +199,7 @@ void TokenLexer::ExpandFunctionArguments() { } continue; } - + // Okay, we have a token that is either the LHS or RHS of a paste (##) // argument. It gets substituted as its non-pre-expanded tokens. const Token *ArgToks = ActualArgs->getUnexpArgument(ArgNo); @@ -217,9 +217,9 @@ void TokenLexer::ExpandFunctionArguments() { PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma); ResultToks.pop_back(); } - + ResultToks.append(ArgToks, ArgToks+NumToks); - + // If this token (the macro argument) was supposed to get leading // whitespace, transfer this information onto the first token of the // expansion. @@ -233,11 +233,11 @@ void TokenLexer::ExpandFunctionArguments() { if ((CurTok.hasLeadingSpace() || NextTokGetsSpace) && !PasteBefore) ResultToks[ResultToks.size()-NumToks].setFlag(Token::LeadingSpace); - + NextTokGetsSpace = false; continue; } - + // If an empty argument is on the LHS or RHS of a paste, the standard (C99 // 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur. We // implement this by eating ## operators when a LHS or RHS expands to @@ -250,13 +250,13 @@ void TokenLexer::ExpandFunctionArguments() { ++i; continue; } - + // If this is on the RHS of a paste operator, we've already copied the // paste operator to the ResultToks list. Remove it. assert(PasteBefore && ResultToks.back().is(tok::hashhash)); NextTokGetsSpace |= ResultToks.back().hasLeadingSpace(); ResultToks.pop_back(); - + // If this is the __VA_ARGS__ token, and if the argument wasn't provided, // and if the macro had at least one real argument, and if the token before // the ## was a comma, remove the comma. @@ -271,7 +271,7 @@ void TokenLexer::ExpandFunctionArguments() { } continue; } - + // If anything changed, install this as the new Tokens list. if (MadeChange) { assert(!OwnsTokens && "This would leak if we already own the token list"); @@ -284,7 +284,7 @@ void TokenLexer::ExpandFunctionArguments() { if (NumTokens) memcpy(Res, &ResultToks[0], NumTokens*sizeof(Token)); Tokens = Res; - + // The preprocessor bump pointer owns these tokens, not us. OwnsTokens = false; } @@ -309,16 +309,16 @@ void TokenLexer::Lex(Token &Tok) { // whatever is next. return PPCache.Lex(Tok); } - + // If this is the first token of the expanded result, we inherit spacing // properties later. bool isFirstToken = CurToken == 0; - + // Get the next token to return. Tok = Tokens[CurToken++]; - + bool TokenIsFromPaste = false; - + // If this token is followed by a token paste (##) operator, paste the tokens! if (!isAtEnd() && Tokens[CurToken].is(tok::hashhash)) { if (PasteTokens(Tok)) { @@ -328,7 +328,7 @@ void TokenLexer::Lex(Token &Tok) { } else { TokenIsFromPaste = true; } - } + } // The token's current location indicate where the token was lexed from. We // need this information to compute the spelling of the token, but any @@ -337,26 +337,26 @@ void TokenLexer::Lex(Token &Tok) { // that captures all of this. if (InstantiateLocStart.isValid()) { // Don't do this for token streams. SourceManager &SM = PP.getSourceManager(); - Tok.setLocation(SM.createInstantiationLoc(Tok.getLocation(), + Tok.setLocation(SM.createInstantiationLoc(Tok.getLocation(), InstantiateLocStart, InstantiateLocEnd, Tok.getLength())); } - + // If this is the first token, set the lexical properties of the token to // match the lexical properties of the macro identifier. if (isFirstToken) { Tok.setFlagValue(Token::StartOfLine , AtStartOfLine); Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace); } - + // Handle recursive expansion! if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != 0) { // Change the kind of this identifier to the appropriate token kind, e.g. // turning "for" into a keyword. IdentifierInfo *II = Tok.getIdentifierInfo(); Tok.setKind(II->getTokenID()); - + // If this identifier was poisoned and from a paste, emit an error. This // won't be handled by Preprocessor::HandleIdentifier because this is coming // from a macro expansion. @@ -367,7 +367,7 @@ void TokenLexer::Lex(Token &Tok) { else PP.Diag(Tok, diag::err_pp_used_poisoned_id); } - + if (!DisableMacroExpansion && II->isHandleIdentifierCase()) PP.HandleIdentifier(Tok); } @@ -387,33 +387,33 @@ bool TokenLexer::PasteTokens(Token &Tok) { SourceLocation PasteOpLoc = Tokens[CurToken].getLocation(); ++CurToken; assert(!isAtEnd() && "No token on the RHS of a paste operator!"); - + // Get the RHS token. const Token &RHS = Tokens[CurToken]; - + // Allocate space for the result token. This is guaranteed to be enough for // the two tokens. Buffer.resize(Tok.getLength() + RHS.getLength()); - + // Get the spelling of the LHS token in Buffer. const char *BufPtr = &Buffer[0]; unsigned LHSLen = PP.getSpelling(Tok, BufPtr); if (BufPtr != &Buffer[0]) // Really, we want the chars in Buffer! memcpy(&Buffer[0], BufPtr, LHSLen); - + BufPtr = &Buffer[LHSLen]; unsigned RHSLen = PP.getSpelling(RHS, BufPtr); if (BufPtr != &Buffer[LHSLen]) // Really, we want the chars in Buffer! memcpy(&Buffer[LHSLen], BufPtr, RHSLen); - + // Trim excess space. Buffer.resize(LHSLen+RHSLen); - + // Plop the pasted result (including the trailing newline and null) into a // scratch buffer where we can lex it. Token ResultTokTmp; ResultTokTmp.startToken(); - + // Claim that the tmp token is a string_literal so that we can get the // character pointer back from CreateString. ResultTokTmp.setKind(tok::string_literal); @@ -423,7 +423,7 @@ bool TokenLexer::PasteTokens(Token &Tok) { // Lex the resultant pasted token into Result. Token Result; - + if (Tok.is(tok::identifier) && RHS.is(tok::identifier)) { // Common paste case: identifier+identifier = identifier. Avoid creating // a lexer and other overhead. @@ -434,42 +434,42 @@ bool TokenLexer::PasteTokens(Token &Tok) { Result.setLength(LHSLen+RHSLen); } else { PP.IncrementPasteCounter(false); - + assert(ResultTokLoc.isFileID() && "Should be a raw location into scratch buffer"); SourceManager &SourceMgr = PP.getSourceManager(); FileID LocFileID = SourceMgr.getFileID(ResultTokLoc); - + const char *ScratchBufStart = SourceMgr.getBufferData(LocFileID).first; - + // Make a lexer to lex this string from. Lex just this one token. // Make a lexer object so that we lex and expand the paste result. Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID), PP.getLangOptions(), ScratchBufStart, ResultTokStrPtr, ResultTokStrPtr+LHSLen+RHSLen); - + // Lex a token in raw mode. This way it won't look up identifiers // automatically, lexing off the end will return an eof token, and // warnings are disabled. This returns true if the result token is the // entire buffer. bool isInvalid = !TL.LexFromRawLexer(Result); - + // If we got an EOF token, we didn't form even ONE token. For example, we // did "/ ## /" to get "//". isInvalid |= Result.is(tok::eof); - + // If pasting the two tokens didn't form a full new token, this is an // error. This occurs with "x ## +" and other stuff. Return with Tok // unmodified and with RHS as the next token to lex. if (isInvalid) { // Test for the Microsoft extension of /##/ turning into // here on the // error path. - if (PP.getLangOptions().Microsoft && Tok.is(tok::slash) && + if (PP.getLangOptions().Microsoft && Tok.is(tok::slash) && RHS.is(tok::slash)) { HandleMicrosoftCommentPaste(Tok); return true; } - + // Do not emit the warning when preprocessing assembler code. if (!PP.getLangOptions().AsmPreprocessor) { // Explicitly convert the token location to have proper instantiation @@ -481,26 +481,26 @@ bool TokenLexer::PasteTokens(Token &Tok) { PP.Diag(Loc, diag::err_pp_bad_paste) << std::string(Buffer.begin(), Buffer.end()); } - + // Do not consume the RHS. --CurToken; } - + // Turn ## into 'unknown' to avoid # ## # from looking like a paste // operator. if (Result.is(tok::hashhash)) Result.setKind(tok::unknown); } - + // Transfer properties of the LHS over the the Result. Result.setFlagValue(Token::StartOfLine , Tok.isAtStartOfLine()); Result.setFlagValue(Token::LeadingSpace, Tok.hasLeadingSpace()); - + // Finally, replace LHS with the result, consume the RHS, and iterate. ++CurToken; Tok = Result; } while (!isAtEnd() && Tokens[CurToken].is(tok::hashhash)); - + // Now that we got the result token, it will be subject to expansion. Since // token pasting re-lexes the result token in raw mode, identifier information // isn't looked up. As such, if the result is an identifier, look up id info. @@ -532,11 +532,11 @@ unsigned TokenLexer::isNextTokenLParen() const { void TokenLexer::HandleMicrosoftCommentPaste(Token &Tok) { // We 'comment out' the rest of this macro by just ignoring the rest of the // tokens that have not been lexed yet, if any. - + // Since this must be a macro, mark the macro enabled now that it is no longer // being expanded. assert(Macro && "Token streams can't paste comments"); Macro->EnableMacro(); - + PP.HandleMicrosoftCommentPaste(Tok); } |