diff options
Diffstat (limited to 'include/clang/Lex')
-rw-r--r-- | include/clang/Lex/DirectoryLookup.h | 133 | ||||
-rw-r--r-- | include/clang/Lex/HeaderMap.h | 67 | ||||
-rw-r--r-- | include/clang/Lex/HeaderSearch.h | 237 | ||||
-rw-r--r-- | include/clang/Lex/LexDiagnostic.h | 27 | ||||
-rw-r--r-- | include/clang/Lex/Lexer.h | 376 | ||||
-rw-r--r-- | include/clang/Lex/LiteralSupport.h | 176 | ||||
-rw-r--r-- | include/clang/Lex/MacroInfo.h | 218 | ||||
-rw-r--r-- | include/clang/Lex/MultipleIncludeOpt.h | 130 | ||||
-rw-r--r-- | include/clang/Lex/PPCallbacks.h | 122 | ||||
-rw-r--r-- | include/clang/Lex/PTHLexer.h | 104 | ||||
-rw-r--r-- | include/clang/Lex/PTHManager.h | 141 | ||||
-rw-r--r-- | include/clang/Lex/Pragma.h | 90 | ||||
-rw-r--r-- | include/clang/Lex/Preprocessor.h | 801 | ||||
-rw-r--r-- | include/clang/Lex/PreprocessorLexer.h | 161 | ||||
-rw-r--r-- | include/clang/Lex/ScratchBuffer.h | 45 | ||||
-rw-r--r-- | include/clang/Lex/Token.h | 312 | ||||
-rw-r--r-- | include/clang/Lex/TokenConcatenation.h | 73 | ||||
-rw-r--r-- | include/clang/Lex/TokenLexer.h | 154 |
18 files changed, 3367 insertions, 0 deletions
diff --git a/include/clang/Lex/DirectoryLookup.h b/include/clang/Lex/DirectoryLookup.h new file mode 100644 index 0000000000000..618de39233db7 --- /dev/null +++ b/include/clang/Lex/DirectoryLookup.h @@ -0,0 +1,133 @@ +//===--- DirectoryLookup.h - Info for searching for headers -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the DirectoryLookup interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEX_DIRECTORYLOOKUP_H +#define LLVM_CLANG_LEX_DIRECTORYLOOKUP_H + +#include "clang/Basic/SourceManager.h" + +namespace clang { +class HeaderMap; +class DirectoryEntry; +class FileEntry; +class HeaderSearch; + +/// DirectoryLookup - This class represents one entry in the search list that +/// specifies the search order for directories in #include directives. It +/// represents either a directory, a framework, or a headermap. +/// +class DirectoryLookup { +public: + enum LookupType_t { + LT_NormalDir, + LT_Framework, + LT_HeaderMap + }; +private: + union { // This union is discriminated by isHeaderMap. + /// Dir - This is the actual directory that we're referring to for a normal + /// directory or a framework. + const DirectoryEntry *Dir; + + /// Map - This is the HeaderMap if this is a headermap lookup. + /// + const HeaderMap *Map; + } u; + + /// DirCharacteristic - The type of directory this is: this is an instance of + /// SrcMgr::CharacteristicKind. + unsigned DirCharacteristic : 2; + + /// UserSupplied - True if this is a user-supplied directory. + /// + bool UserSupplied : 1; + + /// LookupType - This indicates whether this DirectoryLookup object is a + /// normal directory, a framework, or a headermap. 
+ unsigned LookupType : 2; +public: + /// DirectoryLookup ctor - Note that this ctor *does not take ownership* of + /// 'dir'. + DirectoryLookup(const DirectoryEntry *dir, SrcMgr::CharacteristicKind DT, + bool isUser, bool isFramework) + : DirCharacteristic(DT), UserSupplied(isUser), + LookupType(isFramework ? LT_Framework : LT_NormalDir) { + u.Dir = dir; + } + + /// DirectoryLookup ctor - Note that this ctor *does not take ownership* of + /// 'map'. + DirectoryLookup(const HeaderMap *map, SrcMgr::CharacteristicKind DT, + bool isUser) + : DirCharacteristic(DT), UserSupplied(isUser), LookupType(LT_HeaderMap) { + u.Map = map; + } + + /// getLookupType - Return the kind of directory lookup that this is: either a + /// normal directory, a framework path, or a HeaderMap. + LookupType_t getLookupType() const { return (LookupType_t)LookupType; } + + /// getName - Return the directory or filename corresponding to this lookup + /// object. + const char *getName() const; + + /// getDir - Return the directory that this entry refers to. + /// + const DirectoryEntry *getDir() const { return isNormalDir() ? u.Dir : 0; } + + /// getFrameworkDir - Return the directory that this framework refers to. + /// + const DirectoryEntry *getFrameworkDir() const { + return isFramework() ? u.Dir : 0; + } + + /// getHeaderMap - Return the header map that this entry refers to. + /// + const HeaderMap *getHeaderMap() const { return isHeaderMap() ? u.Map : 0; } + + /// isNormalDir - Return true if this is a normal directory, not a header map. + bool isNormalDir() const { return getLookupType() == LT_NormalDir; } + + /// isFramework - True if this is a framework directory. + /// + bool isFramework() const { return getLookupType() == LT_Framework; } + + /// isHeaderMap - Return true if this is a header map, not a normal directory. + bool isHeaderMap() const { return getLookupType() == LT_HeaderMap; } + + /// getDirCharacteristic - The type of directory this is, as a + /// SrcMgr::CharacteristicKind value. 
+ SrcMgr::CharacteristicKind getDirCharacteristic() const { + return (SrcMgr::CharacteristicKind)DirCharacteristic; + } + + /// isUserSupplied - True if this is a user-supplied directory. + /// + bool isUserSupplied() const { return UserSupplied; } + + + /// LookupFile - Lookup the specified file in this search path, returning it + /// if it exists or returning null if not. + const FileEntry *LookupFile(const char *FilenameStart, + const char *FilenameEnd, HeaderSearch &HS) const; + +private: + const FileEntry *DoFrameworkLookup(const char *FilenameStart, + const char *FilenameEnd, + HeaderSearch &HS) const; + +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/HeaderMap.h b/include/clang/Lex/HeaderMap.h new file mode 100644 index 0000000000000..d8033093bd8e7 --- /dev/null +++ b/include/clang/Lex/HeaderMap.h @@ -0,0 +1,67 @@ +//===--- HeaderMap.h - A file that acts like dir of symlinks ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the HeaderMap interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEX_HEADERMAP_H +#define LLVM_CLANG_LEX_HEADERMAP_H + +namespace llvm { + class MemoryBuffer; +} +namespace clang { + class FileEntry; + class FileManager; + struct HMapBucket; + struct HMapHeader; + +/// This class represents an Apple concept known as a 'header map'. To the +/// #include file resolution process, it basically acts like a directory of +/// symlinks to files. Its advantages are that it is dense and more efficient +/// to create and process than a directory of symlinks. 
+class HeaderMap { + HeaderMap(const HeaderMap&); // DO NOT IMPLEMENT (not copyable) + void operator=(const HeaderMap&); // DO NOT IMPLEMENT (not assignable) + + const llvm::MemoryBuffer *FileBuffer; + bool NeedsBSwap; + + HeaderMap(const llvm::MemoryBuffer *File, bool BSwap) + : FileBuffer(File), NeedsBSwap(BSwap) { + } +public: + ~HeaderMap(); + + /// HeaderMap::Create - This attempts to load the specified file as a header + /// map. If it doesn't look like a HeaderMap, it gives up and returns null. + static const HeaderMap *Create(const FileEntry *FE); + + /// LookupFile - Check to see if the specified relative filename is located in + /// this HeaderMap. If so, open it and return its FileEntry. + const FileEntry *LookupFile(const char *FilenameStart,const char *FilenameEnd, + FileManager &FM) const; + + /// getFileName - Return the filename of the headermap. + const char *getFileName() const; + + /// dump - Print the contents of this headermap to stderr. + void dump() const; + +private: + unsigned getEndianAdjustedWord(unsigned X) const; + const HMapHeader &getHeader() const; + HMapBucket getBucket(unsigned BucketNo) const; + const char *getString(unsigned StrTabIdx) const; +}; + +} // end namespace clang. + +#endif diff --git a/include/clang/Lex/HeaderSearch.h b/include/clang/Lex/HeaderSearch.h new file mode 100644 index 0000000000000..f21aab1b40152 --- /dev/null +++ b/include/clang/Lex/HeaderSearch.h @@ -0,0 +1,237 @@ +//===--- HeaderSearch.h - Resolve Header File Locations ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the HeaderSearch interface. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEX_HEADERSEARCH_H +#define LLVM_CLANG_LEX_HEADERSEARCH_H + +#include "clang/Lex/DirectoryLookup.h" +#include "llvm/ADT/StringMap.h" +#include <vector> + +namespace clang { + +class ExternalIdentifierLookup; +class FileEntry; +class FileManager; +class IdentifierInfo; + +/// HeaderFileInfo - The preprocessor keeps track of this information for each +/// file that is #included. +struct HeaderFileInfo { + /// isImport - True if this is a #import'd or #pragma once file. + bool isImport : 1; + + /// DirInfo - Keep track of whether this is a system header, and if so, + /// whether it is C++ clean or not. This can be set by the include paths or + /// by #pragma GCC system_header. This is an instance of + /// SrcMgr::CharacteristicKind. + unsigned DirInfo : 2; + + /// NumIncludes - This is the number of times the file has been included + /// already. + unsigned short NumIncludes; + + /// ControllingMacro - If this file has a #ifndef XXX (or equivalent) guard + /// that protects the entire contents of the file, this is the identifier + /// for the macro that controls whether or not it has any effect. + /// + /// Note: Most clients should use getControllingMacro() to access + /// the controlling macro of this header, since + /// getControllingMacro() is able to load a controlling macro from + /// external storage. + const IdentifierInfo *ControllingMacro; + + /// \brief The ID number of the controlling macro. + /// + /// This ID number will be non-zero when there is a controlling + /// macro whose IdentifierInfo may not yet have been loaded from + /// external storage. + unsigned ControllingMacroID; + + HeaderFileInfo() + : isImport(false), DirInfo(SrcMgr::C_User), + NumIncludes(0), ControllingMacro(0), ControllingMacroID(0) {} + + /// \brief Retrieve the controlling macro for this header file, if + /// any. 
+ const IdentifierInfo *getControllingMacro(ExternalIdentifierLookup *External); +}; + +/// HeaderSearch - This class encapsulates the information needed to find the +/// file referenced by a #include or #include_next, (sub-)framework lookup, etc. +class HeaderSearch { + FileManager &FileMgr; + + /// #include search path information. Requests for #include "x" search the + /// directory of the #including file first, then each directory in SearchDirs + /// consecutively. Requests for <x> search the current dir first, then each + /// directory in SearchDirs, starting at SystemDirIdx, consecutively. If + /// NoCurDirSearch is true, then the check for the file in the current + /// directory is suppressed. + std::vector<DirectoryLookup> SearchDirs; + unsigned SystemDirIdx; + bool NoCurDirSearch; + + /// FileInfo - This contains all of the preprocessor-specific data about files + /// that are included. The vector is indexed by the FileEntry's UID. + /// + std::vector<HeaderFileInfo> FileInfo; + + /// LookupFileCache - This keeps track of each lookup performed by + /// LookupFile. The first part of the value is the starting index in + /// SearchDirs that the cached search was performed from. If there is a hit + /// and this value doesn't match the current query, the cache has to be + /// ignored. The second value is the entry in SearchDirs that satisfied the + /// query. + llvm::StringMap<std::pair<unsigned, unsigned> > LookupFileCache; + + + /// FrameworkMap - This is a collection mapping a framework or subframework + /// name like "Carbon" to the Carbon.framework directory. + llvm::StringMap<const DirectoryEntry *> FrameworkMap; + + /// HeaderMaps - This is a mapping from FileEntry -> HeaderMap, uniquing + /// headermaps. This vector owns the headermap. + std::vector<std::pair<const FileEntry*, const HeaderMap*> > HeaderMaps; + + /// \brief Entity used to resolve the identifier IDs of controlling + /// macros into IdentifierInfo pointers, as needed. 
+ ExternalIdentifierLookup *ExternalLookup; + + // Various statistics we track for performance analysis. + unsigned NumIncluded; + unsigned NumMultiIncludeFileOptzn; + unsigned NumFrameworkLookups, NumSubFrameworkLookups; + + // HeaderSearch doesn't support default or copy construction. + explicit HeaderSearch(); + explicit HeaderSearch(const HeaderSearch&); + void operator=(const HeaderSearch&); +public: + HeaderSearch(FileManager &FM); + ~HeaderSearch(); + + FileManager &getFileMgr() const { return FileMgr; } + + /// SetSearchPaths - Interface for setting the file search paths. + /// + void SetSearchPaths(const std::vector<DirectoryLookup> &dirs, + unsigned systemDirIdx, bool noCurDirSearch) { + SearchDirs = dirs; + SystemDirIdx = systemDirIdx; + NoCurDirSearch = noCurDirSearch; + //LookupFileCache.clear(); + } + + /// ClearFileInfo - Forget everything we know about headers so far. + void ClearFileInfo() { + FileInfo.clear(); + } + + void SetExternalLookup(ExternalIdentifierLookup *EIL) { + ExternalLookup = EIL; + } + + /// LookupFile - Given a "foo" or <foo> reference, look up the indicated file, + /// return null on failure. isAngled indicates whether the file reference is + /// a <> reference. If successful, this returns 'UsedDir', the + /// DirectoryLookup member the file was found in, or null if not applicable. + /// If CurDir is non-null, the file was found in the specified directory + /// search location. This is used to implement #include_next. CurFileEnt, if + /// non-null, indicates where the #including file is, in case a relative + /// search is needed. + const FileEntry *LookupFile(const char *FilenameStart, + const char *FilenameEnd, bool isAngled, + const DirectoryLookup *FromDir, + const DirectoryLookup *&CurDir, + const FileEntry *CurFileEnt); + + /// LookupSubframeworkHeader - Look up a subframework for the specified + /// #include file. 
For example, if #include'ing <HIToolbox/HIToolbox.h> from + /// within ".../Carbon.framework/Headers/Carbon.h", check to see if HIToolbox + /// is a subframework within Carbon.framework. If so, return the FileEntry + /// for the designated file, otherwise return null. + const FileEntry *LookupSubframeworkHeader(const char *FilenameStart, + const char *FilenameEnd, + const FileEntry *RelativeFileEnt); + + /// LookupFrameworkCache - Look up the specified framework name in our + /// framework cache, returning the DirectoryEntry it is in if we know, + /// otherwise, return null. + const DirectoryEntry *&LookupFrameworkCache(const char *FWNameStart, + const char *FWNameEnd) { + return FrameworkMap.GetOrCreateValue(FWNameStart, FWNameEnd).getValue(); + } + + /// ShouldEnterIncludeFile - Mark the specified file as a target of a + /// #include, #include_next, or #import directive. Return false if #including + /// the file will have no effect or true if we should include it. + bool ShouldEnterIncludeFile(const FileEntry *File, bool isImport); + + + /// getFileDirFlavor - Return whether the specified file is a normal header, + /// a system header, or a C++ friendly system header. + SrcMgr::CharacteristicKind getFileDirFlavor(const FileEntry *File) { + return (SrcMgr::CharacteristicKind)getFileInfo(File).DirInfo; + } + + /// MarkFileIncludeOnce - Mark the specified file as a "once only" file, e.g. + /// due to #pragma once. + void MarkFileIncludeOnce(const FileEntry *File) { + getFileInfo(File).isImport = true; + } + + /// MarkFileSystemHeader - Mark the specified file as a system header, e.g. + /// due to #pragma GCC system_header. + void MarkFileSystemHeader(const FileEntry *File) { + getFileInfo(File).DirInfo = SrcMgr::C_System; + } + + /// IncrementIncludeCount - Increment the count for the number of times the + /// specified FileEntry has been entered. 
+ void IncrementIncludeCount(const FileEntry *File) { + ++getFileInfo(File).NumIncludes; + } + + /// SetFileControllingMacro - Mark the specified file as having a controlling + /// macro. This is used by the multiple-include optimization to eliminate + /// no-op #includes. + void SetFileControllingMacro(const FileEntry *File, + const IdentifierInfo *ControllingMacro) { + getFileInfo(File).ControllingMacro = ControllingMacro; + } + + /// CreateHeaderMap - This method returns a HeaderMap for the specified + /// FileEntry, uniquing them through the 'HeaderMaps' data structure. + const HeaderMap *CreateHeaderMap(const FileEntry *FE); + + void IncrementFrameworkLookupCount() { ++NumFrameworkLookups; } + + typedef std::vector<HeaderFileInfo>::iterator header_file_iterator; + header_file_iterator header_file_begin() { return FileInfo.begin(); } + header_file_iterator header_file_end() { return FileInfo.end(); } + + // Used by PCHReader. + void setHeaderFileInfoForUID(HeaderFileInfo HFI, unsigned UID); + + void PrintStats(); +private: + + /// getFileInfo - Return the HeaderFileInfo structure for the specified + /// FileEntry. + HeaderFileInfo &getFileInfo(const FileEntry *FE); +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/LexDiagnostic.h b/include/clang/Lex/LexDiagnostic.h new file mode 100644 index 0000000000000..1502efb55e63c --- /dev/null +++ b/include/clang/Lex/LexDiagnostic.h @@ -0,0 +1,27 @@ +//===--- LexDiagnostic.h - Diagnostics for liblex ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_DIAGNOSTICLEX_H +#define LLVM_CLANG_DIAGNOSTICLEX_H + +#include "clang/Basic/Diagnostic.h" + +namespace clang { + namespace diag { + enum { +#define DIAG(ENUM,FLAGS,DEFAULT_MAPPING,DESC,GROUP) ENUM, +#define LEXSTART +#include "clang/Basic/DiagnosticLexKinds.inc" +#undef DIAG + NUM_BUILTIN_LEX_DIAGNOSTICS + }; + } // end namespace diag +} // end namespace clang + +#endif diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h new file mode 100644 index 0000000000000..3a73147152afc --- /dev/null +++ b/include/clang/Lex/Lexer.h @@ -0,0 +1,376 @@ +//===--- Lexer.h - C Language Family Lexer ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Lexer interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEXER_H +#define LLVM_CLANG_LEXER_H + +#include "clang/Lex/PreprocessorLexer.h" +#include "clang/Basic/LangOptions.h" +#include "llvm/ADT/SmallVector.h" +#include <string> +#include <vector> +#include <cassert> + +namespace clang { +class Diagnostic; +class SourceManager; +class Preprocessor; +class DiagnosticBuilder; + +/// Lexer - This provides a simple interface that turns a text buffer into a +/// stream of tokens. This provides no support for file reading or buffering, +/// or buffering/seeking of tokens, only forward lexing is supported. It relies +/// on the specified Preprocessor object to handle preprocessor directives, etc. 
+class Lexer : public PreprocessorLexer { + //===--------------------------------------------------------------------===// + // Constant configuration values for this lexer. 
+ const char *BufferStart; // Start of the buffer. + const char *BufferEnd; // End of the buffer. + SourceLocation FileLoc; // Location for start of file. + LangOptions Features; // Features enabled by this language (cache). + bool Is_PragmaLexer; // True if lexer for _Pragma handling. + + //===--------------------------------------------------------------------===// + // Context-specific lexing flags set by the preprocessor. + // + + /// ExtendedTokenMode - The lexer can optionally keep comments and whitespace + /// and return them as tokens. This is used for -C and -CC modes, and + /// whitespace preservation can be useful for some clients that want to lex + /// the file in raw mode and get every character from the file. + /// + /// When this is set to 2 it returns comments and whitespace. When set to 1 + /// it returns comments, when it is set to 0 it returns normal tokens only. + unsigned char ExtendedTokenMode; + + //===--------------------------------------------------------------------===// + // Context that changes as the file is lexed. + // NOTE: any state that mutates when in raw mode must have save/restore code + // in Lexer::isNextPPTokenLParen. + + // BufferPtr - Current pointer into the buffer. This is the next character + // to be lexed. + const char *BufferPtr; + + // IsAtStartOfLine - True if the next lexed token should get the "start of + // line" flag set on it. + bool IsAtStartOfLine; + + Lexer(const Lexer&); // DO NOT IMPLEMENT + void operator=(const Lexer&); // DO NOT IMPLEMENT + friend class Preprocessor; + + void InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd); +public: + + /// Lexer constructor - Create a new lexer object for the specified buffer + /// with the specified preprocessor managing the lexing process. This lexer + /// assumes that the associated file buffer and Preprocessor objects will + /// outlive it, so it doesn't take ownership of either of them. 
+ Lexer(FileID FID, Preprocessor &PP); + + /// Lexer constructor - Create a new raw lexer object. This object is only + /// suitable for calls to 'LexFromRawLexer'. This lexer assumes that the + /// text range will outlive it, so it doesn't take ownership of it. + Lexer(SourceLocation FileLoc, const LangOptions &Features, + const char *BufStart, const char *BufPtr, const char *BufEnd); + + /// Lexer constructor - Create a new raw lexer object. This object is only + /// suitable for calls to 'LexFromRawLexer'. This lexer assumes that the + /// text range will outlive it, so it doesn't take ownership of it. + Lexer(FileID FID, const SourceManager &SM, const LangOptions &Features); + + /// Create_PragmaLexer: Lexer constructor - Create a new lexer object for + /// _Pragma expansion. This has a variety of magic semantics that this method + /// sets up. It returns a new'd Lexer that must be delete'd when done. + static Lexer *Create_PragmaLexer(SourceLocation SpellingLoc, + SourceLocation InstantiationLocStart, + SourceLocation InstantiationLocEnd, + unsigned TokLen, Preprocessor &PP); + + + /// getFeatures - Return the language features currently enabled. NOTE: this + /// lexer modifies features as a file is parsed! + const LangOptions &getFeatures() const { return Features; } + + /// getFileLoc - Return the File Location for the file we are lexing out of. + /// The physical location encodes the location where the characters come from, + /// the virtual location encodes where we should *claim* the characters came + /// from. Currently this is only used by _Pragma handling. + SourceLocation getFileLoc() const { return FileLoc; } + + /// Lex - Lex the next token in the file into Result. If this is the end of + /// file, it returns the tok::eof token. Note that this returns void: no + /// error status is reported to the caller. This implicitly involves + /// the preprocessor. + void Lex(Token &Result) { + // Start a new token. 
+ Result.startToken(); + + // NOTE, any changes here should also change code after calls to + // Preprocessor::HandleDirective + if (IsAtStartOfLine) { + Result.setFlag(Token::StartOfLine); + IsAtStartOfLine = false; + } + + // Get a token. Note that this may delete the current lexer if the end of + // file is reached. + LexTokenInternal(Result); + } + + /// isPragmaLexer - Returns true if this Lexer is being used to lex a pragma. + bool isPragmaLexer() const { return Is_PragmaLexer; } + + /// IndirectLex - An indirect call to 'Lex' that can be invoked via + /// the PreprocessorLexer interface. + void IndirectLex(Token &Result) { Lex(Result); } + + /// LexFromRawLexer - Lex a token from a designated raw lexer (one with no + /// associated preprocessor object). Return true if the 'next character to + /// read' pointer points at the end of the lexer buffer, false otherwise. + bool LexFromRawLexer(Token &Result) { + assert(LexingRawMode && "Not already in raw mode!"); + Lex(Result); + // Note that lexing to the end of the buffer doesn't implicitly delete the + // lexer when in raw mode. + return BufferPtr == BufferEnd; + } + + /// isKeepWhitespaceMode - Return true if the lexer should return tokens for + /// every character in the file, including whitespace and comments. This + /// should only be used in raw mode, as the preprocessor is not prepared to + /// deal with the excess tokens. + bool isKeepWhitespaceMode() const { + return ExtendedTokenMode > 1; + } + + /// SetKeepWhitespaceMode - This method lets clients enable or disable + /// whitespace retention mode. + void SetKeepWhitespaceMode(bool Val) { + assert((!Val || LexingRawMode) && + "Can only enable whitespace retention in raw mode"); + ExtendedTokenMode = Val ? 2 : 0; + } + + /// inKeepCommentMode - Return true if the lexer should return comments as + /// tokens. 
+ bool inKeepCommentMode() const { + return ExtendedTokenMode > 0; + } + + /// SetCommentRetentionState - Change the comment retention mode of the lexer + /// to the specified mode. This is really only useful when lexing in raw + /// mode, because otherwise the lexer needs to manage this. + void SetCommentRetentionState(bool Mode) { + assert(!isKeepWhitespaceMode() && + "Can't play with comment retention state when retaining whitespace"); + ExtendedTokenMode = Mode ? 1 : 0; + } + + const char *getBufferStart() const { return BufferStart; } + + /// ReadToEndOfLine - Read the rest of the current preprocessor line as an + /// uninterpreted string. This switches the lexer out of directive mode. + std::string ReadToEndOfLine(); + + + /// Diag - Forwarding function for diagnostics. This translates a source + /// position in the current buffer into a SourceLocation object for rendering. + DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const; + + /// getSourceLocation - Return a source location identifier for the specified + /// offset in the current file. + SourceLocation getSourceLocation(const char *Loc, unsigned TokLen = 1) const; + + /// getSourceLocation - Return a source location for the next character in + /// the current file. + SourceLocation getSourceLocation() { return getSourceLocation(BufferPtr); } + + /// Stringify - Convert the specified string into a C string by escaping '\' + /// and " characters. This does not add surrounding ""'s to the string. + /// If Charify is true, this escapes the ' character instead of ". + static std::string Stringify(const std::string &Str, bool Charify = false); + + /// Stringify - Convert the specified string into a C string by escaping '\' + /// and " characters. This does not add surrounding ""'s to the string. + static void Stringify(llvm::SmallVectorImpl<char> &Str); + + /// MeasureTokenLength - Relex the token at the specified location and return + /// its length in bytes in the input file. 
If the token needs cleaning (e.g. + /// includes a trigraph or an escaped newline) then this count includes bytes + /// that are part of that. + static unsigned MeasureTokenLength(SourceLocation Loc, + const SourceManager &SM, + const LangOptions &LangOpts); + + //===--------------------------------------------------------------------===// + // Internal implementation interfaces. +private: + + /// LexTokenInternal - Internal interface to lex a preprocessing token. Called + /// by Lex. + /// + void LexTokenInternal(Token &Result); + + /// FormTokenWithChars - When we lex a token, we have identified a span + /// starting at BufferPtr, going to TokEnd that forms the token. This method + /// takes that range and assigns it to the token as its location and size. In + /// addition, since tokens cannot overlap, this also updates BufferPtr to be + /// TokEnd. + void FormTokenWithChars(Token &Result, const char *TokEnd, + tok::TokenKind Kind) { + unsigned TokLen = TokEnd-BufferPtr; + Result.setLength(TokLen); + Result.setLocation(getSourceLocation(BufferPtr, TokLen)); + Result.setKind(Kind); + BufferPtr = TokEnd; + } + + /// isNextPPTokenLParen - Return 1 if the next unexpanded token will return a + /// tok::l_paren token, 0 if it is something else and 2 if there are no more + /// tokens in the buffer controlled by this lexer. + unsigned isNextPPTokenLParen(); + + //===--------------------------------------------------------------------===// + // Lexer character reading interfaces. +public: + + // This lexer is built on two interfaces for reading characters, both of which + // automatically provide phase 1/2 translation. getAndAdvanceChar is used + // when we know that we will be reading a character from the input buffer and + // that this character will be part of the result token. This occurs in (e.g.) + // string processing, because we know we need to read until we find the + // closing '"' character. 
+ // + // The second interface is the combination of PeekCharAndSize with + // ConsumeChar. PeekCharAndSize reads a phase 1/2 translated character, + // returning it and its size. If the lexer decides that this character is + // part of the current token, it calls ConsumeChar on it. This two stage + // approach allows us to emit diagnostics for characters (e.g. warnings about + // trigraphs), knowing that they only are emitted if the character is + // consumed. + + /// isObviouslySimpleCharacter - Return true if the specified character is + /// obviously the same in translation phase 1 and translation phase 3. This + /// can return false for characters that end up being the same, but it will + /// never return true for something that needs to be mapped. + static bool isObviouslySimpleCharacter(char C) { + return C != '?' && C != '\\'; + } + + /// getAndAdvanceChar - Read a single 'character' from the specified buffer, + /// advance over it, and return it. This is tricky in several cases. Here we + /// just handle the trivial case and fall-back to the non-inlined + /// getCharAndSizeSlow method to handle the hard case. + inline char getAndAdvanceChar(const char *&Ptr, Token &Tok) { + // If this is not a trigraph and not a UCN or escaped newline, return + // quickly. + if (isObviouslySimpleCharacter(Ptr[0])) return *Ptr++; + + unsigned Size = 0; + char C = getCharAndSizeSlow(Ptr, Size, &Tok); + Ptr += Size; + return C; + } + +private: + /// ConsumeChar - When a character (identified by PeekCharAndSize) is consumed + /// and added to a given token, check to see if there are diagnostics that + /// need to be emitted or flags that need to be set on the token. If so, do + /// it. + const char *ConsumeChar(const char *Ptr, unsigned Size, Token &Tok) { + // Normal case, we consumed exactly one character. Just step past it. + if (Size == 1) + return Ptr+Size; + + // Otherwise, re-lex the character with a current token, allowing + // diagnostics to be emitted and flags to be set. 
+ Size = 0; + getCharAndSizeSlow(Ptr, Size, &Tok); + return Ptr+Size; + } + + /// getCharAndSize - Peek a single 'character' from the specified buffer, + /// get its size, and return it. This is tricky in several cases. Here we + /// just handle the trivial case and fall-back to the non-inlined + /// getCharAndSizeSlow method to handle the hard case. + inline char getCharAndSize(const char *Ptr, unsigned &Size) { + // If this is not a trigraph and not a UCN or escaped newline, return + // quickly. + if (isObviouslySimpleCharacter(Ptr[0])) { + Size = 1; + return *Ptr; + } + + Size = 0; + return getCharAndSizeSlow(Ptr, Size); + } + + /// getCharAndSizeSlow - Handle the slow/uncommon case of the getCharAndSize + /// method. + char getCharAndSizeSlow(const char *Ptr, unsigned &Size, Token *Tok = 0); +public: + + /// getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever + /// emit a warning. + static inline char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, + const LangOptions &Features) { + // If this is not a trigraph and not a UCN or escaped newline, return + // quickly. + if (isObviouslySimpleCharacter(Ptr[0])) { + Size = 1; + return *Ptr; + } + + Size = 0; + return getCharAndSizeSlowNoWarn(Ptr, Size, Features); + } + + /// getEscapedNewLineSize - Return the size of the specified escaped newline, + /// or 0 if it is not an escaped newline. P[-1] is known to be a "\" on entry + /// to this function. + static unsigned getEscapedNewLineSize(const char *P); + + /// SkipEscapedNewLines - If P points to an escaped newline (or a series of + /// them), skip over them and return the first non-escaped-newline found, + /// otherwise return P. + static const char *SkipEscapedNewLines(const char *P); +private: + + /// getCharAndSizeSlowNoWarn - Same as getCharAndSizeSlow, but never emits a + /// diagnostic. 
+ static char getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size, + const LangOptions &Features); + + //===--------------------------------------------------------------------===// + // Other lexer functions. + + // Helper functions to lex the remainder of a token of the specific type. + void LexIdentifier (Token &Result, const char *CurPtr); + void LexNumericConstant (Token &Result, const char *CurPtr); + void LexStringLiteral (Token &Result, const char *CurPtr,bool Wide); + void LexAngledStringLiteral(Token &Result, const char *CurPtr); + void LexCharConstant (Token &Result, const char *CurPtr); + bool LexEndOfFile (Token &Result, const char *CurPtr); + + bool SkipWhitespace (Token &Result, const char *CurPtr); + bool SkipBCPLComment (Token &Result, const char *CurPtr); + bool SkipBlockComment (Token &Result, const char *CurPtr); + bool SaveBCPLComment (Token &Result, const char *CurPtr); +}; + + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/LiteralSupport.h b/include/clang/Lex/LiteralSupport.h new file mode 100644 index 0000000000000..8ee8ecf7359fe --- /dev/null +++ b/include/clang/Lex/LiteralSupport.h @@ -0,0 +1,176 @@ +//===--- LiteralSupport.h ---------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the NumericLiteralParser, CharLiteralParser, and +// StringLiteralParser interfaces. 
+// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_LITERALSUPPORT_H +#define CLANG_LITERALSUPPORT_H + +#include <string> +#include "llvm/ADT/SmallString.h" + +namespace llvm { + class APInt; + class APFloat; + struct fltSemantics; +} + +namespace clang { + +class Diagnostic; +class Preprocessor; +class Token; +class SourceLocation; +class TargetInfo; + +/// NumericLiteralParser - This performs strict semantic analysis of the content +/// of a ppnumber, classifying it as either integer, floating, or erroneous, +/// determines the radix of the value and can convert it to a useful value. +class NumericLiteralParser { + Preprocessor &PP; // needed for diagnostics + + const char *const ThisTokBegin; + const char *const ThisTokEnd; + const char *DigitsBegin, *SuffixBegin; // markers + const char *s; // cursor + + unsigned radix; + + bool saw_exponent, saw_period; + +public: + NumericLiteralParser(const char *begin, const char *end, + SourceLocation Loc, Preprocessor &PP); + bool hadError; + bool isUnsigned; + bool isLong; // This is *not* set for long long. + bool isLongLong; + bool isFloat; // 1.0f + bool isImaginary; // 1.0i + + bool isIntegerLiteral() const { + return !saw_period && !saw_exponent; + } + bool isFloatingLiteral() const { + return saw_period || saw_exponent; + } + bool hasSuffix() const { + return SuffixBegin != ThisTokEnd; + } + + unsigned getRadix() const { return radix; } + + /// GetIntegerValue - Convert this numeric literal value to an APInt that + /// matches Val's input width. If there is an overflow (i.e., if the unsigned + /// value read is larger than the APInt's bits will hold), set Val to the low + /// bits of the result and return true. Otherwise, return false. + bool GetIntegerValue(llvm::APInt &Val); + + /// GetFloatValue - Convert this numeric literal to a floating value, using + /// the specified APFloat fltSemantics (specifying float, double, etc). 
+ /// The optional bool isExact (passed by pointer) has its value + /// set to true if the returned APFloat can represent the number in the + /// literal exactly, and false otherwise. + llvm::APFloat GetFloatValue(const llvm::fltSemantics &Format, + bool* isExact = NULL); + +private: + + void ParseNumberStartingWithZero(SourceLocation TokLoc); + + /// SkipHexDigits - Read and skip over any hex digits, up to End. + /// Return a pointer to the first non-hex digit or End. + const char *SkipHexDigits(const char *ptr) { + while (ptr != ThisTokEnd && isxdigit(*ptr)) + ptr++; + return ptr; + } + + /// SkipOctalDigits - Read and skip over any octal digits, up to End. + /// Return a pointer to the first non-octal digit or End. + const char *SkipOctalDigits(const char *ptr) { + while (ptr != ThisTokEnd && ((*ptr >= '0') && (*ptr <= '7'))) + ptr++; + return ptr; + } + + /// SkipDigits - Read and skip over any decimal digits, up to End. + /// Return a pointer to the first non-decimal digit or End. + const char *SkipDigits(const char *ptr) { + while (ptr != ThisTokEnd && isdigit(*ptr)) + ptr++; + return ptr; + } + + /// SkipBinaryDigits - Read and skip over any binary digits, up to End. + /// Return a pointer to the first non-binary digit or End. + const char *SkipBinaryDigits(const char *ptr) { + while (ptr != ThisTokEnd && (*ptr == '0' || *ptr == '1')) + ptr++; + return ptr; + } + +}; + +/// CharLiteralParser - Perform interpretation and semantic analysis of a +/// character literal.
+class CharLiteralParser { + uint64_t Value; + bool IsWide; + bool IsMultiChar; + bool HadError; +public: + CharLiteralParser(const char *begin, const char *end, + SourceLocation Loc, Preprocessor &PP); + + bool hadError() const { return HadError; } + bool isWide() const { return IsWide; } + bool isMultiChar() const { return IsMultiChar; } + uint64_t getValue() const { return Value; } +}; + +/// StringLiteralParser - This decodes string escape characters and performs +/// wide string analysis and Translation Phase #6 (concatenation of string +/// literals) (C99 5.1.1.2p1). +class StringLiteralParser { + Preprocessor &PP; + + unsigned MaxTokenLength; + unsigned SizeBound; + unsigned wchar_tByteWidth; + llvm::SmallString<512> ResultBuf; + char *ResultPtr; // cursor +public: + StringLiteralParser(const Token *StringToks, unsigned NumStringToks, + Preprocessor &PP); + bool hadError; + bool AnyWide; + bool Pascal; + + const char *GetString() { return &ResultBuf[0]; } + unsigned GetStringLength() const { return ResultPtr-&ResultBuf[0]; } + + unsigned GetNumStringChars() const { + if (AnyWide) + return GetStringLength() / wchar_tByteWidth; + return GetStringLength(); + } + /// getOffsetOfStringByte - This function returns the offset of the + /// specified byte of the string data represented by Token. This handles + /// advancing over escape sequences in the string. + static unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo, + Preprocessor &PP); +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/MacroInfo.h b/include/clang/Lex/MacroInfo.h new file mode 100644 index 0000000000000..ccd13c80d3546 --- /dev/null +++ b/include/clang/Lex/MacroInfo.h @@ -0,0 +1,218 @@ +//===--- MacroInfo.h - Information about #defined identifiers ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the MacroInfo interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_MACROINFO_H +#define LLVM_CLANG_MACROINFO_H + +#include "clang/Lex/Token.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Allocator.h" +#include <vector> +#include <cassert> + +namespace clang { + class Preprocessor; + +/// MacroInfo - Each identifier that is #define'd has an instance of this class +/// associated with it, used to implement macro expansion. +class MacroInfo { + //===--------------------------------------------------------------------===// + // State set when the macro is defined. + + /// Location - This is the place the macro is defined. + SourceLocation Location; + /// EndLocation - The location of the last token in the macro. + SourceLocation EndLocation; + + /// ArgumentList - The list of arguments for a function-like macro. This can + /// be empty, e.g. for "#define X()". In a C99-style variadic macro, this + /// includes the __VA_ARGS__ identifier on the list. + IdentifierInfo **ArgumentList; + unsigned NumArguments; + + /// ReplacementTokens - This is the list of tokens that the macro is defined + /// to. + llvm::SmallVector<Token, 8> ReplacementTokens; + + /// IsFunctionLike - True if this macro is a function-like macro, false if it + /// is an object-like macro. + bool IsFunctionLike : 1; + + /// IsC99Varargs - True if this macro is of the form "#define X(...)" or + /// "#define X(Y,Z,...)". The __VA_ARGS__ token should be replaced with the + /// contents of "..." in an invocation. + bool IsC99Varargs : 1; + + /// IsGNUVarargs - True if this macro is of the form "#define X(a...)". The + /// "a" identifier in the replacement list will be replaced with all arguments + /// of the macro starting with the specified one.
+ bool IsGNUVarargs : 1; + + /// IsBuiltinMacro - True if this is a builtin macro, such as __LINE__, and if + /// it has not yet been redefined or undefined. + bool IsBuiltinMacro : 1; + +private: + //===--------------------------------------------------------------------===// + // State that changes as the macro is used. + + /// IsDisabled - True if we have started an expansion of this macro already. + /// This disables recursive expansion, which would be quite bad for things like + /// #define A A. + bool IsDisabled : 1; + + /// IsUsed - True if this macro is either defined in the main file and has + /// been used, or if it is not defined in the main file. This is used to + /// emit -Wunused-macros diagnostics. + bool IsUsed : 1; + + ~MacroInfo() { + assert(ArgumentList == 0 && "Didn't call destroy before dtor!"); + } + +public: + MacroInfo(SourceLocation DefLoc); + + /// FreeArgumentList - Free the argument list of the macro, restoring it to a + /// state where it can be reused for other devious purposes. + void FreeArgumentList(llvm::BumpPtrAllocator &PPAllocator) { + PPAllocator.Deallocate(ArgumentList); + ArgumentList = 0; + NumArguments = 0; + } + + /// Destroy - destroy this MacroInfo object. + void Destroy(llvm::BumpPtrAllocator &PPAllocator) { + FreeArgumentList(PPAllocator); + this->~MacroInfo(); + } + + /// getDefinitionLoc - Return the location that the macro was defined at. + /// + SourceLocation getDefinitionLoc() const { return Location; } + + /// setDefinitionEndLoc - Set the location of the last token in the macro. + /// + void setDefinitionEndLoc(SourceLocation EndLoc) { EndLocation = EndLoc; } + /// getDefinitionEndLoc - Return the location of the last token in the macro. + /// + SourceLocation getDefinitionEndLoc() const { return EndLocation; } + + /// isIdenticalTo - Return true if the specified macro definition is equal to + /// this macro in spelling, arguments, and whitespace. This is used to emit + /// duplicate definition warnings.
This implements the rules in C99 6.10.3. + bool isIdenticalTo(const MacroInfo &Other, Preprocessor &PP) const; + + /// setIsBuiltinMacro - Set or clear the isBuiltinMacro flag. + /// + void setIsBuiltinMacro(bool Val = true) { + IsBuiltinMacro = Val; + } + + /// setIsUsed - Set the value of the IsUsed flag. + /// + void setIsUsed(bool Val) { + IsUsed = Val; + } + + /// setArgumentList - Set the specified list of identifiers as the argument + /// list for this macro. + void setArgumentList(IdentifierInfo* const *List, unsigned NumArgs, + llvm::BumpPtrAllocator &PPAllocator) { + assert(ArgumentList == 0 && NumArguments == 0 && + "Argument list already set!"); + if (NumArgs == 0) return; + + NumArguments = NumArgs; + ArgumentList = PPAllocator.Allocate<IdentifierInfo*>(NumArgs); + for (unsigned i = 0; i != NumArgs; ++i) + ArgumentList[i] = List[i]; + } + + /// Arguments - The list of arguments for a function-like macro. This can be + /// empty, for, e.g. "#define X()". + typedef IdentifierInfo* const *arg_iterator; + bool arg_empty() const { return NumArguments == 0; } + arg_iterator arg_begin() const { return ArgumentList; } + arg_iterator arg_end() const { return ArgumentList+NumArguments; } + unsigned getNumArgs() const { return NumArguments; } + + /// getArgumentNum - Return the argument number of the specified identifier, + /// or -1 if the identifier is not a formal argument identifier. + int getArgumentNum(IdentifierInfo *Arg) const { + for (arg_iterator I = arg_begin(), E = arg_end(); I != E; ++I) + if (*I == Arg) return I-arg_begin(); + return -1; + } + + /// Function/Object-likeness. Keep track of whether this macro has formal + /// parameters. + void setIsFunctionLike() { IsFunctionLike = true; } + bool isFunctionLike() const { return IsFunctionLike; } + bool isObjectLike() const { return !IsFunctionLike; } + + /// Varargs querying methods. This can only be set for function-like macros. 
+ void setIsC99Varargs() { IsC99Varargs = true; } + void setIsGNUVarargs() { IsGNUVarargs = true; } + bool isC99Varargs() const { return IsC99Varargs; } + bool isGNUVarargs() const { return IsGNUVarargs; } + bool isVariadic() const { return IsC99Varargs | IsGNUVarargs; } + + /// isBuiltinMacro - Return true if this macro is a builtin macro, such as + /// __LINE__, which requires processing before expansion. + bool isBuiltinMacro() const { return IsBuiltinMacro; } + + /// isUsed - Return false if this macro is defined in the main file and has + /// not yet been used. + bool isUsed() const { return IsUsed; } + + /// getNumTokens - Return the number of tokens that this macro expands to. + /// + unsigned getNumTokens() const { + return ReplacementTokens.size(); + } + + const Token &getReplacementToken(unsigned Tok) const { + assert(Tok < ReplacementTokens.size() && "Invalid token #"); + return ReplacementTokens[Tok]; + } + + typedef llvm::SmallVector<Token, 8>::const_iterator tokens_iterator; + tokens_iterator tokens_begin() const { return ReplacementTokens.begin(); } + tokens_iterator tokens_end() const { return ReplacementTokens.end(); } + bool tokens_empty() const { return ReplacementTokens.empty(); } + + /// AddTokenToBody - Add the specified token to the replacement text for the + /// macro. + void AddTokenToBody(const Token &Tok) { + ReplacementTokens.push_back(Tok); + } + + /// isEnabled - Return true if this macro is enabled: in other words, that we + /// are not currently in an expansion of this macro. 
+ bool isEnabled() const { return !IsDisabled; } + + void EnableMacro() { + assert(IsDisabled && "Cannot enable an already-enabled macro!"); + IsDisabled = false; + } + + void DisableMacro() { + assert(!IsDisabled && "Cannot disable an already-disabled macro!"); + IsDisabled = true; + } +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/MultipleIncludeOpt.h b/include/clang/Lex/MultipleIncludeOpt.h new file mode 100644 index 0000000000000..94d4677f9d29c --- /dev/null +++ b/include/clang/Lex/MultipleIncludeOpt.h @@ -0,0 +1,130 @@ +//===--- MultipleIncludeOpt.h - Header Multiple-Include Optzn ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MultipleIncludeOpt interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_MULTIPLEINCLUDEOPT_H +#define LLVM_CLANG_MULTIPLEINCLUDEOPT_H + +namespace clang { +class IdentifierInfo; + +/// MultipleIncludeOpt - This class implements the simple state machine that the +/// Lexer class uses to detect files subject to the 'multiple-include' +/// optimization. The public methods in this class are triggered by various +/// events that occur when a file is lexed, and after the entire file is lexed, +/// information about which macro (if any) controls the header is returned. +class MultipleIncludeOpt { + /// ReadAnyTokens - This is set to false when a file is first opened and true + /// any time a token is returned to the client or a (non-multiple-include) + /// directive is parsed. When the final #endif is parsed this is reset back + /// to false, that way any tokens before the first #ifdef or after the last + /// #endif can be easily detected. 
+ bool ReadAnyTokens; + + /// DidMacroExpansion - Initially false; ExpandedMacro sets it to true when a + /// macro is expanded with this lexer as the current buffer. If it is already + /// set when EnterTopLevelIFNDEF runs, a macro was expanded on the #ifndef + /// line, so the condition could evaluate differently on a later #include and + /// the file is rejected for the multiple-include optimization. + bool DidMacroExpansion; + + /// TheMacro - The controlling macro for a file, if valid. + /// + const IdentifierInfo *TheMacro; +public: + MultipleIncludeOpt() { + ReadAnyTokens = false; + DidMacroExpansion = false; + TheMacro = 0; + } + + /// Invalidate - Permanently mark this file as not being suitable for the + /// include-file optimization. + void Invalidate() { + // If we have read tokens but have no controlling macro, the state-machine + // below can never "accept". + ReadAnyTokens = true; + TheMacro = 0; + } + + /// getHasReadAnyTokensVal - This is used for the #ifndef handshake at the + /// top of the file when reading preprocessor directives. Otherwise, reading + /// the "ifndef x" would count as reading tokens. + bool getHasReadAnyTokensVal() const { return ReadAnyTokens; } + + // If a token is read, remember that we have seen a side-effect in this file. + void ReadToken() { ReadAnyTokens = true; } + + /// ExpandedMacro - When a macro is expanded with this lexer as the current + /// buffer, this method is called to disable the MIOpt if needed. + void ExpandedMacro() { DidMacroExpansion = true; } + + /// EnterTopLevelIFNDEF - When entering a top-level #ifndef directive (or the + /// "#if !defined" equivalent) without any preceding tokens, this method is + /// called. + /// + /// Note, we don't care about the input value of 'ReadAnyTokens'. The caller + /// ensures that this is only called if there are no tokens read before the + /// #ifndef. The caller is required to do this, because reading the #if line + /// obviously reads in tokens.
+ void EnterTopLevelIFNDEF(const IdentifierInfo *M) { + // If the macro is already set, this is after the top-level #endif. + if (TheMacro) + return Invalidate(); + + // If we have already expanded a macro by the end of the #ifndef line, then + // there is a macro expansion *in* the #ifndef line. This means that the + // condition could evaluate differently when subsequently #included. Reject + // this. + if (DidMacroExpansion) + return Invalidate(); + + // Remember that we're in the #if and that we have the macro. + ReadAnyTokens = true; + TheMacro = M; + } + + /// EnterTopLevelConditional - This is invoked when a top level conditional + /// (except #ifndef) is found. + void EnterTopLevelConditional() { + /// If a conditional directive (except #ifndef) is found at the top level, + /// there is a chunk of the file not guarded by the controlling macro. + Invalidate(); + } + + /// ExitTopLevelConditional - This method is called when the lexer exits the + /// top-level conditional. + void ExitTopLevelConditional() { + // If we have a macro, that means the top of the file was ok. Set our state + // back to "not having read any tokens" so we can detect anything after the + // #endif. + if (!TheMacro) return Invalidate(); + + // At this point, we haven't "read any tokens" but we do have a controlling + // macro. + ReadAnyTokens = false; + } + + /// GetControllingMacroAtEndOfFile - Once the entire file has been lexed, if + /// there is a controlling macro, return it. + const IdentifierInfo *GetControllingMacroAtEndOfFile() const { + // If we haven't read any tokens after the #endif, return the controlling + // macro if it's valid (if it isn't, it will be null). 
+ if (!ReadAnyTokens) + return TheMacro; + return 0; + } +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/PPCallbacks.h b/include/clang/Lex/PPCallbacks.h new file mode 100644 index 0000000000000..e5cbeebd22aa0 --- /dev/null +++ b/include/clang/Lex/PPCallbacks.h @@ -0,0 +1,122 @@ +//===--- PPCallbacks.h - Callbacks for Preprocessor actions -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PPCallbacks interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEX_PPCALLBACKS_H +#define LLVM_CLANG_LEX_PPCALLBACKS_H + +#include "clang/Lex/DirectoryLookup.h" +#include "clang/Basic/SourceLocation.h" +#include <string> + +namespace clang { + class SourceLocation; + class Token; + class IdentifierInfo; + class MacroInfo; + +/// PPCallbacks - This interface provides a way to observe the actions of the +/// preprocessor as it does its thing. Clients can define their hooks here to +/// implement preprocessor level tools. +class PPCallbacks { +public: + virtual ~PPCallbacks(); + + enum FileChangeReason { + EnterFile, ExitFile, SystemHeaderPragma, RenameFile + }; + + /// FileChanged - This callback is invoked whenever a source file is + /// entered or exited. The SourceLocation indicates the new location, and + /// Reason gives the cause as one of the FileChangeReason values (EnterFile, + /// ExitFile, SystemHeaderPragma, RenameFile). FileType is the + /// SrcMgr::CharacteristicKind supplied with the change. + virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason, + SrcMgr::CharacteristicKind FileType) { + } + + /// Ident - This callback is invoked when a #ident or #sccs directive is read.
+ /// + virtual void Ident(SourceLocation Loc, const std::string &str) { + } + + /// PragmaComment - This callback is invoked when a #pragma comment directive + /// is read. + /// + virtual void PragmaComment(SourceLocation Loc, const IdentifierInfo *Kind, + const std::string &Str) { + } + + /// MacroExpands - This is called by + /// Preprocessor::HandleMacroExpandedIdentifier when a macro invocation is + /// found. + virtual void MacroExpands(const Token &Id, const MacroInfo* MI) { + } + + /// MacroDefined - This hook is called whenever a macro definition is seen. + virtual void MacroDefined(const IdentifierInfo *II, const MacroInfo *MI) { + } + + /// MacroUndefined - This hook is called whenever a macro #undef is seen. + /// MI is released immediately following this callback. + virtual void MacroUndefined(const IdentifierInfo *II, const MacroInfo *MI) { + } +}; + +/// PPChainedCallbacks - Simple wrapper class for chaining callbacks. +class PPChainedCallbacks : public PPCallbacks { + PPCallbacks *First, *Second; + +public: + PPChainedCallbacks(PPCallbacks *_First, PPCallbacks *_Second) + : First(_First), Second(_Second) {} + ~PPChainedCallbacks() { + delete Second; + delete First; + } + + virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason, + SrcMgr::CharacteristicKind FileType) { + First->FileChanged(Loc, Reason, FileType); + Second->FileChanged(Loc, Reason, FileType); + } + + virtual void Ident(SourceLocation Loc, const std::string &str) { + First->Ident(Loc, str); + Second->Ident(Loc, str); + } + + virtual void PragmaComment(SourceLocation Loc, const IdentifierInfo *Kind, + const std::string &Str) { + First->PragmaComment(Loc, Kind, Str); + Second->PragmaComment(Loc, Kind, Str); + } + + virtual void MacroExpands(const Token &Id, const MacroInfo* MI) { + First->MacroExpands(Id, MI); + Second->MacroExpands(Id, MI); + } + + virtual void MacroDefined(const IdentifierInfo *II, const MacroInfo *MI) { + First->MacroDefined(II, MI); + 
Second->MacroDefined(II, MI); + } + + virtual void MacroUndefined(const IdentifierInfo *II, const MacroInfo *MI) { + First->MacroUndefined(II, MI); + Second->MacroUndefined(II, MI); + } +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/PTHLexer.h b/include/clang/Lex/PTHLexer.h new file mode 100644 index 0000000000000..369b818a1fc9e --- /dev/null +++ b/include/clang/Lex/PTHLexer.h @@ -0,0 +1,104 @@ +//===--- PTHLexer.h - Lexer based on Pre-tokenized input --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PTHLexer interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_PTHLEXER_H +#define LLVM_CLANG_PTHLEXER_H + +#include "clang/Lex/PreprocessorLexer.h" +#include <vector> + +namespace clang { + +class PTHManager; +class PTHSpellingSearch; + +class PTHLexer : public PreprocessorLexer { + SourceLocation FileStartLoc; + + /// TokBuf - Buffer from PTH file containing raw token data. + const unsigned char* TokBuf; + + /// CurPtr - Pointer into current offset of the token buffer where + /// the next token will be read. + const unsigned char* CurPtr; + + /// LastHashTokPtr - Pointer into TokBuf of the last processed '#' + /// token that appears at the start of a line. + const unsigned char* LastHashTokPtr; + + /// PPCond - Pointer to a side table in the PTH file that provides a + /// concise summary of the preprocessor conditional block structure. + /// This is used to perform quick skipping of conditional blocks. + const unsigned char* PPCond; + + /// CurPPCondPtr - Pointer inside PPCond that refers to the next entry + /// to process when doing quick skipping of preprocessor blocks.
+ const unsigned char* CurPPCondPtr; + + PTHLexer(const PTHLexer&); // DO NOT IMPLEMENT + void operator=(const PTHLexer&); // DO NOT IMPLEMENT + + /// ReadToken - Used by PTHLexer to read tokens from TokBuf. + void ReadToken(Token& T); + + /// PTHMgr - The PTHManager object that created this PTHLexer. + PTHManager& PTHMgr; + + Token EofToken; + +protected: + friend class PTHManager; + + /// Create a PTHLexer for the specified token stream. + PTHLexer(Preprocessor& pp, FileID FID, const unsigned char *D, + const unsigned char* ppcond, PTHManager &PM); +public: + + ~PTHLexer() {} + + /// Lex - Return the next token. + void Lex(Token &Tok); + + void getEOF(Token &Tok); + + /// DiscardToEndOfLine - Read the rest of the current preprocessor line as an + /// uninterpreted string. This switches the lexer out of directive mode. + void DiscardToEndOfLine(); + + /// isNextPPTokenLParen - Return 1 if the next unexpanded token is a + /// tok::l_paren token, 0 if it is something else and 2 if there are no more + /// tokens controlled by this lexer. + unsigned isNextPPTokenLParen() { + // isNextPPTokenLParen is not on the hot path, and all we care about is + // whether or not we are at a token with kind tok::eof or tok::l_paren. + // Just read the first byte from the current token pointer to determine + // its kind. + tok::TokenKind x = (tok::TokenKind)*CurPtr; + return x == tok::eof ? 2 : x == tok::l_paren; + } + + /// IndirectLex - An indirect call to 'Lex' that can be invoked via + /// the PreprocessorLexer interface. + void IndirectLex(Token &Result) { Lex(Result); } + + /// getSourceLocation - Return a source location for the token in + /// the current file. + SourceLocation getSourceLocation(); + + /// SkipBlock - Used by Preprocessor to skip the current conditional block.
+ bool SkipBlock(); +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/PTHManager.h b/include/clang/Lex/PTHManager.h new file mode 100644 index 0000000000000..507576473f60e --- /dev/null +++ b/include/clang/Lex/PTHManager.h @@ -0,0 +1,141 @@ +//===--- PTHManager.h - Manager object for PTH processing -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PTHManager interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_PTHMANAGER_H +#define LLVM_CLANG_PTHMANAGER_H + +#include "clang/Lex/PTHLexer.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/Diagnostic.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/Allocator.h" +#include <string> + +namespace llvm { + class MemoryBuffer; +} + +namespace clang { + +class FileEntry; +class PTHLexer; +class Diagnostic; +class StatSysCallCache; + +class PTHManager : public IdentifierInfoLookup { + friend class PTHLexer; + + /// The memory mapped PTH file. + const llvm::MemoryBuffer* Buf; + + /// Alloc - Allocator used for IdentifierInfo objects. + llvm::BumpPtrAllocator Alloc; + + /// PerIDCache - A lazily generated cache mapping from persistent identifiers + /// to IdentifierInfo*. + IdentifierInfo** PerIDCache; + + /// FileLookup - Abstract data structure used for mapping between files + /// and token data in the PTH file. + void* FileLookup; + + /// IdDataTable - Array representing the mapping from persistent IDs to the + /// data offset within the PTH file containing the information to + /// reconstitute an IdentifierInfo.
+ const unsigned char* const IdDataTable; + + /// StringIdLookup - Abstract data structure mapping from strings to + /// persistent IDs. This is used by get(). + void* StringIdLookup; + + /// NumIds - The number of identifiers in the PTH file. + const unsigned NumIds; + + /// PP - The Preprocessor object that will use this PTHManager to create + /// PTHLexer objects. + Preprocessor* PP; + + /// SpellingBase - The base offset within the PTH memory buffer that + /// contains the cached spellings for literals. + const unsigned char* const SpellingBase; + + /// OriginalSourceFile - A null-terminated C-string that specifies the name + /// of the file (if any) that was used to generate the PTH cache. + const char* OriginalSourceFile; + + /// This constructor is intended to only be called by the static 'Create' + /// method. + PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup, + const unsigned char* idDataTable, IdentifierInfo** perIDCache, + void* stringIdLookup, unsigned numIds, + const unsigned char* spellingBase, const char *originalSourceFile); + + // Do not implement. + PTHManager(); + void operator=(const PTHManager&); + + /// getSpellingAtPTHOffset - Used by PTHLexer classes to get the cached + /// spelling for a token. + unsigned getSpellingAtPTHOffset(unsigned PTHOffset, const char*& Buffer); + + /// GetIdentifierInfo - Used to reconstruct IdentifierInfo objects from the + /// PTH file. + inline IdentifierInfo* GetIdentifierInfo(unsigned PersistentID) { + // Check if the IdentifierInfo has already been resolved. + if (IdentifierInfo* II = PerIDCache[PersistentID]) + return II; + return LazilyCreateIdentifierInfo(PersistentID); + } + IdentifierInfo* LazilyCreateIdentifierInfo(unsigned PersistentID); + +public: + // The current PTH version. + enum { Version = 9 }; + + ~PTHManager(); + + /// getOriginalSourceFile - Return the full path to the original header + /// file name that was used to generate the PTH cache.
+ const char* getOriginalSourceFile() const { + return OriginalSourceFile; + } + + /// get - Return the identifier token info for the specified named identifier. + /// Unlike the version in IdentifierTable, this returns a pointer instead + /// of a reference. If the pointer is NULL then the IdentifierInfo cannot + /// be found. + IdentifierInfo *get(const char *NameStart, const char *NameEnd); + + /// Create - This method creates PTHManager objects. The 'file' argument + /// is the name of the PTH file. This method returns NULL upon failure. + static PTHManager *Create(const std::string& file, Diagnostic* Diags = 0, + Diagnostic::Level failureLevel=Diagnostic::Warning); + + void setPreprocessor(Preprocessor *pp) { PP = pp; } + + /// CreateLexer - Return a PTHLexer that "lexes" the cached tokens for the + /// specified file. This method returns NULL if no cached tokens exist. + /// It is the responsibility of the caller to 'delete' the returned object. + PTHLexer *CreateLexer(FileID FID); + + /// createStatCache - Returns a StatSysCallCache object for use with + /// FileManager objects. These objects use the PTH data to speed up + /// calls to stat by memoizing their results from when the PTH file + /// was generated. + StatSysCallCache *createStatCache(); +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/Pragma.h b/include/clang/Lex/Pragma.h new file mode 100644 index 0000000000000..136dc6fabfb67 --- /dev/null +++ b/include/clang/Lex/Pragma.h @@ -0,0 +1,90 @@ +//===--- Pragma.h - Pragma registration and handling ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PragmaHandler and PragmaTable interfaces. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_PRAGMA_H +#define LLVM_CLANG_PRAGMA_H + +#include <cassert> +#include <vector> + +namespace clang { + class Preprocessor; + class Token; + class IdentifierInfo; + class PragmaNamespace; + +/// PragmaHandler - Instances of this interface defined to handle the various +/// pragmas that the language front-end uses. Each handler optionally has a +/// name (e.g. "pack") and the HandlePragma method is invoked when a pragma with +/// that identifier is found. If a handler does not match any of the declared +/// pragmas the handler with a null identifier is invoked, if it exists. +/// +/// Note that the PragmaNamespace class can be used to subdivide pragmas, e.g. +/// we treat "#pragma STDC" and "#pragma GCC" as namespaces that contain other +/// pragmas. +class PragmaHandler { + const IdentifierInfo *Name; +public: + PragmaHandler(const IdentifierInfo *name) : Name(name) {} + virtual ~PragmaHandler(); + + const IdentifierInfo *getName() const { return Name; } + virtual void HandlePragma(Preprocessor &PP, Token &FirstToken) = 0; + + /// getIfNamespace - If this is a namespace, return it. This is equivalent to + /// using a dynamic_cast, but doesn't require RTTI. + virtual PragmaNamespace *getIfNamespace() { return 0; } +}; + +/// PragmaNamespace - This PragmaHandler subdivides the namespace of pragmas, +/// allowing hierarchical pragmas to be defined. Common examples of namespaces +/// are "#pragma GCC", "#pragma STDC", and "#pragma omp", but any namespaces may +/// be (potentially recursively) defined. +class PragmaNamespace : public PragmaHandler { + /// Handlers - This is the list of handlers in this namespace. + /// + std::vector<PragmaHandler*> Handlers; +public: + PragmaNamespace(const IdentifierInfo *Name) : PragmaHandler(Name) {} + virtual ~PragmaNamespace(); + + /// FindHandler - Check to see if there is already a handler for the + /// specified name. 
If not, return the handler for the null identifier if it + /// exists, otherwise return null. If IgnoreNull is true (the default) then + /// the null handler isn't returned on failure to match. + PragmaHandler *FindHandler(const IdentifierInfo *Name, + bool IgnoreNull = true) const; + + /// AddPragma - Add a pragma to this namespace. + /// + void AddPragma(PragmaHandler *Handler) { + Handlers.push_back(Handler); + } + + /// RemovePragmaHandler - Remove the given handler from the + /// namespace. + void RemovePragmaHandler(PragmaHandler *Handler); + + bool IsEmpty() { + return Handlers.empty(); + } + + virtual void HandlePragma(Preprocessor &PP, Token &FirstToken); + + virtual PragmaNamespace *getIfNamespace() { return this; } +}; + + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h new file mode 100644 index 0000000000000..5b9959c32a2d6 --- /dev/null +++ b/include/clang/Lex/Preprocessor.h @@ -0,0 +1,801 @@ +//===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Preprocessor interface. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEX_PREPROCESSOR_H +#define LLVM_CLANG_LEX_PREPROCESSOR_H + +#include "clang/Lex/Lexer.h" +#include "clang/Lex/PTHLexer.h" +#include "clang/Lex/PPCallbacks.h" +#include "clang/Lex/TokenLexer.h" +#include "clang/Lex/PTHManager.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Support/Allocator.h" + +namespace clang { + +class SourceManager; +class FileManager; +class FileEntry; +class HeaderSearch; +class PragmaNamespace; +class PragmaHandler; +class ScratchBuffer; +class TargetInfo; +class PPCallbacks; +class DirectoryLookup; + +/// Preprocessor - This object engages in a tight little dance with the lexer to +/// efficiently preprocess tokens. Lexers know only about tokens within a +/// single source file, and don't know anything about preprocessor-level issues +/// like the #include stack, token expansion, etc. +/// +class Preprocessor { + Diagnostic *Diags; + const LangOptions &Features; + TargetInfo &Target; + FileManager &FileMgr; + SourceManager &SourceMgr; + ScratchBuffer *ScratchBuf; + HeaderSearch &HeaderInfo; + + /// PTH - An optional PTHManager object used for getting tokens from + /// a token cache rather than lexing the original source file. + llvm::OwningPtr<PTHManager> PTH; + + /// BP - A BumpPtrAllocator object used to quickly allocate and release + /// objects internal to the Preprocessor. + llvm::BumpPtrAllocator BP; + + /// Identifiers for builtin macros and other builtins. 
+ IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__ + IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__ + IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__ + IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__ + IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__ + IdentifierInfo *Ident__COUNTER__; // __COUNTER__ + IdentifierInfo *Ident_Pragma, *Ident__VA_ARGS__; // _Pragma, __VA_ARGS__ + + SourceLocation DATELoc, TIMELoc; + unsigned CounterValue; // Next __COUNTER__ value. + + enum { + /// MaxAllowedIncludeStackDepth - Maximum depth of #includes. + MaxAllowedIncludeStackDepth = 200 + }; + + // State that is set before the preprocessor begins. + bool KeepComments : 1; + bool KeepMacroComments : 1; + + // State that changes while the preprocessor runs: + bool DisableMacroExpansion : 1; // True if macro expansion is disabled. + bool InMacroArgs : 1; // True if parsing fn macro invocation args. + + /// Identifiers - This is mapping/lookup information for all identifiers in + /// the program, including program keywords. + IdentifierTable Identifiers; + + /// Selectors - This table contains all the selectors in the program. Unlike + /// IdentifierTable above, this table *isn't* populated by the preprocessor. + /// It is declared/instantiated here because its role/lifetime is + /// conceptually similar to the IdentifierTable's. In addition, the current + /// control flow (in clang::ParseAST()) makes it convenient to put here. + /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to + /// the lifetime of the preprocessor. + SelectorTable Selectors; + + /// PragmaHandlers - This tracks all of the pragmas that the client registered + /// with this preprocessor. + PragmaNamespace *PragmaHandlers; + + /// CurLexer - This is the current top of the stack that we're lexing from if + /// not expanding a macro and we are lexing directly from source code. 
+ /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null. + llvm::OwningPtr<Lexer> CurLexer; + + /// CurPTHLexer - This is the current top of stack that we're lexing from if + /// not expanding from a macro and we are lexing from a PTH cache. + /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null. + llvm::OwningPtr<PTHLexer> CurPTHLexer; + + /// CurPPLexer - This is the current top of the stack that we're lexing from + /// if not expanding a macro. This is an alias for either CurLexer or + /// CurPTHLexer. + PreprocessorLexer* CurPPLexer; + + /// CurDirLookup - The DirectoryLookup structure used to find the current + /// FileEntry, if CurLexer is non-null and if applicable. This allows us to + /// implement #include_next and find directory-specific properties. + const DirectoryLookup *CurDirLookup; + + /// CurTokenLexer - This is the current macro we are expanding, if we are + /// expanding a macro. One of CurLexer and CurTokenLexer must be null. + llvm::OwningPtr<TokenLexer> CurTokenLexer; + + /// IncludeMacroStack - This keeps track of the stack of files currently + /// #included, and macros currently being expanded from, not counting + /// CurLexer/CurTokenLexer. + struct IncludeStackInfo { + Lexer *TheLexer; + PTHLexer *ThePTHLexer; + PreprocessorLexer *ThePPLexer; + TokenLexer *TheTokenLexer; + const DirectoryLookup *TheDirLookup; + + IncludeStackInfo(Lexer *L, PTHLexer* P, PreprocessorLexer* PPL, + TokenLexer* TL, const DirectoryLookup *D) + : TheLexer(L), ThePTHLexer(P), ThePPLexer(PPL), TheTokenLexer(TL), + TheDirLookup(D) {} + }; + std::vector<IncludeStackInfo> IncludeMacroStack; + + /// Callbacks - These are actions invoked when some preprocessor activity is + /// encountered (e.g. a file is #included, etc). + PPCallbacks *Callbacks; + + /// Macros - For each IdentifierInfo with 'HasMacro' set, we keep a mapping + /// to the actual definition of the macro. 
+ llvm::DenseMap<IdentifierInfo*, MacroInfo*> Macros; + + /// MICache - A "freelist" of MacroInfo objects that can be reused for quick + /// allocation. + std::vector<MacroInfo*> MICache; + + // Various statistics we track for performance analysis. + unsigned NumDirectives, NumIncluded, NumDefined, NumUndefined, NumPragma; + unsigned NumIf, NumElse, NumEndif; + unsigned NumEnteredSourceFiles, MaxIncludeStackDepth; + unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded; + unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste; + unsigned NumSkipped; + + /// Predefines - This string is the predefined macros that preprocessor + /// should use from the command line etc. + std::string Predefines; + + /// TokenLexerCache - Cache macro expanders to reduce malloc traffic. + enum { TokenLexerCacheSize = 8 }; + unsigned NumCachedTokenLexers; + TokenLexer *TokenLexerCache[TokenLexerCacheSize]; + +private: // Cached tokens state. + typedef std::vector<Token> CachedTokensTy; + + /// CachedTokens - Cached tokens are stored here when we do backtracking or + /// lookahead. They are "lexed" by the CachingLex() method. + CachedTokensTy CachedTokens; + + /// CachedLexPos - The position of the cached token that CachingLex() should + /// "lex" next. If it points beyond the CachedTokens vector, it means that + /// a normal Lex() should be invoked. + CachedTokensTy::size_type CachedLexPos; + + /// BacktrackPositions - Stack of backtrack positions, allowing nested + /// backtracks. The EnableBacktrackAtThisPos() method pushes a position to + /// indicate where CachedLexPos should be set when the BackTrack() method is + /// invoked (at which point the last position is popped). 
+ std::vector<CachedTokensTy::size_type> BacktrackPositions; + +public: + Preprocessor(Diagnostic &diags, const LangOptions &opts, TargetInfo &target, + SourceManager &SM, HeaderSearch &Headers, + IdentifierInfoLookup* IILookup = 0); + + ~Preprocessor(); + + Diagnostic &getDiagnostics() const { return *Diags; } + void setDiagnostics(Diagnostic &D) { Diags = &D; } + + + const LangOptions &getLangOptions() const { return Features; } + TargetInfo &getTargetInfo() const { return Target; } + FileManager &getFileManager() const { return FileMgr; } + SourceManager &getSourceManager() const { return SourceMgr; } + HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; } + + IdentifierTable &getIdentifierTable() { return Identifiers; } + SelectorTable &getSelectorTable() { return Selectors; } + llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; } + + void setPTHManager(PTHManager* pm); + + PTHManager *getPTHManager() { return PTH.get(); } + + /// SetCommentRetentionState - Control whether or not the preprocessor retains + /// comments in output. + void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) { + this->KeepComments = KeepComments | KeepMacroComments; + this->KeepMacroComments = KeepMacroComments; + } + + bool getCommentRetentionState() const { return KeepComments; } + + /// isCurrentLexer - Return true if we are lexing directly from the specified + /// lexer. + bool isCurrentLexer(const PreprocessorLexer *L) const { + return CurPPLexer == L; + } + + /// getCurrentFileLexer - Return the current file lexer being lexed from. Note + /// that this ignores any potentially active macro expansions and _Pragma + /// expansions going on at the time. + PreprocessorLexer *getCurrentFileLexer() const; + + /// getPPCallbacks/setPPCallbacks - Accessors for preprocessor callbacks. + /// Note that this class takes ownership of any PPCallbacks object given to + /// it. 
+ PPCallbacks *getPPCallbacks() const { return Callbacks; } + void setPPCallbacks(PPCallbacks *C) { + if (Callbacks) + C = new PPChainedCallbacks(C, Callbacks); + Callbacks = C; + } + + /// getMacroInfo - Given an identifier, return the MacroInfo it is #defined to + /// or null if it isn't #define'd. + MacroInfo *getMacroInfo(IdentifierInfo *II) const { + return II->hasMacroDefinition() ? Macros.find(II)->second : 0; + } + + /// setMacroInfo - Specify a macro for this identifier. + /// + void setMacroInfo(IdentifierInfo *II, MacroInfo *MI); + + /// macro_iterator/macro_begin/macro_end - This allows you to walk the current + /// state of the macro table. This visits every currently-defined macro. + typedef llvm::DenseMap<IdentifierInfo*, + MacroInfo*>::const_iterator macro_iterator; + macro_iterator macro_begin() const { return Macros.begin(); } + macro_iterator macro_end() const { return Macros.end(); } + + + + const std::string &getPredefines() const { return Predefines; } + /// setPredefines - Set the predefines for this Preprocessor. These + /// predefines are automatically injected when parsing the main file. + void setPredefines(const char *P) { Predefines = P; } + void setPredefines(const std::string &P) { Predefines = P; } + + /// getIdentifierInfo - Return information about the specified preprocessor + /// identifier token. The version of this method that takes two character + /// pointers is preferred unless the identifier is already available as a + /// string (this avoids allocation and copying of memory to construct an + /// std::string). + IdentifierInfo *getIdentifierInfo(const char *NameStart, + const char *NameEnd) { + return &Identifiers.get(NameStart, NameEnd); + } + IdentifierInfo *getIdentifierInfo(const char *NameStr) { + return getIdentifierInfo(NameStr, NameStr+strlen(NameStr)); + } + + /// AddPragmaHandler - Add the specified pragma handler to the preprocessor. 
+ /// If 'Namespace' is non-null, then it is a token required to exist on the + /// pragma line before the pragma string starts, e.g. "STDC" or "GCC". + void AddPragmaHandler(const char *Namespace, PragmaHandler *Handler); + + /// RemovePragmaHandler - Remove the specific pragma handler from + /// the preprocessor. If \arg Namespace is non-null, then it should + /// be the namespace that \arg Handler was added to. It is an error + /// to remove a handler that has not been registered. + void RemovePragmaHandler(const char *Namespace, PragmaHandler *Handler); + + /// EnterMainSourceFile - Enter the main source file, + /// which implicitly adds the builtin defines etc. + void EnterMainSourceFile(); + + /// EnterSourceFile - Add a source file to the top of the include stack and + /// start lexing tokens from it instead of the current buffer. This takes no + /// isMainFile flag; use EnterMainSourceFile to enter the main file. + void EnterSourceFile(FileID CurFileID, const DirectoryLookup *Dir); + + /// EnterMacro - Add a Macro to the top of the include stack and start lexing + /// tokens from it instead of the current buffer. Args specifies the + /// tokens input to a function-like macro. + /// + /// ILEnd specifies the location of the ')' for a function-like macro or the + /// identifier for an object-like macro. + void EnterMacro(Token &Identifier, SourceLocation ILEnd, MacroArgs *Args); + + /// EnterTokenStream - Add a "macro" context to the top of the include stack, + /// which will cause the lexer to start returning the specified tokens. + /// + /// If DisableMacroExpansion is true, tokens lexed from the token stream will + /// not be subject to further macro expansion. Otherwise, these tokens will + /// be re-macro-expanded when/if expansion is enabled. + /// + /// If OwnsTokens is false, this method assumes that the specified stream of + /// tokens has a permanent owner somewhere, so they do not need to be copied. 
+ /// If it is true, it assumes the array of tokens is allocated with new[] and + /// must be freed. + /// + void EnterTokenStream(const Token *Toks, unsigned NumToks, + bool DisableMacroExpansion, bool OwnsTokens); + + /// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the + /// lexer stack. This should only be used in situations where the current + /// state of the top-of-stack lexer is known. + void RemoveTopOfLexerStack(); + + /// EnableBacktrackAtThisPos - From the point that this method is called, and + /// until CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor + /// keeps track of the lexed tokens so that a subsequent Backtrack() call will + /// make the Preprocessor re-lex the same tokens. + /// + /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can + /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will + /// be combined with the EnableBacktrackAtThisPos calls in reverse order. + /// + /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack + /// at some point after EnableBacktrackAtThisPos. If you don't, caching of + /// tokens will continue indefinitely. + /// + void EnableBacktrackAtThisPos(); + + /// CommitBacktrackedTokens - Disable the last EnableBacktrackAtThisPos call. + void CommitBacktrackedTokens(); + + /// Backtrack - Make Preprocessor re-lex the tokens that were lexed since + /// EnableBacktrackAtThisPos() was previously called. + void Backtrack(); + + /// isBacktrackEnabled - True if EnableBacktrackAtThisPos() was called and + /// caching of tokens is on. + bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); } + + /// Lex - To lex a token from the preprocessor, just pull a token from the + /// current lexer or macro object. 
+ void Lex(Token &Result) { + if (CurLexer) + CurLexer->Lex(Result); + else if (CurPTHLexer) + CurPTHLexer->Lex(Result); + else if (CurTokenLexer) + CurTokenLexer->Lex(Result); + else + CachingLex(Result); + } + + /// LexNonComment - Lex a token. If it's a comment, keep lexing until we get + /// something not a comment. This is useful in -E -C mode where comments + /// would foul up preprocessor directive handling. + void LexNonComment(Token &Result) { + do + Lex(Result); + while (Result.getKind() == tok::comment); + } + + /// LexUnexpandedToken - This is just like Lex, but this disables macro + /// expansion of identifier tokens. + void LexUnexpandedToken(Token &Result) { + // Disable macro expansion. + bool OldVal = DisableMacroExpansion; + DisableMacroExpansion = true; + // Lex the token. + Lex(Result); + + // Reenable it. + DisableMacroExpansion = OldVal; + } + + /// LookAhead - This peeks ahead N tokens and returns that token without + /// consuming any tokens. LookAhead(0) returns the next token that would be + /// returned by Lex(), LookAhead(1) returns the token after it, etc. This + /// returns normal tokens after phase 5. As such, it is equivalent to using + /// 'Lex', not 'LexUnexpandedToken'. + const Token &LookAhead(unsigned N) { + if (CachedLexPos + N < CachedTokens.size()) + return CachedTokens[CachedLexPos+N]; + else + return PeekAhead(N+1); + } + + /// RevertCachedTokens - When backtracking is enabled and tokens are cached, + /// this allows to revert a specific number of tokens. + /// Note that the number of tokens being reverted should be up to the last + /// backtrack position, not more. 
+ void RevertCachedTokens(unsigned N) { + assert(isBacktrackEnabled() && + "Should only be called when tokens are cached for backtracking"); + assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back()) + && "Should revert tokens up to the last backtrack position, not more"); + assert(signed(CachedLexPos) - signed(N) >= 0 && + "Corrupted backtrack positions ?"); + CachedLexPos -= N; + } + + /// EnterToken - Enters a token in the token stream to be lexed next. If + /// Backtrack() is called afterwards, the token will remain at the insertion + /// point. + void EnterToken(const Token &Tok) { + EnterCachingLexMode(); + CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok); + } + + /// AnnotateCachedTokens - We notify the Preprocessor that if it is caching + /// tokens (because backtrack is enabled) it should replace the most recent + /// cached tokens with the given annotation token. This function has no effect + /// if backtracking is not enabled. + /// + /// Note that the use of this function is just for optimization; so that the + /// cached tokens don't get re-parsed and re-resolved after a backtrack is + /// invoked. + void AnnotateCachedTokens(const Token &Tok) { + assert(Tok.isAnnotation() && "Expected annotation token"); + if (CachedLexPos != 0 && isBacktrackEnabled()) + AnnotatePreviousCachedTokens(Tok); + } + + /// \brief Replace the last token with an annotation token. + /// + /// Like AnnotateCachedTokens(), this routine replaces an + /// already-parsed (and resolved) token with an annotation + /// token. However, this routine only replaces the last token with + /// the annotation token; it does not affect any other cached + /// tokens. This function has no effect if backtracking is not + /// enabled. 
+ void ReplaceLastTokenWithAnnotation(const Token &Tok) { + assert(Tok.isAnnotation() && "Expected annotation token"); + if (CachedLexPos != 0 && isBacktrackEnabled()) + CachedTokens[CachedLexPos-1] = Tok; + } + + /// Diag - Forwarding function for diagnostics. This emits a diagnostic at + /// the specified Token's location, translating the token's start + /// position in the current buffer into a SourcePosition object for rendering. + DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) { + return Diags->Report(FullSourceLoc(Loc, getSourceManager()), DiagID); + } + + DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) { + return Diags->Report(FullSourceLoc(Tok.getLocation(), getSourceManager()), + DiagID); + } + + /// getSpelling() - Return the 'spelling' of the Tok token. The spelling of a + /// token is the characters used to represent the token in the source file + /// after trigraph expansion and escaped-newline folding. In particular, this + /// wants to get the true, uncanonicalized, spelling of things like digraphs + /// UCNs, etc. + std::string getSpelling(const Token &Tok) const; + + /// getSpelling - This method is used to get the spelling of a token into a + /// preallocated buffer, instead of as an std::string. The caller is required + /// to allocate enough space for the token, which is guaranteed to be at least + /// Tok.getLength() bytes long. The length of the actual result is returned. + /// + /// Note that this method may do two possible things: it may either fill in + /// the buffer specified with characters, or it may *change the input pointer* + /// to point to a constant buffer with the data already in it (avoiding a + /// copy). The caller is not allowed to modify the returned buffer pointer + /// if an internal buffer is returned. + unsigned getSpelling(const Token &Tok, const char *&Buffer) const; + + /// getSpellingOfSingleCharacterNumericConstant - Tok is a numeric constant + /// with length 1, return the character. 
+ char getSpellingOfSingleCharacterNumericConstant(const Token &Tok) const { + assert(Tok.is(tok::numeric_constant) && + Tok.getLength() == 1 && "Called on unsupported token"); + assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1"); + + // If the token is carrying a literal data pointer, just use it. + if (const char *D = Tok.getLiteralData()) + return *D; + + // Otherwise, fall back on getCharacterData, which is slower, but always + // works. + return *SourceMgr.getCharacterData(Tok.getLocation()); + } + + /// CreateString - Plop the specified string into a scratch buffer and set the + /// specified token's location and length to it. If specified, the source + /// location provides a location of the instantiation point of the token. + void CreateString(const char *Buf, unsigned Len, + Token &Tok, SourceLocation SourceLoc = SourceLocation()); + + /// \brief Computes the source location just past the end of the + /// token at this source location. + /// + /// This routine can be used to produce a source location that + /// points just past the end of the token referenced by \p Loc, and + /// is generally used when a diagnostic needs to point just after a + /// token where it expected something different than it received. If + /// the returned source location would not be meaningful (e.g., if + /// it points into a macro), this routine returns an invalid + /// source location. + SourceLocation getLocForEndOfToken(SourceLocation Loc); + + /// DumpToken - Print the token to stderr, used for debugging. + /// + void DumpToken(const Token &Tok, bool DumpFlags = false) const; + void DumpLocation(SourceLocation Loc) const; + void DumpMacro(const MacroInfo &MI) const; + + /// AdvanceToTokenCharacter - Given a location that specifies the start of a + /// token, return a new location that specifies a character within the token. 
+ SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,unsigned Char); + + /// IncrementPasteCounter - Increment the counters for the number of token + /// paste operations performed. If fast was specified, this is a 'fast paste' + /// case we handled. + /// + void IncrementPasteCounter(bool isFast) { + if (isFast) + ++NumFastTokenPaste; + else + ++NumTokenPaste; + } + + void PrintStats(); + + /// HandleMicrosoftCommentPaste - When the macro expander pastes together a + /// comment (/##/) in microsoft mode, this method handles updating the current + /// state, returning the token on the next source line. + void HandleMicrosoftCommentPaste(Token &Tok); + + //===--------------------------------------------------------------------===// + // Preprocessor callback methods. These are invoked by a lexer as various + // directives and events are found. + + /// LookUpIdentifierInfo - Given a tok::identifier token, look up the + /// identifier information for the token and install it into the token. + IdentifierInfo *LookUpIdentifierInfo(Token &Identifier, + const char *BufPtr = 0); + + /// HandleIdentifier - This callback is invoked when the lexer reads an + /// identifier and has filled in the tokens IdentifierInfo member. This + /// callback potentially macro expands it or turns it into a named token (like + /// 'for'). + void HandleIdentifier(Token &Identifier); + + + /// HandleEndOfFile - This callback is invoked when the lexer hits the end of + /// the current file. This either returns the EOF token and returns true, or + /// pops a level off the include stack and returns false, at which point the + /// client should call lex again. + bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false); + + /// HandleEndOfTokenLexer - This callback is invoked when the current + /// TokenLexer hits the end of its token stream. 
+ bool HandleEndOfTokenLexer(Token &Result); + + /// HandleDirective - This callback is invoked when the lexer sees a # token + /// at the start of a line. This consumes the directive, modifies the + /// lexer/preprocessor state, and advances the lexer(s) so that the next token + /// read is the correct one. + void HandleDirective(Token &Result); + + /// CheckEndOfDirective - Ensure that the next token is a tok::eom token. If + /// not, emit a diagnostic and consume up until the eom. If EnableMacros is + /// true, then we consider macros that expand to zero tokens as being ok. + void CheckEndOfDirective(const char *Directive, bool EnableMacros = false); + + /// DiscardUntilEndOfDirective - Read and discard all tokens remaining on the + /// current line until the tok::eom token is found. + void DiscardUntilEndOfDirective(); + + /// SawDateOrTime - This returns true if the preprocessor has seen a use of + /// __DATE__ or __TIME__ in the file so far. + bool SawDateOrTime() const { + return DATELoc != SourceLocation() || TIMELoc != SourceLocation(); + } + unsigned getCounterValue() const { return CounterValue; } + void setCounterValue(unsigned V) { CounterValue = V; } + + /// AllocateMacroInfo - Allocate a new MacroInfo object with the provided + /// SourceLocation. + MacroInfo* AllocateMacroInfo(SourceLocation L); + +private: + + void PushIncludeMacroStack() { + IncludeMacroStack.push_back(IncludeStackInfo(CurLexer.take(), + CurPTHLexer.take(), + CurPPLexer, + CurTokenLexer.take(), + CurDirLookup)); + CurPPLexer = 0; + } + + void PopIncludeMacroStack() { + CurLexer.reset(IncludeMacroStack.back().TheLexer); + CurPTHLexer.reset(IncludeMacroStack.back().ThePTHLexer); + CurPPLexer = IncludeMacroStack.back().ThePPLexer; + CurTokenLexer.reset(IncludeMacroStack.back().TheTokenLexer); + CurDirLookup = IncludeMacroStack.back().TheDirLookup; + IncludeMacroStack.pop_back(); + } + + /// ReleaseMacroInfo - Release the specified MacroInfo. 
This memory will + /// be reused for allocating new MacroInfo objects. + void ReleaseMacroInfo(MacroInfo* MI); + + /// isInPrimaryFile - Return true if we're in the top-level file, not in a + /// #include. + bool isInPrimaryFile() const; + + /// ReadMacroName - Lex and validate a macro name, which occurs after a + /// #define or #undef. This emits a diagnostic, sets the token kind to eom, + /// and discards the rest of the macro line if the macro name is invalid. + void ReadMacroName(Token &MacroNameTok, char isDefineUndef = 0); + + /// ReadMacroDefinitionArgList - The ( starting an argument list of a macro + /// definition has just been read. Lex the rest of the arguments and the + /// closing ), updating MI with what we learn. Return true if an error occurs + /// parsing the arg list. + bool ReadMacroDefinitionArgList(MacroInfo *MI); + + /// SkipExcludedConditionalBlock - We just read a #if or related directive and + /// decided that the subsequent tokens are in the #if'd out portion of the + /// file. Lex the rest of the file, until we see an #endif. If + /// FoundNonSkipPortion is true, then we have already emitted code for part of + /// this #if directive, so #else/#elif blocks should never be entered. If + /// FoundElse is false, then #else directives are ok, if not, then we have + /// already seen one so a #else directive is a duplicate. When this returns, + /// the caller can lex the first valid token. + void SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, + bool FoundNonSkipPortion, bool FoundElse); + + /// PTHSkipExcludedConditionalBlock - A fast PTH version of + /// SkipExcludedConditionalBlock. + void PTHSkipExcludedConditionalBlock(); + + /// EvaluateDirectiveExpression - Evaluate an integer constant expression that + /// may occur after a #if or #elif directive and return it as a bool. If the + /// expression is equivalent to "!defined(X)" return X in IfNDefMacro. 
+ bool EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro); + + /// RegisterBuiltinPragmas - Install the standard preprocessor pragmas: + /// #pragma GCC poison/system_header/dependency and #pragma once. + void RegisterBuiltinPragmas(); + + /// RegisterBuiltinMacros - Register builtin macros, such as __LINE__ with the + /// identifier table. + void RegisterBuiltinMacros(); + IdentifierInfo *RegisterBuiltinMacro(const char *Name); + + /// HandleMacroExpandedIdentifier - If an identifier token is read that is to + /// be expanded as a macro, handle it and return the next token as 'Tok'. If + /// the macro should not be expanded return true, otherwise return false. + bool HandleMacroExpandedIdentifier(Token &Tok, MacroInfo *MI); + + /// isNextPPTokenLParen - Determine whether the next preprocessor token to be + /// lexed is a '('. If so, consume the token and return true, if not, this + /// method should have no observable side-effect on the lexed tokens. + bool isNextPPTokenLParen(); + + /// ReadFunctionLikeMacroArgs - After reading "MACRO(", this method is + /// invoked to read all of the formal arguments specified for the macro + /// invocation. This returns null on error. + MacroArgs *ReadFunctionLikeMacroArgs(Token &MacroName, MacroInfo *MI, + SourceLocation &InstantiationEnd); + + /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded + /// as a builtin macro, handle it and return the next token as 'Tok'. + void ExpandBuiltinMacro(Token &Tok); + + /// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then + /// return the first token after the directive. The _Pragma token has just + /// been read into 'Tok'. + void Handle_Pragma(Token &Tok); + + /// EnterSourceFileWithLexer - Add a lexer to the top of the include stack and + /// start lexing tokens from it instead of the current buffer. 
+ void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir); + + /// EnterSourceFileWithPTH - Add a lexer to the top of the include stack and + /// start getting tokens from it using the PTH cache. + void EnterSourceFileWithPTH(PTHLexer *PL, const DirectoryLookup *Dir); + + /// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully + /// checked and spelled filename, e.g. as an operand of #include. This returns + /// true if the input filename was in <>'s or false if it were in ""'s. The + /// caller is expected to provide a buffer that is large enough to hold the + /// spelling of the filename, but is also expected to handle the case when + /// this method decides to use a different buffer. + bool GetIncludeFilenameSpelling(SourceLocation Loc, + const char *&BufStart, const char *&BufEnd); + + /// LookupFile - Given a "foo" or <foo> reference, look up the indicated file, + /// return null on failure. isAngled indicates whether the file reference is + /// for system #include's or not (i.e. using <> instead of ""). + const FileEntry *LookupFile(const char *FilenameStart,const char *FilenameEnd, + bool isAngled, const DirectoryLookup *FromDir, + const DirectoryLookup *&CurDir); + + + + /// IsFileLexer - Returns true if we are lexing from a file and not a + /// pragma or a macro. + static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) { + return L ? !L->isPragmaLexer() : P != 0; + } + + static bool IsFileLexer(const IncludeStackInfo& I) { + return IsFileLexer(I.TheLexer, I.ThePPLexer); + } + + bool IsFileLexer() const { + return IsFileLexer(CurLexer.get(), CurPPLexer); + } + + //===--------------------------------------------------------------------===// + // Caching stuff. 
+ void CachingLex(Token &Result); + bool InCachingLexMode() const { return CurPPLexer == 0 && CurTokenLexer == 0;} + void EnterCachingLexMode(); + void ExitCachingLexMode() { + if (InCachingLexMode()) + RemoveTopOfLexerStack(); + } + const Token &PeekAhead(unsigned N); + void AnnotatePreviousCachedTokens(const Token &Tok); + + //===--------------------------------------------------------------------===// + /// Handle*Directive - implement the various preprocessor directives. These + /// should side-effect the current preprocessor object so that the next call + /// to Lex() will return the appropriate token next. + void HandleLineDirective(Token &Tok); + void HandleDigitDirective(Token &Tok); + void HandleUserDiagnosticDirective(Token &Tok, bool isWarning); + void HandleIdentSCCSDirective(Token &Tok); + + // File inclusion. + void HandleIncludeDirective(Token &Tok, + const DirectoryLookup *LookupFrom = 0, + bool isImport = false); + void HandleIncludeNextDirective(Token &Tok); + void HandleIncludeMacrosDirective(Token &Tok); + void HandleImportDirective(Token &Tok); + + // Macro handling. + void HandleDefineDirective(Token &Tok); + void HandleUndefDirective(Token &Tok); + // HandleAssertDirective(Token &Tok); + // HandleUnassertDirective(Token &Tok); + + // Conditional Inclusion. + void HandleIfdefDirective(Token &Tok, bool isIfndef, + bool ReadAnyTokensBeforeDirective); + void HandleIfDirective(Token &Tok, bool ReadAnyTokensBeforeDirective); + void HandleEndifDirective(Token &Tok); + void HandleElseDirective(Token &Tok); + void HandleElifDirective(Token &Tok); + + // Pragmas. 
+ void HandlePragmaDirective(); +public: + void HandlePragmaOnce(Token &OnceTok); + void HandlePragmaMark(); + void HandlePragmaPoison(Token &PoisonTok); + void HandlePragmaSystemHeader(Token &SysHeaderTok); + void HandlePragmaDependency(Token &DependencyTok); + void HandlePragmaComment(Token &CommentTok); +}; + +/// PreprocessorFactory - A generic factory interface for lazily creating +/// Preprocessor objects on-demand when they are needed. +class PreprocessorFactory { +public: + virtual ~PreprocessorFactory(); + virtual Preprocessor* CreatePreprocessor() = 0; +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/PreprocessorLexer.h b/include/clang/Lex/PreprocessorLexer.h new file mode 100644 index 0000000000000..f98b5599658fd --- /dev/null +++ b/include/clang/Lex/PreprocessorLexer.h @@ -0,0 +1,161 @@ +//===--- PreprocessorLexer.h - C Language Family Lexer ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PreprocessorLexer interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_PreprocessorLexer_H +#define LLVM_CLANG_PreprocessorLexer_H + +#include "clang/Lex/MultipleIncludeOpt.h" +#include "clang/Lex/Token.h" +#include "llvm/ADT/SmallVector.h" +#include <string> + +namespace clang { + +class Preprocessor; + +class PreprocessorLexer { +protected: + Preprocessor *PP; // Preprocessor object controlling lexing. + + /// The SourceManager FileID corresponding to the file being lexed. + const FileID FID; + + //===--------------------------------------------------------------------===// + // Context-specific lexing flags set by the preprocessor. 
+ //===--------------------------------------------------------------------===// + + /// ParsingPreprocessorDirective - This is true when parsing #XXX. This turns + /// '\n' into a tok::eom token. + bool ParsingPreprocessorDirective; + + /// ParsingFilename - True after #include: this turns <xx> into a + /// tok::angle_string_literal token. + bool ParsingFilename; + + /// LexingRawMode - True if in raw mode: This flag disables interpretation of + /// tokens and is a far faster mode to lex in than non-raw-mode. This flag: + /// 1. If EOF of the current lexer is found, the include stack isn't popped. + /// 2. Identifier information is not looked up for identifier tokens. As an + /// effect of this, implicit macro expansion is naturally disabled. + /// 3. "#" tokens at the start of a line are treated as normal tokens, not + /// implicitly transformed by the lexer. + /// 4. All diagnostic messages are disabled. + /// 5. No callbacks are made into the preprocessor. + /// + /// Note that in raw mode that the PP pointer may be null. + bool LexingRawMode; + + /// MIOpt - This is a state machine that detects the #ifndef-wrapping a file + /// idiom for the multiple-include optimization. + MultipleIncludeOpt MIOpt; + + /// ConditionalStack - Information about the set of #if/#ifdef/#ifndef blocks + /// we are currently in. 
+ llvm::SmallVector<PPConditionalInfo, 4> ConditionalStack; + + PreprocessorLexer(const PreprocessorLexer&); // DO NOT IMPLEMENT + void operator=(const PreprocessorLexer&); // DO NOT IMPLEMENT + friend class Preprocessor; + + PreprocessorLexer(Preprocessor *pp, FileID fid) + : PP(pp), FID(fid), ParsingPreprocessorDirective(false), + ParsingFilename(false), LexingRawMode(false) {} + + PreprocessorLexer() + : PP(0), + ParsingPreprocessorDirective(false), + ParsingFilename(false), + LexingRawMode(false) {} + + virtual ~PreprocessorLexer() {} + + virtual void IndirectLex(Token& Result) = 0; + + /// getSourceLocation - Return the source location for the next observable + /// location. + virtual SourceLocation getSourceLocation() = 0; + + //===--------------------------------------------------------------------===// + // #if directive handling. + + /// pushConditionalLevel - When we enter a #if directive, this keeps track of + /// what we are currently in for diagnostic emission (e.g. #if with missing + /// #endif). + void pushConditionalLevel(SourceLocation DirectiveStart, bool WasSkipping, + bool FoundNonSkip, bool FoundElse) { + PPConditionalInfo CI; + CI.IfLoc = DirectiveStart; + CI.WasSkipping = WasSkipping; + CI.FoundNonSkip = FoundNonSkip; + CI.FoundElse = FoundElse; + ConditionalStack.push_back(CI); + } + void pushConditionalLevel(const PPConditionalInfo &CI) { + ConditionalStack.push_back(CI); + } + + /// popConditionalLevel - Remove an entry off the top of the conditional + /// stack, returning information about it. If the conditional stack is empty, + /// this returns true and does not fill in the arguments. + bool popConditionalLevel(PPConditionalInfo &CI) { + if (ConditionalStack.empty()) return true; + CI = ConditionalStack.back(); + ConditionalStack.pop_back(); + return false; + } + + /// peekConditionalLevel - Return the top of the conditional stack. This + /// requires that there be a conditional active. 
+ PPConditionalInfo &peekConditionalLevel() { + assert(!ConditionalStack.empty() && "No conditionals active!"); + return ConditionalStack.back(); + } + + unsigned getConditionalStackDepth() const { return ConditionalStack.size(); } + +public: + + //===--------------------------------------------------------------------===// + // Misc. lexing methods. + + /// LexIncludeFilename - After the preprocessor has parsed a #include, lex and + /// (potentially) macro expand the filename. If the sequence parsed is not + /// lexically legal, emit a diagnostic and return a result EOM token. + void LexIncludeFilename(Token &Result); + + /// setParsingPreprocessorDirective - Inform the lexer whether or not + /// we are currently lexing a preprocessor directive. + void setParsingPreprocessorDirective(bool f) { + ParsingPreprocessorDirective = f; + } + + /// isLexingRawMode - Return true if this lexer is in raw mode or not. + bool isLexingRawMode() const { return LexingRawMode; } + + /// getPP - Return the preprocessor object for this lexer. + Preprocessor *getPP() const { return PP; } + + FileID getFileID() const { + assert(PP && + "PreprocessorLexer::getFileID() should only be used with a Preprocessor"); + return FID; + } + + /// getFileEntry - Return the FileEntry corresponding to this FileID. Like + /// getFileID(), this only works for lexers with attached preprocessors. + const FileEntry *getFileEntry() const; +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/ScratchBuffer.h b/include/clang/Lex/ScratchBuffer.h new file mode 100644 index 0000000000000..6506f9262947c --- /dev/null +++ b/include/clang/Lex/ScratchBuffer.h @@ -0,0 +1,45 @@ +//===--- ScratchBuffer.h - Scratch space for forming tokens -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the ScratchBuffer interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_SCRATCHBUFFER_H +#define LLVM_CLANG_SCRATCHBUFFER_H + +#include "clang/Basic/SourceLocation.h" + +namespace clang { + class SourceManager; + +/// ScratchBuffer - This class exposes a simple interface for the dynamic +/// construction of tokens. This is used for builtin macros (e.g. __LINE__) as +/// well as token pasting, etc. +class ScratchBuffer { + SourceManager &SourceMgr; + char *CurBuffer; + SourceLocation BufferStartLoc; + unsigned BytesUsed; +public: + ScratchBuffer(SourceManager &SM); + + /// getToken - Splat the specified text into a temporary MemoryBuffer and + /// return a SourceLocation that refers to the token. This is just like the + /// previous method, but returns a location that indicates the physloc of the + /// token. + SourceLocation getToken(const char *Buf, unsigned Len, const char *&DestPtr); + +private: + void AllocScratchBuffer(unsigned RequestLen); +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/Token.h b/include/clang/Lex/Token.h new file mode 100644 index 0000000000000..2c8f2ad3f2b62 --- /dev/null +++ b/include/clang/Lex/Token.h @@ -0,0 +1,312 @@ +//===--- Token.h - Token interface ------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Token interface. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOKEN_H +#define LLVM_CLANG_TOKEN_H + +#include "clang/Basic/TemplateKinds.h" +#include "clang/Basic/TokenKinds.h" +#include "clang/Basic/SourceLocation.h" +#include <cstdlib> + +namespace clang { + +class IdentifierInfo; + +/// Token - This structure provides full information about a lexed token. +/// It is not intended to be space efficient, it is intended to return as much +/// information as possible about each returned token. This is expected to be +/// compressed into a smaller form if memory footprint is important. +/// +/// The parser can create a special "annotation token" representing a stream of +/// tokens that were parsed and semantically resolved, e.g.: "foo::MyClass<int>" +/// can be represented by a single typename annotation token that carries +/// information about the SourceRange of the tokens and the type object. +class Token { + /// The location of the token. + SourceLocation Loc; + + // Conceptually these next two fields could be in a union. However, this + // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical + // routine. Keeping as separate members with casts until a more beautiful fix + // presents itself. + + /// UintData - This holds either the length of the token text, when + /// a normal token, or the end of the SourceRange when an annotation + /// token. + unsigned UintData; + + /// PtrData - This is a union of four different pointer types, which depends + /// on what type of token this is: + /// Identifiers, keywords, etc: + /// This is an IdentifierInfo*, which contains the uniqued identifier + /// spelling. + /// Literals: isLiteral() returns true. + /// This is a pointer to the start of the token in a text buffer, which + /// may be dirty (have trigraphs / escaped newlines). + /// Annotations (resolved type names, C++ scopes, etc): isAnnotation(). 
+ /// This is a pointer to sema-specific data for the annotation token. + /// Other: + /// This is null. + void *PtrData; + + /// Kind - The actual flavor of token this is. + /// + unsigned Kind : 8; // DON'T make Kind a 'tok::TokenKind'; + // MSVC will treat it as a signed char and + // TokenKinds > 127 won't be handled correctly. + + /// Flags - Bits we track about this token, members of the TokenFlags enum. + unsigned Flags : 8; +public: + + // Various flags set per token: + enum TokenFlags { + StartOfLine = 0x01, // At start of line or only after whitespace. + LeadingSpace = 0x02, // Whitespace exists before this token. + DisableExpand = 0x04, // This identifier may never be macro expanded. + NeedsCleaning = 0x08 // Contained an escaped newline or trigraph. + }; + + tok::TokenKind getKind() const { return (tok::TokenKind)Kind; } + void setKind(tok::TokenKind K) { Kind = K; } + + /// is/isNot - Predicates to check if this token is a specific kind, as in + /// "if (Tok.is(tok::l_brace)) {...}". + bool is(tok::TokenKind K) const { return Kind == (unsigned) K; } + bool isNot(tok::TokenKind K) const { return Kind != (unsigned) K; } + + /// isLiteral - Return true if this is a "literal", like a numeric + /// constant, string, etc. + bool isLiteral() const { + return is(tok::numeric_constant) || is(tok::char_constant) || + is(tok::string_literal) || is(tok::wide_string_literal) || + is(tok::angle_string_literal); + } + + bool isAnnotation() const { + return is(tok::annot_typename) || + is(tok::annot_cxxscope) || + is(tok::annot_template_id); + } + + /// getLocation - Return a source location identifier for the specified + /// offset in the current file. 
+ SourceLocation getLocation() const { return Loc; } + unsigned getLength() const { + assert(!isAnnotation() && "Annotation tokens have no length field"); + return UintData; + } + + void setLocation(SourceLocation L) { Loc = L; } + void setLength(unsigned Len) { + assert(!isAnnotation() && "Annotation tokens have no length field"); + UintData = Len; + } + + SourceLocation getAnnotationEndLoc() const { + assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token"); + return SourceLocation::getFromRawEncoding(UintData); + } + void setAnnotationEndLoc(SourceLocation L) { + assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token"); + UintData = L.getRawEncoding(); + } + + /// getAnnotationRange - SourceRange of the group of tokens that this + /// annotation token represents. + SourceRange getAnnotationRange() const { + return SourceRange(getLocation(), getAnnotationEndLoc()); + } + void setAnnotationRange(SourceRange R) { + setLocation(R.getBegin()); + setAnnotationEndLoc(R.getEnd()); + } + + const char *getName() const { + return tok::getTokenName( (tok::TokenKind) Kind); + } + + /// startToken - Reset all flags to cleared. + /// + void startToken() { + Kind = tok::unknown; + Flags = 0; + PtrData = 0; + Loc = SourceLocation(); + } + + IdentifierInfo *getIdentifierInfo() const { + assert(!isAnnotation() && "Used IdentInfo on annotation token!"); + if (isLiteral()) return 0; + return (IdentifierInfo*) PtrData; + } + void setIdentifierInfo(IdentifierInfo *II) { + PtrData = (void*) II; + } + + /// getLiteralData - For a literal token (numeric constant, string, etc), this + /// returns a pointer to the start of it in the text buffer if known, null + /// otherwise. 
+ const char *getLiteralData() const { + assert(isLiteral() && "Cannot get literal data of non-literal"); + return reinterpret_cast<const char*>(PtrData); + } + void setLiteralData(const char *Ptr) { + assert(isLiteral() && "Cannot set literal data of non-literal"); + PtrData = (void*)Ptr; + } + + void *getAnnotationValue() const { + assert(isAnnotation() && "Used AnnotVal on non-annotation token"); + return PtrData; + } + void setAnnotationValue(void *val) { + assert(isAnnotation() && "Used AnnotVal on non-annotation token"); + PtrData = val; + } + + /// setFlag - Set the specified flag. + void setFlag(TokenFlags Flag) { + Flags |= Flag; + } + + /// clearFlag - Unset the specified flag. + void clearFlag(TokenFlags Flag) { + Flags &= ~Flag; + } + + /// getFlags - Return the internal representation of the flags. + /// Only intended for low-level operations such as writing tokens to + /// disk. + unsigned getFlags() const { + return Flags; + } + + /// setFlagValue - Set a flag to either true or false. + void setFlagValue(TokenFlags Flag, bool Val) { + if (Val) + setFlag(Flag); + else + clearFlag(Flag); + } + + /// isAtStartOfLine - Return true if this token is at the start of a line. + /// + bool isAtStartOfLine() const { return (Flags & StartOfLine) ? true : false; } + + /// hasLeadingSpace - Return true if this token has whitespace before it. + /// + bool hasLeadingSpace() const { return (Flags & LeadingSpace) ? true : false; } + + /// isExpandDisabled - Return true if this identifier token should never + /// be expanded in the future, due to C99 6.10.3.4p2. + bool isExpandDisabled() const { + return (Flags & DisableExpand) ? true : false; + } + + /// isObjCAtKeyword - Return true if we have an ObjC keyword identifier. + bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const; + + /// getObjCKeywordID - Return the ObjC keyword kind.
+ tok::ObjCKeywordKind getObjCKeywordID() const; + + /// needsCleaning - Return true if this token has trigraphs or escaped + /// newlines in it. + /// + bool needsCleaning() const { return (Flags & NeedsCleaning) ? true : false; } +}; + +/// PPConditionalInfo - Information about the conditional stack (#if directives) +/// currently active. +struct PPConditionalInfo { + /// IfLoc - Location where the conditional started. + /// + SourceLocation IfLoc; + + /// WasSkipping - True if this was contained in a skipping directive, e.g. + /// in a "#if 0" block. + bool WasSkipping; + + /// FoundNonSkip - True if we have emitted tokens already, and now we're in + /// an #else block or something. Only useful in Skipping blocks. + bool FoundNonSkip; + + /// FoundElse - True if we've seen a #else in this block. If so, + /// #elif/#else directives are not allowed. + bool FoundElse; +}; + +/// TemplateIdAnnotation - Information about a template-id annotation +/// token, which contains the template declaration, template +/// arguments, whether those template arguments were types or +/// expressions, and the source locations for important tokens. All of +/// the information about template arguments is allocated directly +/// after this structure. +struct TemplateIdAnnotation { + /// TemplateNameLoc - The location of the template name within the + /// source. + SourceLocation TemplateNameLoc; + + /// FIXME: Temporarily stores the name of a specialization + IdentifierInfo *Name; + + /// The declaration of the template corresponding to the + /// template-name. This is an Action::DeclTy*. + void *Template; + + /// The kind of template that Template refers to. + TemplateNameKind Kind; + + /// The location of the '<' before the template argument + /// list. + SourceLocation LAngleLoc; + + /// The location of the '>' after the template argument + /// list. + SourceLocation RAngleLoc; + + /// NumArgs - The number of template arguments. 
+ unsigned NumArgs; + + /// \brief Retrieves a pointer to the template arguments + void **getTemplateArgs() { return (void **)(this + 1); } + + /// \brief Retrieves a pointer to the array of template argument + /// locations. + SourceLocation *getTemplateArgLocations() { + return (SourceLocation *)(getTemplateArgs() + NumArgs); + } + + /// \brief Retrieves a pointer to the array of flags that states + /// whether the template arguments are types. + bool *getTemplateArgIsType() { + return (bool *)(getTemplateArgLocations() + NumArgs); + } + + static TemplateIdAnnotation* Allocate(unsigned NumArgs) { + TemplateIdAnnotation *TemplateId + = (TemplateIdAnnotation *)std::malloc(sizeof(TemplateIdAnnotation) + + sizeof(void*) * NumArgs + + sizeof(SourceLocation) * NumArgs + + sizeof(bool) * NumArgs); + TemplateId->NumArgs = NumArgs; + return TemplateId; + } + + void Destroy() { free(this); } +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/TokenConcatenation.h b/include/clang/Lex/TokenConcatenation.h new file mode 100644 index 0000000000000..dfc05f4074e0a --- /dev/null +++ b/include/clang/Lex/TokenConcatenation.h @@ -0,0 +1,73 @@ +//===--- TokenConcatenation.h - Token Concatenation Avoidance ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the TokenConcatenation class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_LEX_TOKEN_CONCATENATION_H +#define CLANG_LEX_TOKEN_CONCATENATION_H + +#include "clang/Basic/TokenKinds.h" + +namespace clang { + class Preprocessor; + class Token; + + /// TokenConcatenation class, which answers the question of + /// "Is it safe to emit two tokens without a whitespace between them, or + /// would that cause implicit concatenation of the tokens?" + /// + /// For example, emitting two identifiers "foo" and "bar" next to each + /// other would cause the lexer to produce one "foobar" token. Emitting "1" + /// and ")" next to each other is safe. + /// + class TokenConcatenation { + Preprocessor &PP; + + enum AvoidConcatInfo { + /// By default, a token never needs to avoid concatenation. Most tokens + /// (e.g. ',', ')', etc) don't cause a problem when concatenated. + aci_never_avoid_concat = 0, + + /// aci_custom_firstchar - AvoidConcat contains custom code to handle this + /// token's requirements, and it needs to know the first character of the + /// token. + aci_custom_firstchar = 1, + + /// aci_custom - AvoidConcat contains custom code to handle this token's + /// requirements, but it doesn't need to know the first character of the + /// token. + aci_custom = 2, + + /// aci_avoid_equal - Many tokens cannot be safely followed by an '=' + /// character. For example, "<<" turns into "<<=" when followed by an =. + aci_avoid_equal = 4 + }; + + /// TokenInfo - This array contains information for each token on what + /// action to take when avoiding concatenation of tokens in the AvoidConcat + /// method. + char TokenInfo[tok::NUM_TOKENS]; + public: + TokenConcatenation(Preprocessor &PP); + + bool AvoidConcat(const Token &PrevTok, const Token &Tok) const; + + private: + /// StartsWithL - Return true if the spelling of this token starts with 'L'.
+ bool StartsWithL(const Token &Tok) const; + + /// IsIdentifierL - Return true if the spelling of this token is literally + /// 'L'. + bool IsIdentifierL(const Token &Tok) const; + }; + } // end clang namespace + +#endif diff --git a/include/clang/Lex/TokenLexer.h b/include/clang/Lex/TokenLexer.h new file mode 100644 index 0000000000000..c0a61cf93ee55 --- /dev/null +++ b/include/clang/Lex/TokenLexer.h @@ -0,0 +1,154 @@ +//===--- TokenLexer.h - Lex from a token buffer -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the TokenLexer interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOKENLEXER_H +#define LLVM_CLANG_TOKENLEXER_H + +#include "clang/Basic/SourceLocation.h" + +namespace clang { + class MacroInfo; + class Preprocessor; + class Token; + class MacroArgs; + +/// TokenLexer - This implements a lexer that returns token from a macro body +/// or token stream instead of lexing from a character buffer. This is used for +/// macro expansion and _Pragma handling, for example. +/// +class TokenLexer { + /// Macro - The macro we are expanding from. This is null if expanding a + /// token stream. + /// + MacroInfo *Macro; + + /// ActualArgs - The actual arguments specified for a function-like macro, or + /// null. The TokenLexer owns the pointed-to object. + MacroArgs *ActualArgs; + + /// PP - The current preprocessor object we are expanding for. + /// + Preprocessor &PP; + + /// Tokens - This is the pointer to an array of tokens that the macro is + /// defined to, with arguments expanded for function-like macros. If this is + /// a token stream, these are the tokens we are returning. 
This points into + /// the macro definition we are lexing from, a scratch buffer allocated from + /// the preprocessor's bump pointer allocator, or some other buffer that we + /// may or may not own (depending on OwnsTokens). + const Token *Tokens; + + /// NumTokens - This is the length of the Tokens array. + /// + unsigned NumTokens; + + /// CurToken - This is the next token that Lex will return. + /// + unsigned CurToken; + + /// InstantiateLocStart/End - The source location range where this macro was + /// instantiated. + SourceLocation InstantiateLocStart, InstantiateLocEnd; + + /// Lexical information about the expansion point of the macro: the identifier + /// that the macro expanded from had these properties. + bool AtStartOfLine : 1; + bool HasLeadingSpace : 1; + + /// OwnsTokens - This is true if this TokenLexer allocated the Tokens + /// array, and thus needs to free it when destroyed. For simple object-like + /// macros (for example) we just point into the token buffer of the macro + /// definition, we don't make a copy of it. + bool OwnsTokens : 1; + + /// DisableMacroExpansion - This is true when tokens lexed from the TokenLexer + /// should not be subject to further macro expansion. + bool DisableMacroExpansion : 1; + + TokenLexer(const TokenLexer&); // DO NOT IMPLEMENT + void operator=(const TokenLexer&); // DO NOT IMPLEMENT +public: + /// Create a TokenLexer for the specified macro with the specified actual + /// arguments. Note that this ctor takes ownership of the ActualArgs pointer. + /// ILEnd specifies the location of the ')' for a function-like macro or the + /// identifier for an object-like macro. + TokenLexer(Token &Tok, SourceLocation ILEnd, MacroArgs *ActualArgs, + Preprocessor &pp) + : Macro(0), ActualArgs(0), PP(pp), OwnsTokens(false) { + Init(Tok, ILEnd, ActualArgs); + } + + /// Init - Initialize this TokenLexer to expand from the specified macro + /// with the specified argument information. 
Note that this method takes + /// ownership of the ActualArgs pointer. ILEnd specifies the location of the + /// ')' for a function-like macro or the identifier for an object-like macro. + void Init(Token &Tok, SourceLocation ILEnd, MacroArgs *ActualArgs); + + /// Create a TokenLexer for the specified token stream. If 'OwnsTokens' is + /// specified, this takes ownership of the tokens and delete[]'s them when + /// the token lexer is empty. + TokenLexer(const Token *TokArray, unsigned NumToks, bool DisableExpansion, + bool ownsTokens, Preprocessor &pp) + : Macro(0), ActualArgs(0), PP(pp), OwnsTokens(false) { + Init(TokArray, NumToks, DisableExpansion, ownsTokens); + } + + /// Init - Initialize this TokenLexer with the specified token stream. + /// This does not take ownership of the specified token vector. + /// + /// DisableExpansion is true when macro expansion of tokens lexed from this + /// stream should be disabled. + void Init(const Token *TokArray, unsigned NumToks, + bool DisableMacroExpansion, bool OwnsTokens); + + ~TokenLexer() { destroy(); } + + /// isNextTokenLParen - If the next token lexed will pop this macro off the + /// expansion stack, return 2. If the next unexpanded token is a '(', return + /// 1, otherwise return 0. + unsigned isNextTokenLParen() const; + + /// Lex - Lex and return a token from this macro stream. + void Lex(Token &Tok); + +private: + void destroy(); + + /// isAtEnd - Return true if the next lex call will pop this macro off the + /// include stack. + bool isAtEnd() const { + return CurToken == NumTokens; + } + + /// PasteTokens - Tok is the LHS of a ## operator, and CurToken is the ## + /// operator. Read the ## and RHS, and paste the LHS/RHS together. If there + /// is another ## after it, chomp it iteratively. Return the result as + /// Tok. If this returns true, the caller should immediately return the + /// token.
+ bool PasteTokens(Token &Tok); + + /// Expand the arguments of a function-like macro so that we can quickly + /// return preexpanded tokens from Tokens. + void ExpandFunctionArguments(); + + /// HandleMicrosoftCommentPaste - In microsoft compatibility mode, /##/ pastes + /// together to form a comment that comments out everything in the current + /// macro, other active macros, and anything left on the current physical + /// source line of the instantiated buffer. Handle this by returning the + /// first token on the next line. + void HandleMicrosoftCommentPaste(Token &Tok); +}; + +} // end namespace clang + +#endif |