diff options
Diffstat (limited to 'lib/Rewrite/InclusionRewriter.cpp')
| -rw-r--r-- | lib/Rewrite/InclusionRewriter.cpp | 361 | 
1 files changed, 361 insertions, 0 deletions
diff --git a/lib/Rewrite/InclusionRewriter.cpp b/lib/Rewrite/InclusionRewriter.cpp new file mode 100644 index 000000000000..3dfc3b008987 --- /dev/null +++ b/lib/Rewrite/InclusionRewriter.cpp @@ -0,0 +1,361 @@ +//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This code rewrites include invocations into their expansions.  This gives you +// a file with all included files merged into it. +// +//===----------------------------------------------------------------------===// + +#include "clang/Rewrite/Rewriters.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Frontend/PreprocessorOutputOptions.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace llvm; + +namespace { + +class InclusionRewriter : public PPCallbacks { +  /// Information about which #includes were actually performed, +  /// created by preprocessor callbacks. +  struct FileChange { +    SourceLocation From; +    FileID Id; +    SrcMgr::CharacteristicKind FileType; +    FileChange(SourceLocation From) : From(From) { +    } +  }; +  Preprocessor &PP; ///< Used to find inclusion directives. +  SourceManager &SM; ///< Used to read and manage source files. +  raw_ostream &OS; ///< The destination stream for rewritten contents. +  bool ShowLineMarkers; ///< Show #line markers. +  bool UseLineDirective; ///< Use of line directives or line markers. +  typedef std::map<unsigned, FileChange> FileChangeMap; +  FileChangeMap FileChanges; /// Tracks which files were included where. +  /// Used transitively for building up the FileChanges mapping over the +  /// various \c PPCallbacks callbacks. +  FileChangeMap::iterator LastInsertedFileChange; +public: +  InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers); +  bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType); +private: +  virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason, +                           SrcMgr::CharacteristicKind FileType, +                           FileID PrevFID); +  virtual void FileSkipped(const FileEntry &ParentFile, +                           const Token &FilenameTok, +                           SrcMgr::CharacteristicKind FileType); +  virtual void InclusionDirective(SourceLocation HashLoc, +                                  const Token &IncludeTok, +                                  StringRef FileName, +                                  bool IsAngled, +                                  const FileEntry *File, +                                  SourceLocation EndLoc, +                                  StringRef SearchPath, +                                  StringRef RelativePath); +  void WriteLineInfo(const char *Filename, int Line, +                     SrcMgr::CharacteristicKind FileType, +                     StringRef EOL, StringRef Extra = StringRef()); +  void OutputContentUpTo(const MemoryBuffer &FromFile, +                         unsigned &WriteFrom, unsigned WriteTo, +                         StringRef EOL, int &lines, +                         bool EnsureNewline = false); +  void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken, +                           const MemoryBuffer &FromFile, StringRef EOL, +                           unsigned &NextToWrite, int &Lines); +  const FileChange *FindFileChangeLocation(SourceLocation Loc) const; +  StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken); +}; + +}  // end anonymous namespace + +/// Initializes an InclusionRewriter with a \p PP source and \p OS destination. +InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS, +                                     bool ShowLineMarkers) +    : PP(PP), SM(PP.getSourceManager()), OS(OS), +    ShowLineMarkers(ShowLineMarkers), +    LastInsertedFileChange(FileChanges.end()) { +  // If we're in microsoft mode, use normal #line instead of line markers. +  UseLineDirective = PP.getLangOpts().MicrosoftExt; +} + +/// Write appropriate line information as either #line directives or GNU line +/// markers depending on what mode we're in, including the \p Filename and +/// \p Line we are located at, using the specified \p EOL line separator, and +/// any \p Extra context specifiers in GNU line directives. +void InclusionRewriter::WriteLineInfo(const char *Filename, int Line, +                                      SrcMgr::CharacteristicKind FileType, +                                      StringRef EOL, StringRef Extra) { +  if (!ShowLineMarkers) +    return; +  if (UseLineDirective) { +    OS << "#line" << ' ' << Line << ' ' << '"' << Filename << '"'; +  } else { +    // Use GNU linemarkers as described here: +    // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html +    OS << '#' << ' ' << Line << ' ' << '"' << Filename << '"'; +    if (!Extra.empty()) +      OS << Extra; +    if (FileType == SrcMgr::C_System) +      // "`3' This indicates that the following text comes from a system header +      // file, so certain warnings should be suppressed." +      OS << " 3"; +    else if (FileType == SrcMgr::C_ExternCSystem) +      // as above for `3', plus "`4' This indicates that the following text +      // should be treated as being wrapped in an implicit extern "C" block." +      OS << " 3 4"; +  } +  OS << EOL; +} + +/// FileChanged - Whenever the preprocessor enters or exits a #include file +/// it invokes this handler. +void InclusionRewriter::FileChanged(SourceLocation Loc, +                                    FileChangeReason Reason, +                                    SrcMgr::CharacteristicKind NewFileType, +                                    FileID) { +  if (Reason != EnterFile) +    return; +  if (LastInsertedFileChange == FileChanges.end()) +    // we didn't reach this file (eg: the main file) via an inclusion directive +    return; +  LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID(); +  LastInsertedFileChange->second.FileType = NewFileType; +  LastInsertedFileChange = FileChanges.end(); +} + +/// Called whenever an inclusion is skipped due to canonical header protection +/// macros. +void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/, +                                    const Token &/*FilenameTok*/, +                                    SrcMgr::CharacteristicKind /*FileType*/) { +  assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't " +    "found via an inclusion directive, was skipped"); +  FileChanges.erase(LastInsertedFileChange); +  LastInsertedFileChange = FileChanges.end(); +} + +/// This should be called whenever the preprocessor encounters include +/// directives. It does not say whether the file has been included, but it +/// provides more information about the directive (hash location instead +/// of location inside the included file). It is assumed that the matching +/// FileChanged() or FileSkipped() is called after this. +void InclusionRewriter::InclusionDirective(SourceLocation HashLoc, +                                           const Token &/*IncludeTok*/, +                                           StringRef /*FileName*/, +                                           bool /*IsAngled*/, +                                           const FileEntry * /*File*/, +                                           SourceLocation /*EndLoc*/, +                                           StringRef /*SearchPath*/, +                                           StringRef /*RelativePath*/) { +  assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion " +    "directive was found before the previous one was processed"); +  std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert( +    std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc))); +  assert(p.second && "Unexpected revisitation of the same include directive"); +  LastInsertedFileChange = p.first; +} + +/// Simple lookup for a SourceLocation (specifically one denoting the hash in +/// an inclusion directive) in the map of inclusion information, FileChanges. +const InclusionRewriter::FileChange * +InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const { +  FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding()); +  if (I != FileChanges.end()) +    return &I->second; +  return NULL; +} + +/// Detect the likely line ending style of \p FromFile by examining the first +/// newline found within it. +static StringRef DetectEOL(const MemoryBuffer &FromFile) { +  // detect what line endings the file uses, so that added content does not mix +  // the style +  const char *Pos = strchr(FromFile.getBufferStart(), '\n'); +  if (Pos == NULL) +    return "\n"; +  if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r') +    return "\n\r"; +  if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r') +    return "\r\n"; +  return "\n"; +} + +/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at +/// \p WriteTo - 1. +void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile, +                                          unsigned &WriteFrom, unsigned WriteTo, +                                          StringRef EOL, int &Line, +                                          bool EnsureNewline) { +  if (WriteTo <= WriteFrom) +    return; +  OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom); +  // count lines manually, it's faster than getPresumedLoc() +  Line += std::count(FromFile.getBufferStart() + WriteFrom, +                     FromFile.getBufferStart() + WriteTo, '\n'); +  if (EnsureNewline) { +    char LastChar = FromFile.getBufferStart()[WriteTo - 1]; +    if (LastChar != '\n' && LastChar != '\r') +      OS << EOL; +  } +  WriteFrom = WriteTo; +} + +/// Print characters from \p FromFile starting at \p NextToWrite up until the +/// inclusion directive at \p StartToken, then print out the inclusion +/// inclusion directive disabled by a #if directive, updating \p NextToWrite +/// and \p Line to track the number of source lines visited and the progress +/// through the \p FromFile buffer. +void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex, +                                            const Token &StartToken, +                                            const MemoryBuffer &FromFile, +                                            StringRef EOL, +                                            unsigned &NextToWrite, int &Line) { +  OutputContentUpTo(FromFile, NextToWrite, +    SM.getFileOffset(StartToken.getLocation()), EOL, Line); +  Token DirectiveToken; +  do { +    DirectiveLex.LexFromRawLexer(DirectiveToken); +  } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof)); +  OS << "#if 0 /* expanded by -frewrite-includes */" << EOL; +  OutputContentUpTo(FromFile, NextToWrite, +    SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(), +    EOL, Line); +  OS << "#endif /* expanded by -frewrite-includes */" << EOL; +} + +/// Find the next identifier in the pragma directive specified by \p RawToken. +StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex, +                                                Token &RawToken) { +  RawLex.LexFromRawLexer(RawToken); +  if (RawToken.is(tok::raw_identifier)) +    PP.LookUpIdentifierInfo(RawToken); +  if (RawToken.is(tok::identifier)) +    return RawToken.getIdentifierInfo()->getName(); +  return StringRef(); +} + +/// Use a raw lexer to analyze \p FileId, inccrementally copying parts of it +/// and including content of included files recursively. +bool InclusionRewriter::Process(FileID FileId, +                                SrcMgr::CharacteristicKind FileType) +{ +  bool Invalid; +  const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid); +  if (Invalid) // invalid inclusion +    return true; +  const char *FileName = FromFile.getBufferIdentifier(); +  Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts()); +  RawLex.SetCommentRetentionState(false); + +  StringRef EOL = DetectEOL(FromFile); + +  // Per the GNU docs: "1" indicates the start of a new file. +  WriteLineInfo(FileName, 1, FileType, EOL, " 1"); + +  if (SM.getFileIDSize(FileId) == 0) +    return true; + +  // The next byte to be copied from the source file +  unsigned NextToWrite = 0; +  int Line = 1; // The current input file line number. + +  Token RawToken; +  RawLex.LexFromRawLexer(RawToken); + +  // TODO: Consider adding a switch that strips possibly unimportant content, +  // such as comments, to reduce the size of repro files. +  while (RawToken.isNot(tok::eof)) { +    if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) { +      RawLex.setParsingPreprocessorDirective(true); +      Token HashToken = RawToken; +      RawLex.LexFromRawLexer(RawToken); +      if (RawToken.is(tok::raw_identifier)) +        PP.LookUpIdentifierInfo(RawToken); +      if (RawToken.is(tok::identifier)) { +        switch (RawToken.getIdentifierInfo()->getPPKeywordID()) { +          case tok::pp_include: +          case tok::pp_include_next: +          case tok::pp_import: { +            CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite, +              Line); +            if (const FileChange *Change = FindFileChangeLocation( +                HashToken.getLocation())) { +              // now include and recursively process the file +              if (Process(Change->Id, Change->FileType)) +                // and set lineinfo back to this file, if the nested one was +                // actually included +                // `2' indicates returning to a file (after having included +                // another file. +                WriteLineInfo(FileName, Line, FileType, EOL, " 2"); +            } else +              // fix up lineinfo (since commented out directive changed line +              // numbers) for inclusions that were skipped due to header guards +              WriteLineInfo(FileName, Line, FileType, EOL); +            break; +          } +          case tok::pp_pragma: { +            StringRef Identifier = NextIdentifierName(RawLex, RawToken); +            if (Identifier == "clang" || Identifier == "GCC") { +              if (NextIdentifierName(RawLex, RawToken) == "system_header") { +                // keep the directive in, commented out +                CommentOutDirective(RawLex, HashToken, FromFile, EOL, +                  NextToWrite, Line); +                // update our own type +                FileType = SM.getFileCharacteristic(RawToken.getLocation()); +                WriteLineInfo(FileName, Line, FileType, EOL); +              } +            } else if (Identifier == "once") { +              // keep the directive in, commented out +              CommentOutDirective(RawLex, HashToken, FromFile, EOL, +                NextToWrite, Line); +              WriteLineInfo(FileName, Line, FileType, EOL); +            } +            break; +          } +          default: +            break; +        } +      } +      RawLex.setParsingPreprocessorDirective(false); +    } +    RawLex.LexFromRawLexer(RawToken); +  } +  OutputContentUpTo(FromFile, NextToWrite, +    SM.getFileOffset(SM.getLocForEndOfFile(FileId)) + 1, EOL, Line, +    /*EnsureNewline*/true); +  return true; +} + +/// InclusionRewriterInInput - Implement -frewrite-includes mode. +void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS, +                                   const PreprocessorOutputOptions &Opts) { +  SourceManager &SM = PP.getSourceManager(); +  InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS, +                                                     Opts.ShowLineMarkers); +  PP.addPPCallbacks(Rewrite); + +  // First let the preprocessor process the entire file and call callbacks. +  // Callbacks will record which #include's were actually performed. +  PP.EnterMainSourceFile(); +  Token Tok; +  // Only preprocessor directives matter here, so disable macro expansion +  // everywhere else as an optimization. +  // TODO: It would be even faster if the preprocessor could be switched +  // to a mode where it would parse only preprocessor directives and comments, +  // nothing else matters for parsing or processing. +  PP.SetMacroExpansionOnlyInDirectives(); +  do { +    PP.Lex(Tok); +  } while (Tok.isNot(tok::eof)); +  Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User); +  OS->flush(); +}  | 
