diff options
Diffstat (limited to 'lib/Tooling/InterpolatingCompilationDatabase.cpp')
-rw-r--r-- | lib/Tooling/InterpolatingCompilationDatabase.cpp | 277 |
1 files changed, 183 insertions, 94 deletions
diff --git a/lib/Tooling/InterpolatingCompilationDatabase.cpp b/lib/Tooling/InterpolatingCompilationDatabase.cpp index bc564584bd01a..4d0d84f660a2c 100644 --- a/lib/Tooling/InterpolatingCompilationDatabase.cpp +++ b/lib/Tooling/InterpolatingCompilationDatabase.cpp @@ -48,6 +48,7 @@ #include "clang/Frontend/LangStandard.h" #include "clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Option/ArgList.h" @@ -123,55 +124,79 @@ static types::ID foldType(types::ID Lang) { struct TransferableCommand { // Flags that should not apply to all files are stripped from CommandLine. CompileCommand Cmd; - // Language detected from -x or the filename. - types::ID Type = types::TY_INVALID; + // Language detected from -x or the filename. Never TY_INVALID. + Optional<types::ID> Type; // Standard specified by -std. LangStandard::Kind Std = LangStandard::lang_unspecified; + // Whether the command line is for the cl-compatible driver. + bool ClangCLMode; TransferableCommand(CompileCommand C) - : Cmd(std::move(C)), Type(guessType(Cmd.Filename)) { - std::vector<std::string> NewArgs = {Cmd.CommandLine.front()}; + : Cmd(std::move(C)), Type(guessType(Cmd.Filename)), + ClangCLMode(checkIsCLMode(Cmd.CommandLine)) { + std::vector<std::string> OldArgs = std::move(Cmd.CommandLine); + Cmd.CommandLine.clear(); + + // Wrap the old arguments in an InputArgList. + llvm::opt::InputArgList ArgList; + { + SmallVector<const char *, 16> TmpArgv; + for (const std::string &S : OldArgs) + TmpArgv.push_back(S.c_str()); + ArgList = {TmpArgv.begin(), TmpArgv.end()}; + } + // Parse the old args in order to strip out and record unwanted flags. + // We parse each argument individually so that we can retain the exact + // spelling of each argument; re-rendering is lossy for aliased flags. + // E.g. in CL mode, /W4 maps to -Wall. auto OptTable = clang::driver::createDriverOptTable(); - std::vector<const char *> Argv; - for (unsigned I = 1; I < Cmd.CommandLine.size(); ++I) - Argv.push_back(Cmd.CommandLine[I].c_str()); - unsigned MissingI, MissingC; - auto ArgList = OptTable->ParseArgs(Argv, MissingI, MissingC); - for (const auto *Arg : ArgList) { - const auto &option = Arg->getOption(); + Cmd.CommandLine.emplace_back(OldArgs.front()); + for (unsigned Pos = 1; Pos < OldArgs.size();) { + using namespace driver::options; + + const unsigned OldPos = Pos; + std::unique_ptr<llvm::opt::Arg> Arg(OptTable->ParseOneArg( + ArgList, Pos, + /* Include */ClangCLMode ? CoreOption | CLOption : 0, + /* Exclude */ClangCLMode ? 0 : CLOption)); + + if (!Arg) + continue; + + const llvm::opt::Option &Opt = Arg->getOption(); + // Strip input and output files. - if (option.matches(clang::driver::options::OPT_INPUT) || - option.matches(clang::driver::options::OPT_o)) { + if (Opt.matches(OPT_INPUT) || Opt.matches(OPT_o) || + (ClangCLMode && (Opt.matches(OPT__SLASH_Fa) || + Opt.matches(OPT__SLASH_Fe) || + Opt.matches(OPT__SLASH_Fi) || + Opt.matches(OPT__SLASH_Fo)))) continue; - } + // Strip -x, but record the overridden language. - if (option.matches(clang::driver::options::OPT_x)) { - for (const char *Value : Arg->getValues()) - Type = types::lookupTypeForTypeSpecifier(Value); + if (const auto GivenType = tryParseTypeArg(*Arg)) { + Type = *GivenType; continue; } - // Strip --std, but record the value. - if (option.matches(clang::driver::options::OPT_std_EQ)) { - for (const char *Value : Arg->getValues()) { - Std = llvm::StringSwitch<LangStandard::Kind>(Value) -#define LANGSTANDARD(id, name, lang, desc, features) \ - .Case(name, LangStandard::lang_##id) -#define LANGSTANDARD_ALIAS(id, alias) .Case(alias, LangStandard::lang_##id) -#include "clang/Frontend/LangStandards.def" - .Default(Std); - } + + // Strip -std, but record the value. + if (const auto GivenStd = tryParseStdArg(*Arg)) { + if (*GivenStd != LangStandard::lang_unspecified) + Std = *GivenStd; continue; } - llvm::opt::ArgStringList ArgStrs; - Arg->render(ArgList, ArgStrs); - NewArgs.insert(NewArgs.end(), ArgStrs.begin(), ArgStrs.end()); + + Cmd.CommandLine.insert(Cmd.CommandLine.end(), + OldArgs.data() + OldPos, OldArgs.data() + Pos); } - Cmd.CommandLine = std::move(NewArgs); if (Std != LangStandard::lang_unspecified) // -std take precedence over -x Type = toType(LangStandard::getLangStandardForKind(Std).getLanguage()); - Type = foldType(Type); + Type = foldType(*Type); + // The contract is to store None instead of TY_INVALID. + if (Type == types::TY_INVALID) + Type = llvm::None; } // Produce a CompileCommand for \p filename, based on this one. @@ -181,25 +206,43 @@ struct TransferableCommand { bool TypeCertain; auto TargetType = guessType(Filename, &TypeCertain); // If the filename doesn't determine the language (.h), transfer with -x. - if (!TypeCertain) { + if (TargetType != types::TY_INVALID && !TypeCertain && Type) { TargetType = types::onlyPrecompileType(TargetType) // header? - ? types::lookupHeaderTypeForSourceType(Type) - : Type; - Result.CommandLine.push_back("-x"); - Result.CommandLine.push_back(types::getTypeName(TargetType)); + ? types::lookupHeaderTypeForSourceType(*Type) + : *Type; + if (ClangCLMode) { + const StringRef Flag = toCLFlag(TargetType); + if (!Flag.empty()) + Result.CommandLine.push_back(Flag); + } else { + Result.CommandLine.push_back("-x"); + Result.CommandLine.push_back(types::getTypeName(TargetType)); + } } // --std flag may only be transferred if the language is the same. // We may consider "translating" these, e.g. c++11 -> c11. if (Std != LangStandard::lang_unspecified && foldType(TargetType) == Type) { - Result.CommandLine.push_back( - "-std=" + - std::string(LangStandard::getLangStandardForKind(Std).getName())); + Result.CommandLine.emplace_back(( + llvm::Twine(ClangCLMode ? "/std:" : "-std=") + + LangStandard::getLangStandardForKind(Std).getName()).str()); } Result.CommandLine.push_back(Filename); return Result; } private: + // Determine whether the given command line is intended for the CL driver. + static bool checkIsCLMode(ArrayRef<std::string> CmdLine) { + // First look for --driver-mode. + for (StringRef S : llvm::reverse(CmdLine)) { + if (S.consume_front("--driver-mode=")) + return S == "cl"; + } + + // Otherwise just check the clang executable file name. + return llvm::sys::path::stem(CmdLine.front()).endswith_lower("cl"); + } + // Map the language from the --std flag to that of the -x flag. static types::ID toType(InputKind::Language Lang) { switch (Lang) { @@ -215,64 +258,111 @@ private: return types::TY_INVALID; } } + + // Convert a file type to the matching CL-style type flag. + static StringRef toCLFlag(types::ID Type) { + switch (Type) { + case types::TY_C: + case types::TY_CHeader: + return "/TC"; + case types::TY_CXX: + case types::TY_CXXHeader: + return "/TP"; + default: + return StringRef(); + } + } + + // Try to interpret the argument as a type specifier, e.g. '-x'. + Optional<types::ID> tryParseTypeArg(const llvm::opt::Arg &Arg) { + const llvm::opt::Option &Opt = Arg.getOption(); + using namespace driver::options; + if (ClangCLMode) { + if (Opt.matches(OPT__SLASH_TC) || Opt.matches(OPT__SLASH_Tc)) + return types::TY_C; + if (Opt.matches(OPT__SLASH_TP) || Opt.matches(OPT__SLASH_Tp)) + return types::TY_CXX; + } else { + if (Opt.matches(driver::options::OPT_x)) + return types::lookupTypeForTypeSpecifier(Arg.getValue()); + } + return None; + } + + // Try to interpret the argument as '-std='. + Optional<LangStandard::Kind> tryParseStdArg(const llvm::opt::Arg &Arg) { + using namespace driver::options; + if (Arg.getOption().matches(ClangCLMode ? OPT__SLASH_std : OPT_std_EQ)) { + return llvm::StringSwitch<LangStandard::Kind>(Arg.getValue()) +#define LANGSTANDARD(id, name, lang, ...) .Case(name, LangStandard::lang_##id) +#define LANGSTANDARD_ALIAS(id, alias) .Case(alias, LangStandard::lang_##id) +#include "clang/Frontend/LangStandards.def" +#undef LANGSTANDARD_ALIAS +#undef LANGSTANDARD + .Default(LangStandard::lang_unspecified); + } + return None; + } }; -// CommandIndex does the real work: given a filename, it produces the best -// matching TransferableCommand by matching filenames. Basic strategy: +// Given a filename, FileIndex picks the best matching file from the underlying +// DB. This is the proxy file whose CompileCommand will be reused. The +// heuristics incorporate file name, extension, and directory structure. +// Strategy: // - Build indexes of each of the substrings we want to look up by. // These indexes are just sorted lists of the substrings. -// - Forward requests to the inner CDB. If it fails, we must pick a proxy. // - Each criterion corresponds to a range lookup into the index, so we only // need O(log N) string comparisons to determine scores. -// - We then break ties among the candidates with the highest score. -class CommandIndex { +// +// Apart from path proximity signals, also takes file extensions into account +// when scoring the candidates. +class FileIndex { public: - CommandIndex(std::vector<TransferableCommand> AllCommands) - : Commands(std::move(AllCommands)), Strings(Arena) { + FileIndex(std::vector<std::string> Files) + : OriginalPaths(std::move(Files)), Strings(Arena) { // Sort commands by filename for determinism (index is a tiebreaker later). - llvm::sort( - Commands.begin(), Commands.end(), - [](const TransferableCommand &Left, const TransferableCommand &Right) { - return Left.Cmd.Filename < Right.Cmd.Filename; - }); - for (size_t I = 0; I < Commands.size(); ++I) { - StringRef Path = - Strings.save(StringRef(Commands[I].Cmd.Filename).lower()); - Paths.push_back({Path, I}); + llvm::sort(OriginalPaths); + Paths.reserve(OriginalPaths.size()); + Types.reserve(OriginalPaths.size()); + Stems.reserve(OriginalPaths.size()); + for (size_t I = 0; I < OriginalPaths.size(); ++I) { + StringRef Path = Strings.save(StringRef(OriginalPaths[I]).lower()); + + Paths.emplace_back(Path, I); + Types.push_back(foldType(guessType(Path))); Stems.emplace_back(sys::path::stem(Path), I); auto Dir = ++sys::path::rbegin(Path), DirEnd = sys::path::rend(Path); for (int J = 0; J < DirectorySegmentsIndexed && Dir != DirEnd; ++J, ++Dir) if (Dir->size() > ShortDirectorySegment) // not trivial ones Components.emplace_back(*Dir, I); } - llvm::sort(Paths.begin(), Paths.end()); - llvm::sort(Stems.begin(), Stems.end()); - llvm::sort(Components.begin(), Components.end()); + llvm::sort(Paths); + llvm::sort(Stems); + llvm::sort(Components); } - bool empty() const { return Commands.empty(); } + bool empty() const { return Paths.empty(); } - // Returns the command that best fits OriginalFilename. - // Candidates with PreferLanguage will be chosen over others (unless it's - // TY_INVALID, or all candidates are bad). - const TransferableCommand &chooseProxy(StringRef OriginalFilename, - types::ID PreferLanguage) const { + // Returns the path for the file that best fits OriginalFilename. + // Candidates with extensions matching PreferLanguage will be chosen over + // others (unless it's TY_INVALID, or all candidates are bad). + StringRef chooseProxy(StringRef OriginalFilename, + types::ID PreferLanguage) const { assert(!empty() && "need at least one candidate!"); std::string Filename = OriginalFilename.lower(); auto Candidates = scoreCandidates(Filename); std::pair<size_t, int> Best = pickWinner(Candidates, Filename, PreferLanguage); - DEBUG_WITH_TYPE("interpolate", - llvm::dbgs() - << "interpolate: chose " - << Commands[Best.first].Cmd.Filename << " as proxy for " - << OriginalFilename << " preferring " - << (PreferLanguage == types::TY_INVALID - ? "none" - : types::getTypeName(PreferLanguage)) - << " score=" << Best.second << "\n"); - return Commands[Best.first]; + DEBUG_WITH_TYPE( + "interpolate", + llvm::dbgs() << "interpolate: chose " << OriginalPaths[Best.first] + << " as proxy for " << OriginalFilename << " preferring " + << (PreferLanguage == types::TY_INVALID + ? "none" + : types::getTypeName(PreferLanguage)) + << " score=" << Best.second << "\n"); + return OriginalPaths[Best.first]; } private: @@ -338,7 +428,7 @@ private: ScoredCandidate S; S.Index = Candidate.first; S.Preferred = PreferredLanguage == types::TY_INVALID || - PreferredLanguage == Commands[S.Index].Type; + PreferredLanguage == Types[S.Index]; S.Points = Candidate.second; if (!S.Preferred && Best.Preferred) continue; @@ -371,7 +461,7 @@ private: // If Prefix is true, it's instead the range starting with Key. template <bool Prefix> ArrayRef<SubstringAndIndex> - indexLookup(StringRef Key, const std::vector<SubstringAndIndex> &Idx) const { + indexLookup(StringRef Key, ArrayRef<SubstringAndIndex> Idx) const { // Use pointers as iteratiors to ease conversion of result to ArrayRef. auto Range = std::equal_range(Idx.data(), Idx.data() + Idx.size(), Key, Less<Prefix>()); @@ -379,8 +469,8 @@ private: } // Performs a point lookup into a nonempty index, returning a longest match. - SubstringAndIndex - longestMatch(StringRef Key, const std::vector<SubstringAndIndex> &Idx) const { + SubstringAndIndex longestMatch(StringRef Key, + ArrayRef<SubstringAndIndex> Idx) const { assert(!Idx.empty()); // Longest substring match will be adjacent to a direct lookup. auto It = @@ -395,22 +485,27 @@ private: return Prefix > PrevPrefix ? *It : *--It; } - std::vector<TransferableCommand> Commands; // Indexes point into this. + // Original paths, everything else is in lowercase. + std::vector<std::string> OriginalPaths; BumpPtrAllocator Arena; StringSaver Strings; // Indexes of candidates by certain substrings. // String is lowercase and sorted, index points into OriginalPaths. std::vector<SubstringAndIndex> Paths; // Full path. + // Lang types obtained by guessing on the corresponding path. I-th element is + // a type for the I-th path. + std::vector<types::ID> Types; std::vector<SubstringAndIndex> Stems; // Basename, without extension. std::vector<SubstringAndIndex> Components; // Last path components. }; // The actual CompilationDatabase wrapper delegates to its inner database. -// If no match, looks up a command in CommandIndex and transfers it to the file. +// If no match, looks up a proxy file in FileIndex and transfers its +// command to the requested file. class InterpolatingCompilationDatabase : public CompilationDatabase { public: InterpolatingCompilationDatabase(std::unique_ptr<CompilationDatabase> Inner) - : Inner(std::move(Inner)), Index(allCommands()) {} + : Inner(std::move(Inner)), Index(this->Inner->getAllFiles()) {} std::vector<CompileCommand> getCompileCommands(StringRef Filename) const override { @@ -421,7 +516,11 @@ public: auto Lang = guessType(Filename, &TypeCertain); if (!TypeCertain) Lang = types::TY_INVALID; - return {Index.chooseProxy(Filename, foldType(Lang)).transferTo(Filename)}; + auto ProxyCommands = + Inner->getCompileCommands(Index.chooseProxy(Filename, foldType(Lang))); + if (ProxyCommands.empty()) + return {}; + return {TransferableCommand(ProxyCommands[0]).transferTo(Filename)}; } std::vector<std::string> getAllFiles() const override { @@ -433,18 +532,8 @@ public: } private: - std::vector<TransferableCommand> allCommands() { - std::vector<TransferableCommand> Result; - for (auto Command : Inner->getAllCompileCommands()) { - Result.emplace_back(std::move(Command)); - if (Result.back().Type == types::TY_INVALID) - Result.pop_back(); - } - return Result; - } - std::unique_ptr<CompilationDatabase> Inner; - CommandIndex Index; + FileIndex Index; }; } // namespace |