diff options
Diffstat (limited to 'llvm/tools/llvm-extract/llvm-extract.cpp')
-rw-r--r-- | llvm/tools/llvm-extract/llvm-extract.cpp | 381 |
1 files changed, 381 insertions, 0 deletions
diff --git a/llvm/tools/llvm-extract/llvm-extract.cpp b/llvm/tools/llvm-extract/llvm-extract.cpp new file mode 100644 index 000000000000..dddc0d9baa08 --- /dev/null +++ b/llvm/tools/llvm-extract/llvm-extract.cpp @@ -0,0 +1,381 @@ +//===- llvm-extract.cpp - LLVM function extraction utility ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This utility changes the input module to only contain a single function, +// which is primarily used for debugging transformations. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Bitcode/BitcodeWriterPass.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/SystemUtils.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Transforms/IPO.h" +#include <memory> +using namespace llvm; + +cl::OptionCategory ExtractCat("llvm-extract Options"); + +// InputFilename - The filename to read from. +static cl::opt<std::string> InputFilename(cl::Positional, + cl::desc("<input bitcode file>"), + cl::init("-"), + cl::value_desc("filename")); + +static cl::opt<std::string> OutputFilename("o", + cl::desc("Specify output filename"), + cl::value_desc("filename"), + cl::init("-"), cl::cat(ExtractCat)); + +static cl::opt<bool> Force("f", cl::desc("Enable binary output on terminals"), + cl::cat(ExtractCat)); + +static cl::opt<bool> DeleteFn("delete", + cl::desc("Delete specified Globals from Module"), + cl::cat(ExtractCat)); + +static cl::opt<bool> + Recursive("recursive", cl::desc("Recursively extract all called functions"), + cl::cat(ExtractCat)); + +// ExtractFuncs - The functions to extract from the module. +static cl::list<std::string> + ExtractFuncs("func", cl::desc("Specify function to extract"), + cl::ZeroOrMore, cl::value_desc("function"), + cl::cat(ExtractCat)); + +// ExtractRegExpFuncs - The functions, matched via regular expression, to +// extract from the module. +static cl::list<std::string> + ExtractRegExpFuncs("rfunc", + cl::desc("Specify function(s) to extract using a " + "regular expression"), + cl::ZeroOrMore, cl::value_desc("rfunction"), + cl::cat(ExtractCat)); + +// ExtractBlocks - The blocks to extract from the module. +static cl::list<std::string> ExtractBlocks( + "bb", + cl::desc( + "Specify <function, basic block1[;basic block2...]> pairs to extract.\n" + "Each pair will create a function.\n" + "If multiple basic blocks are specified in one pair,\n" + "the first block in the sequence should dominate the rest.\n" + "eg:\n" + " --bb=f:bb1;bb2 will extract one function with both bb1 and bb2;\n" + " --bb=f:bb1 --bb=f:bb2 will extract two functions, one with bb1, one " + "with bb2."), + cl::ZeroOrMore, cl::value_desc("function:bb1[;bb2...]"), + cl::cat(ExtractCat)); + +// ExtractAlias - The alias to extract from the module. +static cl::list<std::string> + ExtractAliases("alias", cl::desc("Specify alias to extract"), + cl::ZeroOrMore, cl::value_desc("alias"), + cl::cat(ExtractCat)); + +// ExtractRegExpAliases - The aliases, matched via regular expression, to +// extract from the module. +static cl::list<std::string> + ExtractRegExpAliases("ralias", + cl::desc("Specify alias(es) to extract using a " + "regular expression"), + cl::ZeroOrMore, cl::value_desc("ralias"), + cl::cat(ExtractCat)); + +// ExtractGlobals - The globals to extract from the module. +static cl::list<std::string> + ExtractGlobals("glob", cl::desc("Specify global to extract"), + cl::ZeroOrMore, cl::value_desc("global"), + cl::cat(ExtractCat)); + +// ExtractRegExpGlobals - The globals, matched via regular expression, to +// extract from the module... +static cl::list<std::string> + ExtractRegExpGlobals("rglob", + cl::desc("Specify global(s) to extract using a " + "regular expression"), + cl::ZeroOrMore, cl::value_desc("rglobal"), + cl::cat(ExtractCat)); + +static cl::opt<bool> OutputAssembly("S", + cl::desc("Write output as LLVM assembly"), + cl::Hidden, cl::cat(ExtractCat)); + +static cl::opt<bool> PreserveBitcodeUseListOrder( + "preserve-bc-uselistorder", + cl::desc("Preserve use-list order when writing LLVM bitcode."), + cl::init(true), cl::Hidden, cl::cat(ExtractCat)); + +static cl::opt<bool> PreserveAssemblyUseListOrder( + "preserve-ll-uselistorder", + cl::desc("Preserve use-list order when writing LLVM assembly."), + cl::init(false), cl::Hidden, cl::cat(ExtractCat)); + +int main(int argc, char **argv) { + InitLLVM X(argc, argv); + + LLVMContext Context; + cl::HideUnrelatedOptions(ExtractCat); + cl::ParseCommandLineOptions(argc, argv, "llvm extractor\n"); + + // Use lazy loading, since we only care about selected global values. + SMDiagnostic Err; + std::unique_ptr<Module> M = getLazyIRFileModule(InputFilename, Err, Context); + + if (!M.get()) { + Err.print(argv[0], errs()); + return 1; + } + + // Use SetVector to avoid duplicates. + SetVector<GlobalValue *> GVs; + + // Figure out which aliases we should extract. + for (size_t i = 0, e = ExtractAliases.size(); i != e; ++i) { + GlobalAlias *GA = M->getNamedAlias(ExtractAliases[i]); + if (!GA) { + errs() << argv[0] << ": program doesn't contain alias named '" + << ExtractAliases[i] << "'!\n"; + return 1; + } + GVs.insert(GA); + } + + // Extract aliases via regular expression matching. + for (size_t i = 0, e = ExtractRegExpAliases.size(); i != e; ++i) { + std::string Error; + Regex RegEx(ExtractRegExpAliases[i]); + if (!RegEx.isValid(Error)) { + errs() << argv[0] << ": '" << ExtractRegExpAliases[i] << "' " + "invalid regex: " << Error; + } + bool match = false; + for (Module::alias_iterator GA = M->alias_begin(), E = M->alias_end(); + GA != E; GA++) { + if (RegEx.match(GA->getName())) { + GVs.insert(&*GA); + match = true; + } + } + if (!match) { + errs() << argv[0] << ": program doesn't contain global named '" + << ExtractRegExpAliases[i] << "'!\n"; + return 1; + } + } + + // Figure out which globals we should extract. + for (size_t i = 0, e = ExtractGlobals.size(); i != e; ++i) { + GlobalValue *GV = M->getNamedGlobal(ExtractGlobals[i]); + if (!GV) { + errs() << argv[0] << ": program doesn't contain global named '" + << ExtractGlobals[i] << "'!\n"; + return 1; + } + GVs.insert(GV); + } + + // Extract globals via regular expression matching. + for (size_t i = 0, e = ExtractRegExpGlobals.size(); i != e; ++i) { + std::string Error; + Regex RegEx(ExtractRegExpGlobals[i]); + if (!RegEx.isValid(Error)) { + errs() << argv[0] << ": '" << ExtractRegExpGlobals[i] << "' " + "invalid regex: " << Error; + } + bool match = false; + for (auto &GV : M->globals()) { + if (RegEx.match(GV.getName())) { + GVs.insert(&GV); + match = true; + } + } + if (!match) { + errs() << argv[0] << ": program doesn't contain global named '" + << ExtractRegExpGlobals[i] << "'!\n"; + return 1; + } + } + + // Figure out which functions we should extract. + for (size_t i = 0, e = ExtractFuncs.size(); i != e; ++i) { + GlobalValue *GV = M->getFunction(ExtractFuncs[i]); + if (!GV) { + errs() << argv[0] << ": program doesn't contain function named '" + << ExtractFuncs[i] << "'!\n"; + return 1; + } + GVs.insert(GV); + } + // Extract functions via regular expression matching. + for (size_t i = 0, e = ExtractRegExpFuncs.size(); i != e; ++i) { + std::string Error; + StringRef RegExStr = ExtractRegExpFuncs[i]; + Regex RegEx(RegExStr); + if (!RegEx.isValid(Error)) { + errs() << argv[0] << ": '" << ExtractRegExpFuncs[i] << "' " + "invalid regex: " << Error; + } + bool match = false; + for (Module::iterator F = M->begin(), E = M->end(); F != E; + F++) { + if (RegEx.match(F->getName())) { + GVs.insert(&*F); + match = true; + } + } + if (!match) { + errs() << argv[0] << ": program doesn't contain global named '" + << ExtractRegExpFuncs[i] << "'!\n"; + return 1; + } + } + + // Figure out which BasicBlocks we should extract. + SmallVector<SmallVector<BasicBlock *, 16>, 4> GroupOfBBs; + for (StringRef StrPair : ExtractBlocks) { + auto BBInfo = StrPair.split(':'); + // Get the function. + Function *F = M->getFunction(BBInfo.first); + if (!F) { + errs() << argv[0] << ": program doesn't contain a function named '" + << BBInfo.first << "'!\n"; + return 1; + } + // Do not materialize this function. + GVs.insert(F); + // Get the basic blocks. + SmallVector<BasicBlock *, 16> BBs; + SmallVector<StringRef, 16> BBNames; + BBInfo.second.split(BBNames, ';', /*MaxSplit=*/-1, + /*KeepEmpty=*/false); + for (StringRef BBName : BBNames) { + auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) { + return BB.getName().equals(BBName); + }); + if (Res == F->end()) { + errs() << argv[0] << ": function " << F->getName() + << " doesn't contain a basic block named '" << BBInfo.second + << "'!\n"; + return 1; + } + BBs.push_back(&*Res); + } + GroupOfBBs.push_back(BBs); + } + + // Use *argv instead of argv[0] to work around a wrong GCC warning. + ExitOnError ExitOnErr(std::string(*argv) + ": error reading input: "); + + if (Recursive) { + std::vector<llvm::Function *> Workqueue; + for (GlobalValue *GV : GVs) { + if (auto *F = dyn_cast<Function>(GV)) { + Workqueue.push_back(F); + } + } + while (!Workqueue.empty()) { + Function *F = &*Workqueue.back(); + Workqueue.pop_back(); + ExitOnErr(F->materialize()); + for (auto &BB : *F) { + for (auto &I : BB) { + CallBase *CB = dyn_cast<CallBase>(&I); + if (!CB) + continue; + Function *CF = CB->getCalledFunction(); + if (!CF) + continue; + if (CF->isDeclaration() || GVs.count(CF)) + continue; + GVs.insert(CF); + Workqueue.push_back(CF); + } + } + } + } + + auto Materialize = [&](GlobalValue &GV) { ExitOnErr(GV.materialize()); }; + + // Materialize requisite global values. + if (!DeleteFn) { + for (size_t i = 0, e = GVs.size(); i != e; ++i) + Materialize(*GVs[i]); + } else { + // Deleting. Materialize every GV that's *not* in GVs. + SmallPtrSet<GlobalValue *, 8> GVSet(GVs.begin(), GVs.end()); + for (auto &F : *M) { + if (!GVSet.count(&F)) + Materialize(F); + } + } + + { + std::vector<GlobalValue *> Gvs(GVs.begin(), GVs.end()); + legacy::PassManager Extract; + Extract.add(createGVExtractionPass(Gvs, DeleteFn)); + Extract.run(*M); + + // Now that we have all the GVs we want, mark the module as fully + // materialized. + // FIXME: should the GVExtractionPass handle this? + ExitOnErr(M->materializeAll()); + } + + // Extract the specified basic blocks from the module and erase the existing + // functions. + if (!ExtractBlocks.empty()) { + legacy::PassManager PM; + PM.add(createBlockExtractorPass(GroupOfBBs, true)); + PM.run(*M); + } + + // In addition to deleting all other functions, we also want to spiff it + // up a little bit. Do this now. + legacy::PassManager Passes; + + if (!DeleteFn) + Passes.add(createGlobalDCEPass()); // Delete unreachable globals + Passes.add(createStripDeadDebugInfoPass()); // Remove dead debug info + Passes.add(createStripDeadPrototypesPass()); // Remove dead func decls + + std::error_code EC; + ToolOutputFile Out(OutputFilename, EC, sys::fs::OF_None); + if (EC) { + errs() << EC.message() << '\n'; + return 1; + } + + if (OutputAssembly) + Passes.add( + createPrintModulePass(Out.os(), "", PreserveAssemblyUseListOrder)); + else if (Force || !CheckBitcodeOutputToConsole(Out.os(), true)) + Passes.add(createBitcodeWriterPass(Out.os(), PreserveBitcodeUseListOrder)); + + Passes.run(*M.get()); + + // Declare success. + Out.keep(); + + return 0; +} |