summary refs log tree commit diff
path: root/include/llvm/LTO/LTO.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/llvm/LTO/LTO.h')
-rw-r--r-- include/llvm/LTO/LTO.h 435
1 files changed, 409 insertions, 26 deletions
diff --git a/include/llvm/LTO/LTO.h b/include/llvm/LTO/LTO.h
index 5154c0007aaa..bc435702157e 100644
--- a/include/llvm/LTO/LTO.h
+++ b/include/llvm/LTO/LTO.h
@@ -16,39 +16,28 @@
#ifndef LLVM_LTO_LTO_H
#define LLVM_LTO_LTO_H
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/LTO/Config.h"
+#include "llvm/Linker/IRMover.h"
+#include "llvm/Object/IRObjectFile.h"
+#include "llvm/Support/thread.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/IPO/FunctionImport.h"
namespace llvm {
+class BitcodeModule;
+class Error;
class LLVMContext;
class MemoryBufferRef;
class Module;
-
-/// Helper to load a module from bitcode.
-std::unique_ptr<Module> loadModuleFromBuffer(const MemoryBufferRef &Buffer,
- LLVMContext &Context, bool Lazy);
-
-/// Provide a "loader" for the FunctionImporter to access function from other
-/// modules.
-class ModuleLoader {
- /// The context that will be used for importing.
- LLVMContext &Context;
-
- /// Map from Module identifier to MemoryBuffer. Used by clients like the
- /// FunctionImported to request loading a Module.
- StringMap<MemoryBufferRef> &ModuleMap;
-
-public:
- ModuleLoader(LLVMContext &Context, StringMap<MemoryBufferRef> &ModuleMap)
- : Context(Context), ModuleMap(ModuleMap) {}
-
- /// Load a module on demand.
- std::unique_ptr<Module> operator()(StringRef Identifier) {
- return loadModuleFromBuffer(ModuleMap[Identifier], Context, /*Lazy*/ true);
- }
-};
-
+class Target;
+class raw_pwrite_stream;
/// Resolve Weak and LinkOnce values in the \p Index. Linkage changes recorded
/// in the index and the ThinLTO backends must apply the changes to the Module
@@ -69,6 +58,400 @@ void thinLTOResolveWeakForLinkerInIndex(
void thinLTOInternalizeAndPromoteInIndex(
ModuleSummaryIndex &Index,
function_ref<bool(StringRef, GlobalValue::GUID)> isExported);
-}
+
+namespace lto {
+
+/// Given the original \p Path to an output file, replace any path
+/// prefix matching \p OldPrefix with \p NewPrefix. Also, create the
+/// resulting directory if it does not yet exist.
+std::string getThinLTOOutputFile(const std::string &Path,
+ const std::string &OldPrefix,
+ const std::string &NewPrefix);
+
+class LTO;
+struct SymbolResolution;
+class ThinBackendProc;
+
+/// An input file. This is a wrapper for ModuleSymbolTable that exposes only the
+/// information that an LTO client should need in order to do symbol resolution.
+class InputFile {
+ // FIXME: Remove LTO class friendship once we have bitcode symbol tables.
+ friend LTO;
+ InputFile() = default;
+
+ // FIXME: Remove the LLVMContext once we have bitcode symbol tables.
+ LLVMContext Ctx;
+ struct InputModule;
+ std::vector<InputModule> Mods;
+ ModuleSymbolTable SymTab;
+
+ std::vector<StringRef> Comdats;
+ DenseMap<const Comdat *, unsigned> ComdatMap;
+
+public:
+ ~InputFile();
+
+ /// Create an InputFile.
+ static Expected<std::unique_ptr<InputFile>> create(MemoryBufferRef Object);
+
+ class symbol_iterator;
+
+ /// This is a wrapper for ArrayRef<ModuleSymbolTable::Symbol>::iterator that
+ /// exposes only the information that an LTO client should need in order to do
+ /// symbol resolution.
+ ///
+ /// This object is ephemeral; it is only valid as long as an iterator obtained
+ /// from symbols() refers to it.
+ class Symbol {
+ friend symbol_iterator;
+ friend LTO;
+
+ ArrayRef<ModuleSymbolTable::Symbol>::iterator I;
+ const ModuleSymbolTable &SymTab;
+ const InputFile *File;
+ uint32_t Flags;
+ SmallString<64> Name;
+
+ bool shouldSkip() {
+ return !(Flags & object::BasicSymbolRef::SF_Global) ||
+ (Flags & object::BasicSymbolRef::SF_FormatSpecific);
+ }
+
+ void skip() {
+ ArrayRef<ModuleSymbolTable::Symbol>::iterator E = SymTab.symbols().end();
+ while (I != E) {
+ Flags = SymTab.getSymbolFlags(*I);
+ if (!shouldSkip())
+ break;
+ ++I;
+ }
+ if (I == E)
+ return;
+
+ Name.clear();
+ {
+ raw_svector_ostream OS(Name);
+ SymTab.printSymbolName(OS, *I);
+ }
+ }
+
+ bool isGV() const { return I->is<GlobalValue *>(); }
+ GlobalValue *getGV() const { return I->get<GlobalValue *>(); }
+
+ public:
+ Symbol(ArrayRef<ModuleSymbolTable::Symbol>::iterator I,
+ const ModuleSymbolTable &SymTab, const InputFile *File)
+ : I(I), SymTab(SymTab), File(File) {
+ skip();
+ }
+
+ /// Returns the mangled name of the global.
+ StringRef getName() const { return Name; }
+
+ uint32_t getFlags() const { return Flags; }
+ GlobalValue::VisibilityTypes getVisibility() const {
+ if (isGV())
+ return getGV()->getVisibility();
+ return GlobalValue::DefaultVisibility;
+ }
+ bool canBeOmittedFromSymbolTable() const {
+ return isGV() && llvm::canBeOmittedFromSymbolTable(getGV());
+ }
+ bool isTLS() const {
+ // FIXME: Expose a thread-local flag for module asm symbols.
+ return isGV() && getGV()->isThreadLocal();
+ }
+
+ // Returns the index of the comdat this symbol is in or -1 if the symbol
+ // is not in a comdat.
+ // FIXME: We have to return Expected<int> because aliases point to an
+ // arbitrary ConstantExpr and that might not actually be a constant. That
+ // means we might not be able to find what an alias is aliased to, and
+ // therefore might not be able to find its comdat.
+ Expected<int> getComdatIndex() const;
+
+ uint64_t getCommonSize() const {
+ assert(Flags & object::BasicSymbolRef::SF_Common);
+ if (!isGV())
+ return 0;
+ return getGV()->getParent()->getDataLayout().getTypeAllocSize(
+ getGV()->getType()->getElementType());
+ }
+ unsigned getCommonAlignment() const {
+ assert(Flags & object::BasicSymbolRef::SF_Common);
+ if (!isGV())
+ return 0;
+ return getGV()->getAlignment();
+ }
+ };
+
+ class symbol_iterator {
+ Symbol Sym;
+
+ public:
+ symbol_iterator(ArrayRef<ModuleSymbolTable::Symbol>::iterator I,
+ const ModuleSymbolTable &SymTab, const InputFile *File)
+ : Sym(I, SymTab, File) {}
+
+ symbol_iterator &operator++() {
+ ++Sym.I;
+ Sym.skip();
+ return *this;
+ }
+
+ symbol_iterator operator++(int) {
+ symbol_iterator I = *this;
+ ++*this;
+ return I;
+ }
+
+ const Symbol &operator*() const { return Sym; }
+ const Symbol *operator->() const { return &Sym; }
+
+ bool operator!=(const symbol_iterator &Other) const {
+ return Sym.I != Other.Sym.I;
+ }
+ };
+
+ /// A range over the symbols in this InputFile.
+ iterator_range<symbol_iterator> symbols() {
+ return llvm::make_range(
+ symbol_iterator(SymTab.symbols().begin(), SymTab, this),
+ symbol_iterator(SymTab.symbols().end(), SymTab, this));
+ }
+
+ /// Returns the path to the InputFile.
+ StringRef getName() const;
+
+ /// Returns the source file path specified at compile time.
+ StringRef getSourceFileName() const;
+
+ // Returns a table with all the comdats used by this file.
+ ArrayRef<StringRef> getComdatTable() const { return Comdats; }
+
+private:
+ iterator_range<symbol_iterator> module_symbols(InputModule &IM);
+};
+
+/// This class wraps an output stream for a native object. Most clients should
+/// just be able to return an instance of this base class from the stream
+/// callback, but if a client needs to perform some action after the stream is
+/// written to, that can be done by deriving from this class and overriding the
+/// destructor.
+class NativeObjectStream {
+public:
+ NativeObjectStream(std::unique_ptr<raw_pwrite_stream> OS) : OS(std::move(OS)) {}
+ std::unique_ptr<raw_pwrite_stream> OS;
+ virtual ~NativeObjectStream() = default;
+};
+
+/// This type defines the callback to add a native object that is generated on
+/// the fly.
+///
+/// Stream callbacks must be thread safe.
+typedef std::function<std::unique_ptr<NativeObjectStream>(unsigned Task)>
+ AddStreamFn;
+
+/// This is the type of a native object cache. To request an item from the
+/// cache, pass a unique string as the Key. For hits, the cached file will be
+/// added to the link and this function will return AddStreamFn(). For misses,
+/// the cache will return a stream callback which must be called at most once to
+/// produce content for the stream. The native object stream produced by the
+/// stream callback will add the file to the link after the stream is written
+/// to.
+///
+/// Clients generally look like this:
+///
+///   if (AddStreamFn AddStream = Cache(Task, Key))
+///     ProduceContent(AddStream);
+typedef std::function<AddStreamFn(unsigned Task, StringRef Key)>
+ NativeObjectCache;
+
+/// A ThinBackend defines what happens after the thin-link phase during ThinLTO.
+/// The details of this type definition aren't important; clients can only
+/// create a ThinBackend using one of the create*ThinBackend() functions below.
+typedef std::function<std::unique_ptr<ThinBackendProc>(
+ Config &C, ModuleSummaryIndex &CombinedIndex,
+ StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ AddStreamFn AddStream, NativeObjectCache Cache)>
+ ThinBackend;
+
+/// This ThinBackend runs the individual backend jobs in-process.
+ThinBackend createInProcessThinBackend(unsigned ParallelismLevel);
+
+/// This ThinBackend writes individual module indexes to files, instead of
+/// running the individual backend jobs. This backend is for distributed builds
+/// where separate processes will invoke the real backends.
+///
+/// To find the path to write the index to, the backend checks if the path has a
+/// prefix of OldPrefix; if so, it replaces that prefix with NewPrefix. It then
+/// appends ".thinlto.bc" and writes the index to that path. If
+/// ShouldEmitImportsFiles is true it also writes a list of imported files to a
+/// similar path with ".imports" appended instead.
+ThinBackend createWriteIndexesThinBackend(std::string OldPrefix,
+ std::string NewPrefix,
+ bool ShouldEmitImportsFiles,
+ std::string LinkedObjectsFile);
+
+/// This class implements a resolution-based interface to LLVM's LTO
+/// functionality. It supports regular LTO, parallel LTO code generation and
+/// ThinLTO. You can use it from a linker in the following way:
+/// - Set hooks and code generation options (see lto::Config struct defined in
+/// Config.h), and use the lto::Config object to create an lto::LTO object.
+/// - Create lto::InputFile objects using lto::InputFile::create(), then use
+/// the symbols() function to enumerate its symbols and compute a resolution
+/// for each symbol (see SymbolResolution below).
+/// - After the linker has visited each input file (and each regular object
+/// file) and computed a resolution for each symbol, take each lto::InputFile
+/// and pass it and an array of symbol resolutions to the add() function.
+/// - Call the getMaxTasks() function to get an upper bound on the number of
+/// native object files that LTO may add to the link.
+/// - Call the run() function. This function will use the supplied AddStream
+/// and Cache functions to add up to getMaxTasks() native object files to
+/// the link.
+class LTO {
+ friend InputFile;
+
+public:
+ /// Create an LTO object. A default constructed LTO object has a reasonable
+ /// production configuration, but you can customize it by passing arguments to
+ /// this constructor.
+ /// FIXME: We do currently require the DiagHandler field to be set in Conf.
+ /// Until that is fixed, a Config argument is required.
+ LTO(Config Conf, ThinBackend Backend = nullptr,
+ unsigned ParallelCodeGenParallelismLevel = 1);
+ ~LTO();
+
+ /// Add an input file to the LTO link, using the provided symbol resolutions.
+ /// The symbol resolutions must appear in the enumeration order given by
+ /// InputFile::symbols().
+ Error add(std::unique_ptr<InputFile> Obj, ArrayRef<SymbolResolution> Res);
+
+ /// Returns an upper bound on the number of tasks that the client may expect.
+ /// This may only be called after all IR object files have been added. For a
+ /// full description of tasks see LTOBackend.h.
+ unsigned getMaxTasks() const;
+
+ /// Runs the LTO pipeline. This function calls the supplied AddStream
+ /// function to add native object files to the link.
+ ///
+ /// The Cache parameter is optional. If supplied, it will be used to cache
+ /// native object files and add them to the link.
+ ///
+ /// The client will receive at most one callback (via either AddStream or
+ /// Cache) for each task identifier.
+ Error run(AddStreamFn AddStream, NativeObjectCache Cache = nullptr);
+
+private:
+ Config Conf;
+
+ struct RegularLTOState {
+ RegularLTOState(unsigned ParallelCodeGenParallelismLevel, Config &Conf);
+ struct CommonResolution {
+ uint64_t Size = 0;
+ unsigned Align = 0;
+ /// Record if at least one instance of the common was marked as prevailing.
+ bool Prevailing = false;
+ };
+ std::map<std::string, CommonResolution> Commons;
+
+ unsigned ParallelCodeGenParallelismLevel;
+ LTOLLVMContext Ctx;
+ bool HasModule = false;
+ std::unique_ptr<Module> CombinedModule;
+ std::unique_ptr<IRMover> Mover;
+ } RegularLTO;
+
+ struct ThinLTOState {
+ ThinLTOState(ThinBackend Backend);
+
+ ThinBackend Backend;
+ ModuleSummaryIndex CombinedIndex;
+ MapVector<StringRef, BitcodeModule> ModuleMap;
+ DenseMap<GlobalValue::GUID, StringRef> PrevailingModuleForGUID;
+ } ThinLTO;
+
+ // The global resolution for a particular (mangled) symbol name. This is
+ // needed in particular to track whether each symbol can be internalized.
+ // Because any input file may introduce a new cross-partition reference, we
+ // cannot make any final internalization decisions until all input files have
+ // been added and the client has called run(). During run() we apply
+ // internalization decisions either directly to the module (for regular LTO)
+ // or to the combined index (for ThinLTO).
+ struct GlobalResolution {
+ /// The unmangled name of the global.
+ std::string IRName;
+
+ bool UnnamedAddr = true;
+
+ /// This field keeps track of the partition number of this global. The
+ /// regular LTO object is partition 0, while each ThinLTO object has its own
+ /// partition number from 1 onwards.
+ ///
+ /// Any global that is defined or used by more than one partition, or that
+ /// is referenced externally, may not be internalized.
+ ///
+ /// Partitions generally have a one-to-one correspondence with tasks, except
+ /// that we use partition 0 for all parallel LTO code generation partitions.
+ /// Any partitioning of the combined LTO object is done internally by the
+ /// LTO backend.
+ unsigned Partition = Unknown;
+
+ /// Special partition numbers.
+ enum : unsigned {
+ /// A partition number has not yet been assigned to this global.
+ Unknown = -1u,
+
+ /// This global is either used by more than one partition or has an
+ /// external reference, and therefore cannot be internalized.
+ External = -2u,
+ };
+ };
+
+ // Global mapping from mangled symbol names to resolutions.
+ StringMap<GlobalResolution> GlobalResolutions;
+
+ void addSymbolToGlobalRes(SmallPtrSet<GlobalValue *, 8> &Used,
+ const InputFile::Symbol &Sym, SymbolResolution Res,
+ unsigned Partition);
+
+ // These functions take a range of symbol resolutions [ResI, ResE) and consume
+ // the resolutions used by a single input module by incrementing ResI. After
+ // these functions return, [ResI, ResE) will refer to the resolution range for
+ // the remaining modules in the InputFile.
+ Error addModule(InputFile &Input, InputFile::InputModule &IM,
+ const SymbolResolution *&ResI, const SymbolResolution *ResE);
+ Error addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI,
+ const SymbolResolution *ResE);
+ Error addThinLTO(BitcodeModule BM, Module &M,
+ iterator_range<InputFile::symbol_iterator> Syms,
+ const SymbolResolution *&ResI, const SymbolResolution *ResE);
+
+ Error runRegularLTO(AddStreamFn AddStream);
+ Error runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
+ bool HasRegularLTO);
+
+ mutable bool CalledGetMaxTasks = false;
+};
+
+/// The resolution for a symbol. The linker must provide a SymbolResolution for
+/// each global symbol based on its internal resolution of that symbol.
+struct SymbolResolution {
+ SymbolResolution()
+ : Prevailing(0), FinalDefinitionInLinkageUnit(0), VisibleToRegularObj(0) {
+ }
+ /// The linker has chosen this definition of the symbol.
+ unsigned Prevailing : 1;
+
+ /// The definition of this symbol is unpreemptable at runtime and is known to
+ /// be in this linkage unit.
+ unsigned FinalDefinitionInLinkageUnit : 1;
+
+ /// The definition of this symbol is visible outside of the LTO unit.
+ unsigned VisibleToRegularObj : 1;
+};
+
+} // namespace lto
+} // namespace llvm
#endif