diff options
Diffstat (limited to 'llvm/lib/LTO')
| -rw-r--r-- | llvm/lib/LTO/Caching.cpp | 150 | ||||
| -rw-r--r-- | llvm/lib/LTO/LTO.cpp | 1412 | ||||
| -rw-r--r-- | llvm/lib/LTO/LTOBackend.cpp | 547 | ||||
| -rw-r--r-- | llvm/lib/LTO/LTOCodeGenerator.cpp | 722 | ||||
| -rw-r--r-- | llvm/lib/LTO/LTOModule.cpp | 678 | ||||
| -rw-r--r-- | llvm/lib/LTO/SummaryBasedOptimizations.cpp | 85 | ||||
| -rw-r--r-- | llvm/lib/LTO/ThinLTOCodeGenerator.cpp | 1135 | ||||
| -rw-r--r-- | llvm/lib/LTO/UpdateCompilerUsed.cpp | 132 |
8 files changed, 4861 insertions, 0 deletions
diff --git a/llvm/lib/LTO/Caching.cpp b/llvm/lib/LTO/Caching.cpp new file mode 100644 index 0000000000000..12dcd182de2d0 --- /dev/null +++ b/llvm/lib/LTO/Caching.cpp @@ -0,0 +1,150 @@ +//===-Caching.cpp - LLVM Link Time Optimizer Cache Handling ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Caching for ThinLTO. +// +//===----------------------------------------------------------------------===// + +#include "llvm/LTO/Caching.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/raw_ostream.h" + +#if !defined(_MSC_VER) && !defined(__MINGW32__) +#include <unistd.h> +#else +#include <io.h> +#endif + +using namespace llvm; +using namespace llvm::lto; + +Expected<NativeObjectCache> lto::localCache(StringRef CacheDirectoryPath, + AddBufferFn AddBuffer) { + if (std::error_code EC = sys::fs::create_directories(CacheDirectoryPath)) + return errorCodeToError(EC); + + return [=](unsigned Task, StringRef Key) -> AddStreamFn { + // This choice of file name allows the cache to be pruned (see pruneCache() + // in include/llvm/Support/CachePruning.h). + SmallString<64> EntryPath; + sys::path::append(EntryPath, CacheDirectoryPath, "llvmcache-" + Key); + // First, see if we have a cache hit. 
+ SmallString<64> ResultPath; + Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForRead( + Twine(EntryPath), sys::fs::OF_UpdateAtime, &ResultPath); + std::error_code EC; + if (FDOrErr) { + ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = + MemoryBuffer::getOpenFile(*FDOrErr, EntryPath, + /*FileSize=*/-1, + /*RequiresNullTerminator=*/false); + sys::fs::closeFile(*FDOrErr); + if (MBOrErr) { + AddBuffer(Task, std::move(*MBOrErr)); + return AddStreamFn(); + } + EC = MBOrErr.getError(); + } else { + EC = errorToErrorCode(FDOrErr.takeError()); + } + + // On Windows we can fail to open a cache file with a permission denied + // error. This generally means that another process has requested to delete + // the file while it is still open, but it could also mean that another + // process has opened the file without the sharing permissions we need. + // Since the file is probably being deleted we handle it in the same way as + // if the file did not exist at all. + if (EC != errc::no_such_file_or_directory && EC != errc::permission_denied) + report_fatal_error(Twine("Failed to open cache file ") + EntryPath + + ": " + EC.message() + "\n"); + + // This native object stream is responsible for commiting the resulting + // file to the cache and calling AddBuffer to add it to the link. + struct CacheStream : NativeObjectStream { + AddBufferFn AddBuffer; + sys::fs::TempFile TempFile; + std::string EntryPath; + unsigned Task; + + CacheStream(std::unique_ptr<raw_pwrite_stream> OS, AddBufferFn AddBuffer, + sys::fs::TempFile TempFile, std::string EntryPath, + unsigned Task) + : NativeObjectStream(std::move(OS)), AddBuffer(std::move(AddBuffer)), + TempFile(std::move(TempFile)), EntryPath(std::move(EntryPath)), + Task(Task) {} + + ~CacheStream() { + // Make sure the stream is closed before committing it. + OS.reset(); + + // Open the file first to avoid racing with a cache pruner. 
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = + MemoryBuffer::getOpenFile( + sys::fs::convertFDToNativeFile(TempFile.FD), TempFile.TmpName, + /*FileSize=*/-1, /*RequiresNullTerminator=*/false); + if (!MBOrErr) + report_fatal_error(Twine("Failed to open new cache file ") + + TempFile.TmpName + ": " + + MBOrErr.getError().message() + "\n"); + + // On POSIX systems, this will atomically replace the destination if + // it already exists. We try to emulate this on Windows, but this may + // fail with a permission denied error (for example, if the destination + // is currently opened by another process that does not give us the + // sharing permissions we need). Since the existing file should be + // semantically equivalent to the one we are trying to write, we give + // AddBuffer a copy of the bytes we wrote in that case. We do this + // instead of just using the existing file, because the pruner might + // delete the file before we get a chance to use it. + Error E = TempFile.keep(EntryPath); + E = handleErrors(std::move(E), [&](const ECError &E) -> Error { + std::error_code EC = E.convertToErrorCode(); + if (EC != errc::permission_denied) + return errorCodeToError(EC); + + auto MBCopy = MemoryBuffer::getMemBufferCopy((*MBOrErr)->getBuffer(), + EntryPath); + MBOrErr = std::move(MBCopy); + + // FIXME: should we consume the discard error? 
+ consumeError(TempFile.discard()); + + return Error::success(); + }); + + if (E) + report_fatal_error(Twine("Failed to rename temporary file ") + + TempFile.TmpName + " to " + EntryPath + ": " + + toString(std::move(E)) + "\n"); + + AddBuffer(Task, std::move(*MBOrErr)); + } + }; + + return [=](size_t Task) -> std::unique_ptr<NativeObjectStream> { + // Write to a temporary to avoid race condition + SmallString<64> TempFilenameModel; + sys::path::append(TempFilenameModel, CacheDirectoryPath, "Thin-%%%%%%.tmp.o"); + Expected<sys::fs::TempFile> Temp = sys::fs::TempFile::create( + TempFilenameModel, sys::fs::owner_read | sys::fs::owner_write); + if (!Temp) { + errs() << "Error: " << toString(Temp.takeError()) << "\n"; + report_fatal_error("ThinLTO: Can't get a temporary file"); + } + + // This CacheStream will move the temporary file into the cache when done. + return std::make_unique<CacheStream>( + std::make_unique<raw_fd_ostream>(Temp->FD, /* ShouldClose */ false), + AddBuffer, std::move(*Temp), EntryPath.str(), Task); + }; + }; +} diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp new file mode 100644 index 0000000000000..1e345e7dd89e5 --- /dev/null +++ b/llvm/lib/LTO/LTO.cpp @@ -0,0 +1,1412 @@ +//===-LTO.cpp - LLVM Link Time Optimizer ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements functions and classes used to support LTO. 
//
//===----------------------------------------------------------------------===//

#include "llvm/LTO/LTO.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/RemarkStreamer.h"
#include "llvm/LTO/LTOBackend.h"
#include "llvm/LTO/SummaryBasedOptimizations.h"
#include "llvm/Linker/IRMover.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/VCSRevision.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
#include "llvm/Transforms/Utils/SplitModule.h"

#include <set>

using namespace llvm;
using namespace lto;
using namespace object;

#define DEBUG_TYPE "lto"

/// Hidden option (default off) to dump the SCCs in the ThinLTO index's
/// callgraph.
static cl::opt<bool>
    DumpThinCGSCCs("dump-thin-cg-sccs", cl::init(false), cl::Hidden,
                   cl::desc("Dump the SCCs in the ThinLTO index's callgraph"));

/// Enable global value internalization in LTO.
cl::opt<bool> EnableLTOInternalization(
    "enable-lto-internalization", cl::init(true), cl::Hidden,
    cl::desc("Enable global value internalization in LTO"));

// Computes a unique hash for the Module considering the current list of
// export/import and other global analysis results.
// The hash is produced in \p Key.
//
// The order in which values are fed to the hasher below is part of the key:
// reordering any of the updates changes every cache key.
void llvm::computeLTOCacheKey(
    SmallString<40> &Key, const Config &Conf, const ModuleSummaryIndex &Index,
    StringRef ModuleID, const FunctionImporter::ImportMapTy &ImportList,
    const FunctionImporter::ExportSetTy &ExportList,
    const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
    const GVSummaryMapTy &DefinedGlobals,
    const std::set<GlobalValue::GUID> &CfiFunctionDefs,
    const std::set<GlobalValue::GUID> &CfiFunctionDecls) {
  // Compute the unique hash for this entry.
  // This is based on the current compiler version, the module itself, the
  // export list, the hash for every single module in the import list, the
  // list of ResolvedODR for the module, and the list of preserved symbols.
  SHA1 Hasher;

  // Start with the compiler revision
  Hasher.update(LLVM_VERSION_STRING);
#ifdef LLVM_REVISION
  Hasher.update(LLVM_REVISION);
#endif

  // Include the parts of the LTO configuration that affect code generation.
  // AddString appends a 0 byte after the string so that adjacent strings
  // cannot run together in the hashed stream.
  auto AddString = [&](StringRef Str) {
    Hasher.update(Str);
    Hasher.update(ArrayRef<uint8_t>{0});
  };
  // AddUnsigned/AddUint64 serialize the value least-significant byte first,
  // independent of host byte order.
  auto AddUnsigned = [&](unsigned I) {
    uint8_t Data[4];
    Data[0] = I;
    Data[1] = I >> 8;
    Data[2] = I >> 16;
    Data[3] = I >> 24;
    Hasher.update(ArrayRef<uint8_t>{Data, 4});
  };
  auto AddUint64 = [&](uint64_t I) {
    uint8_t Data[8];
    Data[0] = I;
    Data[1] = I >> 8;
    Data[2] = I >> 16;
    Data[3] = I >> 24;
    Data[4] = I >> 32;
    Data[5] = I >> 40;
    Data[6] = I >> 48;
    Data[7] = I >> 56;
    Hasher.update(ArrayRef<uint8_t>{Data, 8});
  };
  AddString(Conf.CPU);
  // FIXME: Hash more of Options. For now all clients initialize Options from
  // command-line flags (which is unsupported in production), but may set
  // RelaxELFRelocations. The clang driver can also pass FunctionSections,
  // DataSections and DebuggerTuning via command line flags.
  AddUnsigned(Conf.Options.RelaxELFRelocations);
  AddUnsigned(Conf.Options.FunctionSections);
  AddUnsigned(Conf.Options.DataSections);
  AddUnsigned((unsigned)Conf.Options.DebuggerTuning);
  for (auto &A : Conf.MAttrs)
    AddString(A);
  // An unset relocation/code model is hashed as -1 (all bits set).
  if (Conf.RelocModel)
    AddUnsigned(*Conf.RelocModel);
  else
    AddUnsigned(-1);
  if (Conf.CodeModel)
    AddUnsigned(*Conf.CodeModel);
  else
    AddUnsigned(-1);
  AddUnsigned(Conf.CGOptLevel);
  AddUnsigned(Conf.CGFileType);
  AddUnsigned(Conf.OptLevel);
  AddUnsigned(Conf.UseNewPM);
  AddUnsigned(Conf.Freestanding);
  AddString(Conf.OptPipeline);
  AddString(Conf.AAPipeline);
  AddString(Conf.OverrideTriple);
  AddString(Conf.DefaultTriple);
  AddString(Conf.DwoDir);

  // Include the hash for the current module
  auto ModHash = Index.getModuleHash(ModuleID);
  Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
  // NOTE(review): export entries are hashed as raw in-memory bytes, in the
  // container's iteration order (unlike the AddUint64 helper above) — confirm
  // that ExportSetTy iterates deterministically for equal sets.
  for (auto F : ExportList)
    // The export list can impact the internalization, be conservative here
    Hasher.update(ArrayRef<uint8_t>((uint8_t *)&F, sizeof(F)));

  // Include the hash for every module we import functions from. The set of
  // imported symbols for each module may affect code generation and is
  // sensitive to link order, so include that as well.
  for (auto &Entry : ImportList) {
    auto ModHash = Index.getModuleHash(Entry.first());
    Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));

    AddUint64(Entry.second.size());
    for (auto &Fn : Entry.second)
      AddUint64(Fn);
  }

  // Include the hash for the resolved ODR.
  for (auto &Entry : ResolvedODR) {
    Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.first,
                                    sizeof(GlobalValue::GUID)));
    Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.second,
                                    sizeof(GlobalValue::LinkageTypes)));
  }

  // Members of CfiFunctionDefs and CfiFunctionDecls that are referenced or
  // defined in this module.
  std::set<GlobalValue::GUID> UsedCfiDefs;
  std::set<GlobalValue::GUID> UsedCfiDecls;

  // Typeids used in this module.
  std::set<GlobalValue::GUID> UsedTypeIds;

  // Record ValueGUID in the used-CFI sets if it is a known CFI def/decl.
  auto AddUsedCfiGlobal = [&](GlobalValue::GUID ValueGUID) {
    if (CfiFunctionDefs.count(ValueGUID))
      UsedCfiDefs.insert(ValueGUID);
    if (CfiFunctionDecls.count(ValueGUID))
      UsedCfiDecls.insert(ValueGUID);
  };

  // Hash the per-summary flags of GS directly, and collect (without hashing
  // yet) the type ids and CFI globals it uses. A null summary is ignored.
  auto AddUsedThings = [&](GlobalValueSummary *GS) {
    if (!GS) return;
    AddUnsigned(GS->isLive());
    AddUnsigned(GS->canAutoHide());
    for (const ValueInfo &VI : GS->refs()) {
      AddUnsigned(VI.isDSOLocal());
      AddUsedCfiGlobal(VI.getGUID());
    }
    if (auto *GVS = dyn_cast<GlobalVarSummary>(GS)) {
      AddUnsigned(GVS->maybeReadOnly());
      AddUnsigned(GVS->maybeWriteOnly());
    }
    if (auto *FS = dyn_cast<FunctionSummary>(GS)) {
      for (auto &TT : FS->type_tests())
        UsedTypeIds.insert(TT);
      for (auto &TT : FS->type_test_assume_vcalls())
        UsedTypeIds.insert(TT.GUID);
      for (auto &TT : FS->type_checked_load_vcalls())
        UsedTypeIds.insert(TT.GUID);
      for (auto &TT : FS->type_test_assume_const_vcalls())
        UsedTypeIds.insert(TT.VFunc.GUID);
      for (auto &TT : FS->type_checked_load_const_vcalls())
        UsedTypeIds.insert(TT.VFunc.GUID);
      for (auto &ET : FS->calls()) {
        AddUnsigned(ET.first.isDSOLocal());
        AddUsedCfiGlobal(ET.first.getGUID());
      }
    }
  };

  // Include the hash for the linkage type to reflect internalization and weak
  // resolution, and collect any used type identifier resolutions.
  for (auto &GS : DefinedGlobals) {
    GlobalValue::LinkageTypes Linkage = GS.second->linkage();
    Hasher.update(
        ArrayRef<uint8_t>((const uint8_t *)&Linkage, sizeof(Linkage)));
    AddUsedCfiGlobal(GS.first);
    AddUsedThings(GS.second);
  }

  // Imported functions may introduce new uses of type identifier resolutions,
  // so we need to collect their used resolutions as well.
  for (auto &ImpM : ImportList)
    for (auto &ImpF : ImpM.second) {
      GlobalValueSummary *S = Index.findSummaryInModule(ImpF, ImpM.first());
      AddUsedThings(S);
      // If this is an alias, we also care about any types/etc. that the aliasee
      // may reference.
      if (auto *AS = dyn_cast_or_null<AliasSummary>(S))
        AddUsedThings(AS->getBaseObject());
    }

  // Hash one type id name together with its type-test resolution and all of
  // its whole-program-devirtualization resolutions. Every variable-length
  // sequence is preceded by its size.
  auto AddTypeIdSummary = [&](StringRef TId, const TypeIdSummary &S) {
    AddString(TId);

    AddUnsigned(S.TTRes.TheKind);
    AddUnsigned(S.TTRes.SizeM1BitWidth);

    AddUint64(S.TTRes.AlignLog2);
    AddUint64(S.TTRes.SizeM1);
    AddUint64(S.TTRes.BitMask);
    AddUint64(S.TTRes.InlineBits);

    AddUint64(S.WPDRes.size());
    for (auto &WPD : S.WPDRes) {
      AddUnsigned(WPD.first);
      AddUnsigned(WPD.second.TheKind);
      AddString(WPD.second.SingleImplName);

      AddUint64(WPD.second.ResByArg.size());
      for (auto &ByArg : WPD.second.ResByArg) {
        AddUint64(ByArg.first.size());
        for (uint64_t Arg : ByArg.first)
          AddUint64(Arg);
        AddUnsigned(ByArg.second.TheKind);
        AddUint64(ByArg.second.Info);
        AddUnsigned(ByArg.second.Byte);
        AddUnsigned(ByArg.second.Bit);
      }
    }
  };

  // Include the hash for all type identifiers used by this module.
  for (GlobalValue::GUID TId : UsedTypeIds) {
    auto TidIter = Index.typeIds().equal_range(TId);
    for (auto It = TidIter.first; It != TidIter.second; ++It)
      AddTypeIdSummary(It->second.first, It->second.second);
  }

  AddUnsigned(UsedCfiDefs.size());
  for (auto &V : UsedCfiDefs)
    AddUint64(V);

  AddUnsigned(UsedCfiDecls.size());
  for (auto &V : UsedCfiDecls)
    AddUint64(V);

  // Hash the contents of the sample profile, and of the profile remapping
  // file if one is configured. A file that cannot be read is silently left
  // out of the key; the remapping file is only considered when the sample
  // profile itself was readable.
  if (!Conf.SampleProfile.empty()) {
    auto FileOrErr = MemoryBuffer::getFile(Conf.SampleProfile);
    if (FileOrErr) {
      Hasher.update(FileOrErr.get()->getBuffer());

      if (!Conf.ProfileRemapping.empty()) {
        FileOrErr = MemoryBuffer::getFile(Conf.ProfileRemapping);
        if (FileOrErr)
          Hasher.update(FileOrErr.get()->getBuffer());
      }
    }
  }

  Key = toHex(Hasher.result());
}

/// Resolve the linkage of every summary of \p VI according to \p isPrevailing:
/// a prevailing linkonce copy becomes weak, a non-prevailing copy becomes
/// available_externally unless it is an alias or is listed in
/// \p GlobalInvolvedWithAlias. \p recordNewLinkage is invoked for each summary
/// whose linkage changed.
static void thinLTOResolvePrevailingGUID(
    ValueInfo VI, DenseSet<GlobalValueSummary *> &GlobalInvolvedWithAlias,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        isPrevailing,
    function_ref<void(StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes)>
        recordNewLinkage,
    const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
  for (auto &S : VI.getSummaryList()) {
    GlobalValue::LinkageTypes OriginalLinkage = S->linkage();
    // Ignore local and appending linkage values since the linker
    // doesn't resolve them.
    if (GlobalValue::isLocalLinkage(OriginalLinkage) ||
        GlobalValue::isAppendingLinkage(S->linkage()))
      continue;
    // We need to emit only one of these. The prevailing module will keep it,
    // but turned into a weak, while the others will drop it when possible.
    // This is both a compile-time optimization and a correctness
    // transformation. This is necessary for correctness when we have exported
    // a reference - we need to convert the linkonce to weak to
    // ensure a copy is kept to satisfy the exported reference.
    // FIXME: We may want to split the compile time and correctness
    // aspects into separate routines.
    if (isPrevailing(VI.getGUID(), S.get())) {
      if (GlobalValue::isLinkOnceLinkage(OriginalLinkage)) {
        S->setLinkage(GlobalValue::getWeakLinkage(
            GlobalValue::isLinkOnceODRLinkage(OriginalLinkage)));
        // The kept copy is eligible for auto-hiding (hidden visibility) if all
        // copies were (i.e. they were all linkonce_odr global unnamed addr).
        // If any copy is not (e.g. it was originally weak_odr), then the symbol
        // must remain externally available (e.g. a weak_odr from an explicitly
        // instantiated template). Additionally, if it is in the
        // GUIDPreservedSymbols set, that means that it is visible outside
        // the summary (e.g. in a native object or a bitcode file without
        // summary), and in that case we cannot hide it as it isn't possible to
        // check all copies.
        S->setCanAutoHide(VI.canAutoHide() &&
                          !GUIDPreservedSymbols.count(VI.getGUID()));
      }
    }
    // Alias and aliasee can't be turned into available_externally.
    else if (!isa<AliasSummary>(S.get()) &&
             !GlobalInvolvedWithAlias.count(S.get()))
      S->setLinkage(GlobalValue::AvailableExternallyLinkage);
    if (S->linkage() != OriginalLinkage)
      recordNewLinkage(S->modulePath(), VI.getGUID(), S->linkage());
  }
}

/// Resolve linkage for prevailing symbols in the \p Index.
//
// We'd like to drop these functions if they are no longer referenced in the
// current module. However there is a chance that another module is still
// referencing them because of the import. We make sure we always emit at least
// one copy.
+void llvm::thinLTOResolvePrevailingInIndex( + ModuleSummaryIndex &Index, + function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> + isPrevailing, + function_ref<void(StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes)> + recordNewLinkage, + const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) { + // We won't optimize the globals that are referenced by an alias for now + // Ideally we should turn the alias into a global and duplicate the definition + // when needed. + DenseSet<GlobalValueSummary *> GlobalInvolvedWithAlias; + for (auto &I : Index) + for (auto &S : I.second.SummaryList) + if (auto AS = dyn_cast<AliasSummary>(S.get())) + GlobalInvolvedWithAlias.insert(&AS->getAliasee()); + + for (auto &I : Index) + thinLTOResolvePrevailingGUID(Index.getValueInfo(I), GlobalInvolvedWithAlias, + isPrevailing, recordNewLinkage, + GUIDPreservedSymbols); +} + +static bool isWeakObjectWithRWAccess(GlobalValueSummary *GVS) { + if (auto *VarSummary = dyn_cast<GlobalVarSummary>(GVS->getBaseObject())) + return !VarSummary->maybeReadOnly() && !VarSummary->maybeWriteOnly() && + (VarSummary->linkage() == GlobalValue::WeakODRLinkage || + VarSummary->linkage() == GlobalValue::LinkOnceODRLinkage); + return false; +} + +static void thinLTOInternalizeAndPromoteGUID( + GlobalValueSummaryList &GVSummaryList, GlobalValue::GUID GUID, + function_ref<bool(StringRef, GlobalValue::GUID)> isExported, + function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> + isPrevailing) { + for (auto &S : GVSummaryList) { + if (isExported(S->modulePath(), GUID)) { + if (GlobalValue::isLocalLinkage(S->linkage())) + S->setLinkage(GlobalValue::ExternalLinkage); + } else if (EnableLTOInternalization && + // Ignore local and appending linkage values since the linker + // doesn't resolve them. 
+ !GlobalValue::isLocalLinkage(S->linkage()) && + (!GlobalValue::isInterposableLinkage(S->linkage()) || + isPrevailing(GUID, S.get())) && + S->linkage() != GlobalValue::AppendingLinkage && + // We can't internalize available_externally globals because this + // can break function pointer equality. + S->linkage() != GlobalValue::AvailableExternallyLinkage && + // Functions and read-only variables with linkonce_odr and + // weak_odr linkage can be internalized. We can't internalize + // linkonce_odr and weak_odr variables which are both modified + // and read somewhere in the program because reads and writes + // will become inconsistent. + !isWeakObjectWithRWAccess(S.get())) + S->setLinkage(GlobalValue::InternalLinkage); + } +} + +// Update the linkages in the given \p Index to mark exported values +// as external and non-exported values as internal. +void llvm::thinLTOInternalizeAndPromoteInIndex( + ModuleSummaryIndex &Index, + function_ref<bool(StringRef, GlobalValue::GUID)> isExported, + function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> + isPrevailing) { + for (auto &I : Index) + thinLTOInternalizeAndPromoteGUID(I.second.SummaryList, I.first, isExported, + isPrevailing); +} + +// Requires a destructor for std::vector<InputModule>. 
InputFile::~InputFile() = default;

/// Build an InputFile from the bitcode in \p Object: read its IR symbol table,
/// copy the file-level properties, and record for each module the range of
/// LTO-relevant symbols it contributes. Returns the error from readIRSymtab on
/// failure.
Expected<std::unique_ptr<InputFile>> InputFile::create(MemoryBufferRef Object) {
  std::unique_ptr<InputFile> File(new InputFile);

  Expected<IRSymtabFile> FOrErr = readIRSymtab(Object);
  if (!FOrErr)
    return FOrErr.takeError();

  File->TargetTriple = FOrErr->TheReader.getTargetTriple();
  File->SourceFileName = FOrErr->TheReader.getSourceFileName();
  File->COFFLinkerOpts = FOrErr->TheReader.getCOFFLinkerOpts();
  File->DependentLibraries = FOrErr->TheReader.getDependentLibraries();
  File->ComdatTable = FOrErr->TheReader.getComdatTable();

  for (unsigned I = 0; I != FOrErr->Mods.size(); ++I) {
    // [Begin, Symbols.size()) is the slice of File->Symbols owned by module I.
    size_t Begin = File->Symbols.size();
    for (const irsymtab::Reader::SymbolRef &Sym :
         FOrErr->TheReader.module_symbols(I))
      // Skip symbols that are irrelevant to LTO. Note that this condition needs
      // to match the one in Skip() in LTO::addRegularLTO().
      if (Sym.isGlobal() && !Sym.isFormatSpecific())
        File->Symbols.push_back(Sym);
    File->ModuleSymIndices.push_back({Begin, File->Symbols.size()});
  }

  File->Mods = FOrErr->Mods;
  File->Strtab = std::move(FOrErr->Strtab);
  return std::move(File);
}

/// Returns the module identifier of the first module in the file.
// NOTE(review): indexes Mods[0] unconditionally — presumably every InputFile
// holds at least one module; confirm.
StringRef InputFile::getName() const {
  return Mods[0].getModuleIdentifier();
}

/// Returns the only bitcode module of this file; asserts that there is
/// exactly one.
BitcodeModule &InputFile::getSingleBitcodeModule() {
  assert(Mods.size() == 1 && "Expect only one bitcode module");
  return Mods[0];
}

/// Set up the regular LTO state: a context built from \p Conf, an empty
/// combined module named "ld-temp.o", and an IRMover targeting that module.
LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel,
                                      Config &Conf)
    : ParallelCodeGenParallelismLevel(ParallelCodeGenParallelismLevel),
      Ctx(Conf), CombinedModule(std::make_unique<Module>("ld-temp.o", Ctx)),
      Mover(std::make_unique<IRMover>(*CombinedModule)) {}

/// Set up the ThinLTO state. If no \p Backend is supplied, an in-process
/// backend sized by heavyweight_hardware_concurrency() is used.
LTO::ThinLTOState::ThinLTOState(ThinBackend Backend)
    : Backend(Backend), CombinedIndex(/*HaveGVs*/ false) {
  if (!Backend)
    this->Backend =
        createInProcessThinBackend(llvm::heavyweight_hardware_concurrency());
}

// RegularLTO is initialized with this->Conf (the member), not the constructor
// parameter, which has already been moved from at that point.
LTO::LTO(Config Conf, ThinBackend Backend,
         unsigned ParallelCodeGenParallelismLevel)
    : Conf(std::move(Conf)),
      RegularLTO(ParallelCodeGenParallelismLevel, this->Conf),
      ThinLTO(std::move(Backend)) {}

// Requires a destructor for MapVector<BitcodeModule>.
LTO::~LTO() = default;

// Add the symbols in the given module to the GlobalResolutions map, and resolve
// their partitions.
//
// \p Res must contain at least one resolution per symbol in \p Syms, in the
// same order (asserted).
void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,
                               ArrayRef<SymbolResolution> Res,
                               unsigned Partition, bool InSummary) {
  auto *ResI = Res.begin();
  auto *ResE = Res.end();
  (void)ResE;
  for (const InputFile::Symbol &Sym : Syms) {
    assert(ResI != ResE);
    // Note: this local shadows the Res parameter for the rest of the loop body.
    SymbolResolution Res = *ResI++;

    StringRef Name = Sym.getName();
    Triple TT(RegularLTO.CombinedModule->getTargetTriple());
    // Strip the __imp_ prefix from COFF dllimport symbols (similar to the
    // way they are handled by lld), otherwise we can end up with two
    // global resolutions (one with and one for a copy of the symbol without).
    if (TT.isOSBinFormatCOFF() && Name.startswith("__imp_"))
      Name = Name.substr(strlen("__imp_"));
    auto &GlobalRes = GlobalResolutions[Name];
    GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr();
    if (Res.Prevailing) {
      assert(!GlobalRes.Prevailing &&
             "Multiple prevailing defs are not allowed");
      GlobalRes.Prevailing = true;
      GlobalRes.IRName = Sym.getIRName();
    } else if (!GlobalRes.Prevailing && GlobalRes.IRName.empty()) {
      // Sometimes it can be two copies of symbol in a module and prevailing
      // symbol can have no IR name. That might happen if symbol is defined in
      // module level inline asm block. In case we have multiple modules with
      // the same symbol we want to use IR name of the prevailing symbol.
      // Otherwise, if we haven't seen a prevailing symbol, set the name so that
      // we can later use it to check if there is any prevailing copy in IR.
      GlobalRes.IRName = Sym.getIRName();
    }

    // Set the partition to external if we know it is re-defined by the linker
    // with -defsym or -wrap options, used elsewhere, e.g. it is visible to a
    // regular object, is referenced from llvm.compiler_used, or was already
    // recorded as being referenced from a different partition.
    if (Res.LinkerRedefined || Res.VisibleToRegularObj || Sym.isUsed() ||
        (GlobalRes.Partition != GlobalResolution::Unknown &&
         GlobalRes.Partition != Partition)) {
      GlobalRes.Partition = GlobalResolution::External;
    } else
      // First recorded reference, save the current partition.
      GlobalRes.Partition = Partition;

    // Flag as visible outside of summary if visible from a regular object or
    // from a module that does not have a summary.
    GlobalRes.VisibleOutsideSummary |=
        (Res.VisibleToRegularObj || Sym.isUsed() || !InSummary);
  }
}

/// Write the name of \p Input followed by one "-r=<path>,<symbol>,<flags>"
/// line per symbol to \p OS. The flag letters are p (prevailing), l (final
/// definition in linkage unit), x (visible to regular object) and r (linker
/// redefined). \p Res must hold exactly one resolution per symbol (asserted).
static void writeToResolutionFile(raw_ostream &OS, InputFile *Input,
                                  ArrayRef<SymbolResolution> Res) {
  StringRef Path = Input->getName();
  OS << Path << '\n';
  auto ResI = Res.begin();
  for (const InputFile::Symbol &Sym : Input->symbols()) {
    assert(ResI != Res.end());
    SymbolResolution Res = *ResI++;

    OS << "-r=" << Path << ',' << Sym.getName() << ',';
    if (Res.Prevailing)
      OS << 'p';
    if (Res.FinalDefinitionInLinkageUnit)
      OS << 'l';
    if (Res.VisibleToRegularObj)
      OS << 'x';
    if (Res.LinkerRedefined)
      OS << 'r';
    OS << '\n';
  }
  OS.flush();
  assert(ResI == Res.end());
}

/// Add \p Input to the link with the symbol resolutions \p Res, which are
/// consumed in order across all modules of the file. Must not be called once
/// CalledGetMaxTasks is set (asserted).
Error LTO::add(std::unique_ptr<InputFile> Input,
               ArrayRef<SymbolResolution> Res) {
  assert(!CalledGetMaxTasks);

  if (Conf.ResolutionFile)
    writeToResolutionFile(*Conf.ResolutionFile, Input.get(), Res);

  // The first input with a triple determines the combined module's triple.
  if (RegularLTO.CombinedModule->getTargetTriple().empty())
    RegularLTO.CombinedModule->setTargetTriple(Input->getTargetTriple());

  // ResI is passed by reference and advanced by each addModule call.
  const SymbolResolution *ResI = Res.begin();
  for (unsigned I = 0; I != Input->Mods.size(); ++I)
    if (Error Err = addModule(*Input, I, ResI, Res.end()))
      return Err;

  assert(ResI == Res.end());
  return Error::success();
}

/// Add module number \p ModI of \p Input to the link, dispatching to the
/// ThinLTO or regular LTO path according to the module's LTO info.
/// \p ResI is advanced past the resolutions consumed by this module.
Error LTO::addModule(InputFile &Input, unsigned ModI,
                     const SymbolResolution *&ResI,
                     const SymbolResolution *ResE) {
  Expected<BitcodeLTOInfo> LTOInfo = Input.Mods[ModI].getLTOInfo();
  if (!LTOInfo)
    return LTOInfo.takeError();

  if (EnableSplitLTOUnit.hasValue()) {
    // If only some modules were split, flag this in the index so that
    // we can skip or error on optimizations that need consistently split
    // modules (whole program devirt and lower type tests).
    if (EnableSplitLTOUnit.getValue() != LTOInfo->EnableSplitLTOUnit)
      ThinLTO.CombinedIndex.setPartiallySplitLTOUnits();
  } else
    EnableSplitLTOUnit = LTOInfo->EnableSplitLTOUnit;

  BitcodeModule BM = Input.Mods[ModI];
  auto ModSyms = Input.module_symbols(ModI);
  // Regular LTO modules all use partition 0; a ThinLTO module uses
  // ModuleMap.size() + 1, computed before the module is added.
  addModuleToGlobalRes(ModSyms, {ResI, ResE},
                       LTOInfo->IsThinLTO ? ThinLTO.ModuleMap.size() + 1 : 0,
                       LTOInfo->HasSummary);

  if (LTOInfo->IsThinLTO)
    return addThinLTO(BM, ModSyms, ResI, ResE);

  Expected<RegularLTOState::AddedModule> ModOrErr =
      addRegularLTO(BM, ModSyms, ResI, ResE);
  if (!ModOrErr)
    return ModOrErr.takeError();

  // Without a summary the module is linked right away; with one, linking is
  // deferred by queueing the module in ModsWithSummaries.
  if (!LTOInfo->HasSummary)
    return linkRegularLTO(std::move(*ModOrErr), /*LivenessFromIndex=*/false);

  // Regular LTO module summaries are added to a dummy module that represents
  // the combined regular LTO module.
  if (Error Err = BM.readSummary(ThinLTO.CombinedIndex, "", -1ull))
    return Err;
  RegularLTO.ModsWithSummaries.push_back(std::move(*ModOrErr));
  return Error::success();
}

// Checks whether the given global value is in a non-prevailing comdat
// (comdat containing values the linker indicated were not prevailing,
// which we then dropped to available_externally), and if so, removes
// it from the comdat. This is called for all global values to ensure the
// comdat is empty rather than leaving an incomplete comdat. It is needed for
// regular LTO modules, in case we are in a mixed-LTO mode (both regular
// and thin LTO modules) compilation. Since the regular LTO module will be
// linked first in the final native link, we want to make sure the linker
// doesn't select any of these incomplete comdats that would be left
// in the regular LTO module without this cleanup.
static void
handleNonPrevailingComdat(GlobalValue &GV,
                          std::set<const Comdat *> &NonPrevailingComdats) {
  Comdat *C = GV.getComdat();
  if (!C)
    return;

  if (!NonPrevailingComdats.count(C))
    return;

  // Additionally need to drop externally visible global values from the comdat
  // to available_externally, so that there aren't multiply defined linker
  // errors.
  if (!GV.hasLocalLinkage())
    GV.setLinkage(GlobalValue::AvailableExternallyLinkage);

  if (auto GO = dyn_cast<GlobalObject>(&GV))
    GO->setComdat(nullptr);
}

// Add a regular LTO object to the link.
// The resulting module needs to be linked into the combined LTO module with
// linkRegularLTO.
// \p ResI is advanced by one resolution per symbol in \p Syms. On success the
// returned AddedModule owns the lazily loaded module together with the list
// of globals to keep when it is linked.
Expected<LTO::RegularLTOState::AddedModule>
LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
                   const SymbolResolution *&ResI,
                   const SymbolResolution *ResE) {
  RegularLTOState::AddedModule Mod;
  Expected<std::unique_ptr<Module>> MOrErr =
      BM.getLazyModule(RegularLTO.Ctx, /*ShouldLazyLoadMetadata*/ true,
                       /*IsImporting*/ false);
  if (!MOrErr)
    return MOrErr.takeError();
  // Take the reference before ownership moves into Mod.M; M stays valid for
  // as long as Mod is alive.
  Module &M = **MOrErr;
  Mod.M = std::move(*MOrErr);

  if (Error Err = M.materializeMetadata())
    return std::move(Err);
  UpgradeDebugInfo(M);

  ModuleSymbolTable SymTab;
  SymTab.addModule(&M);

  // Globals with appending linkage are always kept.
  for (GlobalVariable &GV : M.globals())
    if (GV.hasAppendingLinkage())
      Mod.Keep.push_back(&GV);

  // Base objects of aliases; these are excluded from the
  // available_externally conversion below.
  DenseSet<GlobalObject *> AliasedGlobals;
  for (auto &GA : M.aliases())
    if (GlobalObject *GO = GA.getBaseObject())
      AliasedGlobals.insert(GO);

  // In this function we need IR GlobalValues matching the symbols in Syms
  // (which is not backed by a module), so we need to enumerate them in the same
  // order. The symbol enumeration order of a ModuleSymbolTable intentionally
  // matches the order of an irsymtab, but when we read the irsymtab in
  // InputFile::create we omit some symbols that are irrelevant to LTO. The
  // Skip() function skips the same symbols from the module as InputFile does
  // from the symbol table.
  auto MsymI = SymTab.symbols().begin(), MsymE = SymTab.symbols().end();
  auto Skip = [&]() {
    while (MsymI != MsymE) {
      auto Flags = SymTab.getSymbolFlags(*MsymI);
      if ((Flags & object::BasicSymbolRef::SF_Global) &&
          !(Flags & object::BasicSymbolRef::SF_FormatSpecific))
        return;
      ++MsymI;
    }
  };
  Skip();

  std::set<const Comdat *> NonPrevailingComdats;
  for (const InputFile::Symbol &Sym : Syms) {
    assert(ResI != ResE);
    SymbolResolution Res = *ResI++;

    // Consume the module symbol that corresponds to Sym, then move on to the
    // next LTO-relevant one so both sequences stay in lock-step.
    assert(MsymI != MsymE);
    ModuleSymbolTable::Symbol Msym = *MsymI++;
    Skip();

    if (GlobalValue *GV = Msym.dyn_cast<GlobalValue *>()) {
      if (Res.Prevailing) {
        // Note: this `continue` also skips the dso_local and common handling
        // below for prevailing undefined symbols.
        if (Sym.isUndefined())
          continue;
        Mod.Keep.push_back(GV);
        // For symbols re-defined with linker -wrap and -defsym options,
        // set the linkage to weak to inhibit IPO. The linkage will be
        // restored by the linker.
        if (Res.LinkerRedefined)
          GV->setLinkage(GlobalValue::WeakAnyLinkage);

        // A prevailing linkonce definition is upgraded to the matching weak
        // linkage (ODR-ness preserved).
        GlobalValue::LinkageTypes OriginalLinkage = GV->getLinkage();
        if (GlobalValue::isLinkOnceLinkage(OriginalLinkage))
          GV->setLinkage(GlobalValue::getWeakLinkage(
              GlobalValue::isLinkOnceODRLinkage(OriginalLinkage)));
      } else if (isa<GlobalObject>(GV) &&
                 (GV->hasLinkOnceODRLinkage() || GV->hasWeakODRLinkage() ||
                  GV->hasAvailableExternallyLinkage()) &&
                 !AliasedGlobals.count(cast<GlobalObject>(GV))) {
        // Any of the above three types of linkage indicates that the
        // chosen prevailing symbol will have the same semantics as this copy of
        // the symbol, so we may be able to link it with available_externally
        // linkage. We will decide later whether to do that when we link this
        // module (in linkRegularLTO), based on whether it is undefined.
        Mod.Keep.push_back(GV);
        GV->setLinkage(GlobalValue::AvailableExternallyLinkage);
        if (GV->hasComdat())
          NonPrevailingComdats.insert(GV->getComdat());
        cast<GlobalObject>(GV)->setComdat(nullptr);
      }

      // Set the 'local' flag based on the linker resolution for this symbol.
      if (Res.FinalDefinitionInLinkageUnit) {
        GV->setDSOLocal(true);
        if (GV->hasDLLImportStorageClass())
          GV->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::
                                 DefaultStorageClass);
      }
    }
    // Common resolution: collect the maximum size/alignment over all commons.
    // We also record if we see an instance of a common as prevailing, so that
    // if none is prevailing we can ignore it later.
    if (Sym.isCommon()) {
      // FIXME: We should figure out what to do about commons defined by asm.
      // For now they aren't reported correctly by ModuleSymbolTable.
      auto &CommonRes = RegularLTO.Commons[Sym.getIRName()];
      CommonRes.Size = std::max(CommonRes.Size, Sym.getCommonSize());
      CommonRes.Align =
          std::max(CommonRes.Align, MaybeAlign(Sym.getCommonAlignment()));
      CommonRes.Prevailing |= Res.Prevailing;
    }

  }
  // Clear out any comdat that lost members above so that no incomplete comdat
  // is left in the module.
  if (!M.getComdatSymbolTable().empty())
    for (GlobalValue &GV : M.global_values())
      handleNonPrevailingComdat(GV, NonPrevailingComdats);
  assert(MsymI == MsymE);
  return std::move(Mod);
}

/// Link \p Mod into the combined regular LTO module, moving only the globals
/// from Mod.Keep that should actually be linked. When \p LivenessFromIndex is
/// set, globals the combined index does not consider live are dropped.
Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod,
                          bool LivenessFromIndex) {
  std::vector<GlobalValue *> Keep;
  for (GlobalValue *GV : Mod.Keep) {
    if (LivenessFromIndex && !ThinLTO.CombinedIndex.isGUIDLive(GV->getGUID()))
      continue;

    if (!GV->hasAvailableExternallyLinkage()) {
      Keep.push_back(GV);
      continue;
    }

    // Only link available_externally definitions if we don't already have a
    // definition.
    GlobalValue *CombinedGV =
        RegularLTO.CombinedModule->getNamedValue(GV->getName());
    if (CombinedGV && !CombinedGV->isDeclaration())
      continue;

    Keep.push_back(GV);
  }

  // The callback passed to the mover is a no-op.
  return RegularLTO.Mover->move(std::move(Mod.M), Keep,
                                [](GlobalValue &, IRMover::ValueAdder) {},
                                /* IsPerformingImport */ false);
}

// Add a ThinLTO module to the link.
+Error LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, + const SymbolResolution *&ResI, + const SymbolResolution *ResE) { + if (Error Err = + BM.readSummary(ThinLTO.CombinedIndex, BM.getModuleIdentifier(), + ThinLTO.ModuleMap.size())) + return Err; + + for (const InputFile::Symbol &Sym : Syms) { + assert(ResI != ResE); + SymbolResolution Res = *ResI++; + + if (!Sym.getIRName().empty()) { + auto GUID = GlobalValue::getGUID(GlobalValue::getGlobalIdentifier( + Sym.getIRName(), GlobalValue::ExternalLinkage, "")); + if (Res.Prevailing) { + ThinLTO.PrevailingModuleForGUID[GUID] = BM.getModuleIdentifier(); + + // For linker redefined symbols (via --wrap or --defsym) we want to + // switch the linkage to `weak` to prevent IPOs from happening. + // Find the summary in the module for this very GV and record the new + // linkage so that we can switch it when we import the GV. + if (Res.LinkerRedefined) + if (auto S = ThinLTO.CombinedIndex.findSummaryInModule( + GUID, BM.getModuleIdentifier())) + S->setLinkage(GlobalValue::WeakAnyLinkage); + } + + // If the linker resolved the symbol to a local definition then mark it + // as local in the summary for the module we are adding. + if (Res.FinalDefinitionInLinkageUnit) { + if (auto S = ThinLTO.CombinedIndex.findSummaryInModule( + GUID, BM.getModuleIdentifier())) { + S->setDSOLocal(true); + } + } + } + } + + if (!ThinLTO.ModuleMap.insert({BM.getModuleIdentifier(), BM}).second) + return make_error<StringError>( + "Expected at most one ThinLTO module per bitcode file", + inconvertibleErrorCode()); + + return Error::success(); +} + +unsigned LTO::getMaxTasks() const { + CalledGetMaxTasks = true; + return RegularLTO.ParallelCodeGenParallelismLevel + ThinLTO.ModuleMap.size(); +} + +// If only some of the modules were split, we cannot correctly handle +// code that contains type tests or type checked loads. 
+Error LTO::checkPartiallySplit() { + if (!ThinLTO.CombinedIndex.partiallySplitLTOUnits()) + return Error::success(); + + Function *TypeTestFunc = RegularLTO.CombinedModule->getFunction( + Intrinsic::getName(Intrinsic::type_test)); + Function *TypeCheckedLoadFunc = RegularLTO.CombinedModule->getFunction( + Intrinsic::getName(Intrinsic::type_checked_load)); + + // First check if there are type tests / type checked loads in the + // merged regular LTO module IR. + if ((TypeTestFunc && !TypeTestFunc->use_empty()) || + (TypeCheckedLoadFunc && !TypeCheckedLoadFunc->use_empty())) + return make_error<StringError>( + "inconsistent LTO Unit splitting (recompile with -fsplit-lto-unit)", + inconvertibleErrorCode()); + + // Otherwise check if there are any recorded in the combined summary from the + // ThinLTO modules. + for (auto &P : ThinLTO.CombinedIndex) { + for (auto &S : P.second.SummaryList) { + auto *FS = dyn_cast<FunctionSummary>(S.get()); + if (!FS) + continue; + if (!FS->type_test_assume_vcalls().empty() || + !FS->type_checked_load_vcalls().empty() || + !FS->type_test_assume_const_vcalls().empty() || + !FS->type_checked_load_const_vcalls().empty() || + !FS->type_tests().empty()) + return make_error<StringError>( + "inconsistent LTO Unit splitting (recompile with -fsplit-lto-unit)", + inconvertibleErrorCode()); + } + } + return Error::success(); +} + +Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) { + // Compute "dead" symbols, we don't want to import/export these! + DenseSet<GlobalValue::GUID> GUIDPreservedSymbols; + DenseMap<GlobalValue::GUID, PrevailingType> GUIDPrevailingResolutions; + for (auto &Res : GlobalResolutions) { + // Normally resolution have IR name of symbol. We can do nothing here + // otherwise. See comments in GlobalResolution struct for more details. 
+ if (Res.second.IRName.empty()) + continue; + + GlobalValue::GUID GUID = GlobalValue::getGUID( + GlobalValue::dropLLVMManglingEscape(Res.second.IRName)); + + if (Res.second.VisibleOutsideSummary && Res.second.Prevailing) + GUIDPreservedSymbols.insert(GUID); + + GUIDPrevailingResolutions[GUID] = + Res.second.Prevailing ? PrevailingType::Yes : PrevailingType::No; + } + + auto isPrevailing = [&](GlobalValue::GUID G) { + auto It = GUIDPrevailingResolutions.find(G); + if (It == GUIDPrevailingResolutions.end()) + return PrevailingType::Unknown; + return It->second; + }; + computeDeadSymbolsWithConstProp(ThinLTO.CombinedIndex, GUIDPreservedSymbols, + isPrevailing, Conf.OptLevel > 0); + + // Setup output file to emit statistics. + auto StatsFileOrErr = setupStatsFile(Conf.StatsFile); + if (!StatsFileOrErr) + return StatsFileOrErr.takeError(); + std::unique_ptr<ToolOutputFile> StatsFile = std::move(StatsFileOrErr.get()); + + // Finalize linking of regular LTO modules containing summaries now that + // we have computed liveness information. + for (auto &M : RegularLTO.ModsWithSummaries) + if (Error Err = linkRegularLTO(std::move(M), + /*LivenessFromIndex=*/true)) + return Err; + + // Ensure we don't have inconsistently split LTO units with type tests. + if (Error Err = checkPartiallySplit()) + return Err; + + Error Result = runRegularLTO(AddStream); + if (!Result) + Result = runThinLTO(AddStream, Cache, GUIDPreservedSymbols); + + if (StatsFile) + PrintStatisticsJSON(StatsFile->os()); + + return Result; +} + +Error LTO::runRegularLTO(AddStreamFn AddStream) { + // Make sure commons have the right size/alignment: we kept the largest from + // all the prevailing when adding the inputs, and we apply it here. + const DataLayout &DL = RegularLTO.CombinedModule->getDataLayout(); + for (auto &I : RegularLTO.Commons) { + if (!I.second.Prevailing) + // Don't do anything if no instance of this common was prevailing. 
+ continue; + GlobalVariable *OldGV = RegularLTO.CombinedModule->getNamedGlobal(I.first); + if (OldGV && DL.getTypeAllocSize(OldGV->getValueType()) == I.second.Size) { + // Don't create a new global if the type is already correct, just make + // sure the alignment is correct. + OldGV->setAlignment(I.second.Align); + continue; + } + ArrayType *Ty = + ArrayType::get(Type::getInt8Ty(RegularLTO.Ctx), I.second.Size); + auto *GV = new GlobalVariable(*RegularLTO.CombinedModule, Ty, false, + GlobalValue::CommonLinkage, + ConstantAggregateZero::get(Ty), ""); + GV->setAlignment(I.second.Align); + if (OldGV) { + OldGV->replaceAllUsesWith(ConstantExpr::getBitCast(GV, OldGV->getType())); + GV->takeName(OldGV); + OldGV->eraseFromParent(); + } else { + GV->setName(I.first); + } + } + + if (Conf.PreOptModuleHook && + !Conf.PreOptModuleHook(0, *RegularLTO.CombinedModule)) + return Error::success(); + + if (!Conf.CodeGenOnly) { + for (const auto &R : GlobalResolutions) { + if (!R.second.isPrevailingIRSymbol()) + continue; + if (R.second.Partition != 0 && + R.second.Partition != GlobalResolution::External) + continue; + + GlobalValue *GV = + RegularLTO.CombinedModule->getNamedValue(R.second.IRName); + // Ignore symbols defined in other partitions. + // Also skip declarations, which are not allowed to have internal linkage. + if (!GV || GV->hasLocalLinkage() || GV->isDeclaration()) + continue; + GV->setUnnamedAddr(R.second.UnnamedAddr ? 
GlobalValue::UnnamedAddr::Global + : GlobalValue::UnnamedAddr::None); + if (EnableLTOInternalization && R.second.Partition == 0) + GV->setLinkage(GlobalValue::InternalLinkage); + } + + RegularLTO.CombinedModule->addModuleFlag(Module::Error, "LTOPostLink", 1); + + if (Conf.PostInternalizeModuleHook && + !Conf.PostInternalizeModuleHook(0, *RegularLTO.CombinedModule)) + return Error::success(); + } + return backend(Conf, AddStream, RegularLTO.ParallelCodeGenParallelismLevel, + std::move(RegularLTO.CombinedModule), ThinLTO.CombinedIndex); +} + +static const char *libcallRoutineNames[] = { +#define HANDLE_LIBCALL(code, name) name, +#include "llvm/IR/RuntimeLibcalls.def" +#undef HANDLE_LIBCALL +}; + +ArrayRef<const char*> LTO::getRuntimeLibcallSymbols() { + return makeArrayRef(libcallRoutineNames); +} + +/// This class defines the interface to the ThinLTO backend. +class lto::ThinBackendProc { +protected: + Config &Conf; + ModuleSummaryIndex &CombinedIndex; + const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries; + +public: + ThinBackendProc(Config &Conf, ModuleSummaryIndex &CombinedIndex, + const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries) + : Conf(Conf), CombinedIndex(CombinedIndex), + ModuleToDefinedGVSummaries(ModuleToDefinedGVSummaries) {} + + virtual ~ThinBackendProc() {} + virtual Error start( + unsigned Task, BitcodeModule BM, + const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, + MapVector<StringRef, BitcodeModule> &ModuleMap) = 0; + virtual Error wait() = 0; +}; + +namespace { +class InProcessThinBackend : public ThinBackendProc { + ThreadPool BackendThreadPool; + AddStreamFn AddStream; + NativeObjectCache Cache; + std::set<GlobalValue::GUID> CfiFunctionDefs; + std::set<GlobalValue::GUID> CfiFunctionDecls; + + Optional<Error> Err; + std::mutex ErrMu; + +public: + InProcessThinBackend( + Config &Conf, ModuleSummaryIndex 
&CombinedIndex, + unsigned ThinLTOParallelismLevel, + const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries, + AddStreamFn AddStream, NativeObjectCache Cache) + : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries), + BackendThreadPool(ThinLTOParallelismLevel), + AddStream(std::move(AddStream)), Cache(std::move(Cache)) { + for (auto &Name : CombinedIndex.cfiFunctionDefs()) + CfiFunctionDefs.insert( + GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name))); + for (auto &Name : CombinedIndex.cfiFunctionDecls()) + CfiFunctionDecls.insert( + GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name))); + } + + Error runThinLTOBackendThread( + AddStreamFn AddStream, NativeObjectCache Cache, unsigned Task, + BitcodeModule BM, ModuleSummaryIndex &CombinedIndex, + const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, + const GVSummaryMapTy &DefinedGlobals, + MapVector<StringRef, BitcodeModule> &ModuleMap) { + auto RunThinBackend = [&](AddStreamFn AddStream) { + LTOLLVMContext BackendContext(Conf); + Expected<std::unique_ptr<Module>> MOrErr = BM.parseModule(BackendContext); + if (!MOrErr) + return MOrErr.takeError(); + + return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, + ImportList, DefinedGlobals, ModuleMap); + }; + + auto ModuleID = BM.getModuleIdentifier(); + + if (!Cache || !CombinedIndex.modulePaths().count(ModuleID) || + all_of(CombinedIndex.getModuleHash(ModuleID), + [](uint32_t V) { return V == 0; })) + // Cache disabled or no entry for this module in the combined index or + // no module hash. + return RunThinBackend(AddStream); + + SmallString<40> Key; + // The module may be cached, this helps handling it. 
+ computeLTOCacheKey(Key, Conf, CombinedIndex, ModuleID, ImportList, + ExportList, ResolvedODR, DefinedGlobals, CfiFunctionDefs, + CfiFunctionDecls); + if (AddStreamFn CacheAddStream = Cache(Task, Key)) + return RunThinBackend(CacheAddStream); + + return Error::success(); + } + + Error start( + unsigned Task, BitcodeModule BM, + const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, + MapVector<StringRef, BitcodeModule> &ModuleMap) override { + StringRef ModulePath = BM.getModuleIdentifier(); + assert(ModuleToDefinedGVSummaries.count(ModulePath)); + const GVSummaryMapTy &DefinedGlobals = + ModuleToDefinedGVSummaries.find(ModulePath)->second; + BackendThreadPool.async( + [=](BitcodeModule BM, ModuleSummaryIndex &CombinedIndex, + const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> + &ResolvedODR, + const GVSummaryMapTy &DefinedGlobals, + MapVector<StringRef, BitcodeModule> &ModuleMap) { + Error E = runThinLTOBackendThread( + AddStream, Cache, Task, BM, CombinedIndex, ImportList, ExportList, + ResolvedODR, DefinedGlobals, ModuleMap); + if (E) { + std::unique_lock<std::mutex> L(ErrMu); + if (Err) + Err = joinErrors(std::move(*Err), std::move(E)); + else + Err = std::move(E); + } + }, + BM, std::ref(CombinedIndex), std::ref(ImportList), std::ref(ExportList), + std::ref(ResolvedODR), std::ref(DefinedGlobals), std::ref(ModuleMap)); + return Error::success(); + } + + Error wait() override { + BackendThreadPool.wait(); + if (Err) + return std::move(*Err); + else + return Error::success(); + } +}; +} // end anonymous namespace + +ThinBackend lto::createInProcessThinBackend(unsigned ParallelismLevel) { + return [=](Config &Conf, ModuleSummaryIndex &CombinedIndex, + const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries, + AddStreamFn 
AddStream, NativeObjectCache Cache) { + return std::make_unique<InProcessThinBackend>( + Conf, CombinedIndex, ParallelismLevel, ModuleToDefinedGVSummaries, + AddStream, Cache); + }; +} + +// Given the original \p Path to an output file, replace any path +// prefix matching \p OldPrefix with \p NewPrefix. Also, create the +// resulting directory if it does not yet exist. +std::string lto::getThinLTOOutputFile(const std::string &Path, + const std::string &OldPrefix, + const std::string &NewPrefix) { + if (OldPrefix.empty() && NewPrefix.empty()) + return Path; + SmallString<128> NewPath(Path); + llvm::sys::path::replace_path_prefix(NewPath, OldPrefix, NewPrefix); + StringRef ParentPath = llvm::sys::path::parent_path(NewPath.str()); + if (!ParentPath.empty()) { + // Make sure the new directory exists, creating it if necessary. + if (std::error_code EC = llvm::sys::fs::create_directories(ParentPath)) + llvm::errs() << "warning: could not create directory '" << ParentPath + << "': " << EC.message() << '\n'; + } + return NewPath.str(); +} + +namespace { +class WriteIndexesThinBackend : public ThinBackendProc { + std::string OldPrefix, NewPrefix; + bool ShouldEmitImportsFiles; + raw_fd_ostream *LinkedObjectsFile; + lto::IndexWriteCallback OnWrite; + +public: + WriteIndexesThinBackend( + Config &Conf, ModuleSummaryIndex &CombinedIndex, + const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries, + std::string OldPrefix, std::string NewPrefix, bool ShouldEmitImportsFiles, + raw_fd_ostream *LinkedObjectsFile, lto::IndexWriteCallback OnWrite) + : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries), + OldPrefix(OldPrefix), NewPrefix(NewPrefix), + ShouldEmitImportsFiles(ShouldEmitImportsFiles), + LinkedObjectsFile(LinkedObjectsFile), OnWrite(OnWrite) {} + + Error start( + unsigned Task, BitcodeModule BM, + const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const std::map<GlobalValue::GUID, 
GlobalValue::LinkageTypes> &ResolvedODR, + MapVector<StringRef, BitcodeModule> &ModuleMap) override { + StringRef ModulePath = BM.getModuleIdentifier(); + std::string NewModulePath = + getThinLTOOutputFile(ModulePath, OldPrefix, NewPrefix); + + if (LinkedObjectsFile) + *LinkedObjectsFile << NewModulePath << '\n'; + + std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex; + gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries, + ImportList, ModuleToSummariesForIndex); + + std::error_code EC; + raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC, + sys::fs::OpenFlags::OF_None); + if (EC) + return errorCodeToError(EC); + WriteIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex); + + if (ShouldEmitImportsFiles) { + EC = EmitImportsFiles(ModulePath, NewModulePath + ".imports", + ModuleToSummariesForIndex); + if (EC) + return errorCodeToError(EC); + } + + if (OnWrite) + OnWrite(ModulePath); + return Error::success(); + } + + Error wait() override { return Error::success(); } +}; +} // end anonymous namespace + +ThinBackend lto::createWriteIndexesThinBackend( + std::string OldPrefix, std::string NewPrefix, bool ShouldEmitImportsFiles, + raw_fd_ostream *LinkedObjectsFile, IndexWriteCallback OnWrite) { + return [=](Config &Conf, ModuleSummaryIndex &CombinedIndex, + const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries, + AddStreamFn AddStream, NativeObjectCache Cache) { + return std::make_unique<WriteIndexesThinBackend>( + Conf, CombinedIndex, ModuleToDefinedGVSummaries, OldPrefix, NewPrefix, + ShouldEmitImportsFiles, LinkedObjectsFile, OnWrite); + }; +} + +Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, + const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) { + if (ThinLTO.ModuleMap.empty()) + return Error::success(); + + if (Conf.CombinedIndexHook && !Conf.CombinedIndexHook(ThinLTO.CombinedIndex)) + return Error::success(); + + // Collect for each module the list of function it defines (GUID -> + // 
Summary). + StringMap<GVSummaryMapTy> + ModuleToDefinedGVSummaries(ThinLTO.ModuleMap.size()); + ThinLTO.CombinedIndex.collectDefinedGVSummariesPerModule( + ModuleToDefinedGVSummaries); + // Create entries for any modules that didn't have any GV summaries + // (either they didn't have any GVs to start with, or we suppressed + // generation of the summaries because they e.g. had inline assembly + // uses that couldn't be promoted/renamed on export). This is so + // InProcessThinBackend::start can still launch a backend thread, which + // is passed the map of summaries for the module, without any special + // handling for this case. + for (auto &Mod : ThinLTO.ModuleMap) + if (!ModuleToDefinedGVSummaries.count(Mod.first)) + ModuleToDefinedGVSummaries.try_emplace(Mod.first); + + // Synthesize entry counts for functions in the CombinedIndex. + computeSyntheticCounts(ThinLTO.CombinedIndex); + + StringMap<FunctionImporter::ImportMapTy> ImportLists( + ThinLTO.ModuleMap.size()); + StringMap<FunctionImporter::ExportSetTy> ExportLists( + ThinLTO.ModuleMap.size()); + StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR; + + if (DumpThinCGSCCs) + ThinLTO.CombinedIndex.dumpSCCs(outs()); + + std::set<GlobalValue::GUID> ExportedGUIDs; + + // Perform index-based WPD. This will return immediately if there are + // no index entries in the typeIdMetadata map (e.g. if we are instead + // performing IR-based WPD in hybrid regular/thin LTO mode). + std::map<ValueInfo, std::vector<VTableSlotSummary>> LocalWPDTargetsMap; + runWholeProgramDevirtOnIndex(ThinLTO.CombinedIndex, ExportedGUIDs, + LocalWPDTargetsMap); + + if (Conf.OptLevel > 0) + ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, + ImportLists, ExportLists); + + // Figure out which symbols need to be internalized. 
This also needs to happen + // at -O0 because summary-based DCE is implemented using internalization, and + // we must apply DCE consistently with the full LTO module in order to avoid + // undefined references during the final link. + for (auto &Res : GlobalResolutions) { + // If the symbol does not have external references or it is not prevailing, + // then not need to mark it as exported from a ThinLTO partition. + if (Res.second.Partition != GlobalResolution::External || + !Res.second.isPrevailingIRSymbol()) + continue; + auto GUID = GlobalValue::getGUID( + GlobalValue::dropLLVMManglingEscape(Res.second.IRName)); + // Mark exported unless index-based analysis determined it to be dead. + if (ThinLTO.CombinedIndex.isGUIDLive(GUID)) + ExportedGUIDs.insert(GUID); + } + + // Any functions referenced by the jump table in the regular LTO object must + // be exported. + for (auto &Def : ThinLTO.CombinedIndex.cfiFunctionDefs()) + ExportedGUIDs.insert( + GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Def))); + + auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) { + const auto &ExportList = ExportLists.find(ModuleIdentifier); + return (ExportList != ExportLists.end() && + ExportList->second.count(GUID)) || + ExportedGUIDs.count(GUID); + }; + + // Update local devirtualized targets that were exported by cross-module + // importing or by other devirtualizations marked in the ExportedGUIDs set. 
+ updateIndexWPDForExports(ThinLTO.CombinedIndex, isExported, + LocalWPDTargetsMap); + + auto isPrevailing = [&](GlobalValue::GUID GUID, + const GlobalValueSummary *S) { + return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath(); + }; + thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported, + isPrevailing); + + auto recordNewLinkage = [&](StringRef ModuleIdentifier, + GlobalValue::GUID GUID, + GlobalValue::LinkageTypes NewLinkage) { + ResolvedODR[ModuleIdentifier][GUID] = NewLinkage; + }; + thinLTOResolvePrevailingInIndex(ThinLTO.CombinedIndex, isPrevailing, + recordNewLinkage, GUIDPreservedSymbols); + + std::unique_ptr<ThinBackendProc> BackendProc = + ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, + AddStream, Cache); + + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for combined + // module and parallel code generation partitions. + unsigned Task = RegularLTO.ParallelCodeGenParallelismLevel; + for (auto &Mod : ThinLTO.ModuleMap) { + if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first], + ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap)) + return E; + ++Task; + } + + return BackendProc->wait(); +} + +Expected<std::unique_ptr<ToolOutputFile>> +lto::setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename, + StringRef RemarksPasses, StringRef RemarksFormat, + bool RemarksWithHotness, int Count) { + std::string Filename = RemarksFilename; + if (!Filename.empty() && Count != -1) + Filename += ".thin." + llvm::utostr(Count) + ".yaml"; + + auto ResultOrErr = llvm::setupOptimizationRemarks( + Context, Filename, RemarksPasses, RemarksFormat, RemarksWithHotness); + if (Error E = ResultOrErr.takeError()) + return std::move(E); + + if (*ResultOrErr) + (*ResultOrErr)->keep(); + + return ResultOrErr; +} + +Expected<std::unique_ptr<ToolOutputFile>> +lto::setupStatsFile(StringRef StatsFilename) { + // Setup output file to emit statistics. 
+ if (StatsFilename.empty()) + return nullptr; + + llvm::EnableStatistics(false); + std::error_code EC; + auto StatsFile = + std::make_unique<ToolOutputFile>(StatsFilename, EC, sys::fs::OF_None); + if (EC) + return errorCodeToError(EC); + + StatsFile->keep(); + return std::move(StatsFile); +} diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp new file mode 100644 index 0000000000000..2761f8367b0da --- /dev/null +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -0,0 +1,547 @@ +//===-LTOBackend.cpp - LLVM Link Time Optimizer Backend -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the "backend" phase of LTO, i.e. it performs +// optimization and code generation on a loaded module. It is generally used +// internally by the LTO class but can also be used independently, for example +// to implement a standalone ThinLTO backend. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/LTO/LTOBackend.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/RemarkStreamer.h" +#include "llvm/IR/Verifier.h" +#include "llvm/LTO/LTO.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Object/ModuleSymbolTable.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Passes/StandardInstrumentations.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/ThreadPool.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Transforms/Utils/FunctionImportUtils.h" +#include "llvm/Transforms/Utils/SplitModule.h" + +using namespace llvm; +using namespace lto; + +LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) { + errs() << "failed to open " << Path << ": " << Msg << '\n'; + errs().flush(); + exit(1); +} + +Error Config::addSaveTemps(std::string OutputFileName, + bool UseInputModulePath) { + ShouldDiscardValueNames = false; + + std::error_code EC; + ResolutionFile = std::make_unique<raw_fd_ostream>( + OutputFileName + "resolution.txt", EC, sys::fs::OpenFlags::OF_Text); + if (EC) + return errorCodeToError(EC); + + auto setHook = [&](std::string PathSuffix, ModuleHookFn &Hook) { + // Keep track of the hook provided by the linker, which also needs to run. 
+ ModuleHookFn LinkerHook = Hook; + Hook = [=](unsigned Task, const Module &M) { + // If the linker's hook returned false, we need to pass that result + // through. + if (LinkerHook && !LinkerHook(Task, M)) + return false; + + std::string PathPrefix; + // If this is the combined module (not a ThinLTO backend compile) or the + // user hasn't requested using the input module's path, emit to a file + // named from the provided OutputFileName with the Task ID appended. + if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) { + PathPrefix = OutputFileName; + if (Task != (unsigned)-1) + PathPrefix += utostr(Task) + "."; + } else + PathPrefix = M.getModuleIdentifier() + "."; + std::string Path = PathPrefix + PathSuffix + ".bc"; + std::error_code EC; + raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); + // Because -save-temps is a debugging feature, we report the error + // directly and exit. + if (EC) + reportOpenError(Path, EC.message()); + WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/false); + return true; + }; + }; + + setHook("0.preopt", PreOptModuleHook); + setHook("1.promote", PostPromoteModuleHook); + setHook("2.internalize", PostInternalizeModuleHook); + setHook("3.import", PostImportModuleHook); + setHook("4.opt", PostOptModuleHook); + setHook("5.precodegen", PreCodeGenModuleHook); + + CombinedIndexHook = [=](const ModuleSummaryIndex &Index) { + std::string Path = OutputFileName + "index.bc"; + std::error_code EC; + raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); + // Because -save-temps is a debugging feature, we report the error + // directly and exit. 
+ if (EC) + reportOpenError(Path, EC.message()); + WriteIndexToFile(Index, OS); + + Path = OutputFileName + "index.dot"; + raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::OF_None); + if (EC) + reportOpenError(Path, EC.message()); + Index.exportToDot(OSDot); + return true; + }; + + return Error::success(); +} + +namespace { + +std::unique_ptr<TargetMachine> +createTargetMachine(Config &Conf, const Target *TheTarget, Module &M) { + StringRef TheTriple = M.getTargetTriple(); + SubtargetFeatures Features; + Features.getDefaultSubtargetFeatures(Triple(TheTriple)); + for (const std::string &A : Conf.MAttrs) + Features.AddFeature(A); + + Reloc::Model RelocModel; + if (Conf.RelocModel) + RelocModel = *Conf.RelocModel; + else + RelocModel = + M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_; + + Optional<CodeModel::Model> CodeModel; + if (Conf.CodeModel) + CodeModel = *Conf.CodeModel; + else + CodeModel = M.getCodeModel(); + + return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine( + TheTriple, Conf.CPU, Features.getString(), Conf.Options, RelocModel, + CodeModel, Conf.CGOptLevel)); +} + +static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM, + unsigned OptLevel, bool IsThinLTO, + ModuleSummaryIndex *ExportSummary, + const ModuleSummaryIndex *ImportSummary) { + Optional<PGOOptions> PGOOpt; + if (!Conf.SampleProfile.empty()) + PGOOpt = PGOOptions(Conf.SampleProfile, "", Conf.ProfileRemapping, + PGOOptions::SampleUse, PGOOptions::NoCSAction, true); + else if (Conf.RunCSIRInstr) { + PGOOpt = PGOOptions("", Conf.CSIRProfile, Conf.ProfileRemapping, + PGOOptions::IRUse, PGOOptions::CSIRInstr); + } else if (!Conf.CSIRProfile.empty()) { + PGOOpt = PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping, + PGOOptions::IRUse, PGOOptions::CSIRUse); + } + + PassInstrumentationCallbacks PIC; + StandardInstrumentations SI; + SI.registerCallbacks(PIC); + PassBuilder PB(TM, PipelineTuningOptions(),PGOOpt, &PIC); + AAManager AA; + + // 
Parse a custom AA pipeline if asked to. + if (auto Err = PB.parseAAPipeline(AA, "default")) + report_fatal_error("Error parsing default AA pipeline"); + + LoopAnalysisManager LAM(Conf.DebugPassManager); + FunctionAnalysisManager FAM(Conf.DebugPassManager); + CGSCCAnalysisManager CGAM(Conf.DebugPassManager); + ModuleAnalysisManager MAM(Conf.DebugPassManager); + + // Register the AA manager first so that our version is the one used. + FAM.registerPass([&] { return std::move(AA); }); + + // Register all the basic analyses with the managers. + PB.registerModuleAnalyses(MAM); + PB.registerCGSCCAnalyses(CGAM); + PB.registerFunctionAnalyses(FAM); + PB.registerLoopAnalyses(LAM); + PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); + + ModulePassManager MPM(Conf.DebugPassManager); + // FIXME (davide): verify the input. + + PassBuilder::OptimizationLevel OL; + + switch (OptLevel) { + default: + llvm_unreachable("Invalid optimization level"); + case 0: + OL = PassBuilder::O0; + break; + case 1: + OL = PassBuilder::O1; + break; + case 2: + OL = PassBuilder::O2; + break; + case 3: + OL = PassBuilder::O3; + break; + } + + if (IsThinLTO) + MPM = PB.buildThinLTODefaultPipeline(OL, Conf.DebugPassManager, + ImportSummary); + else + MPM = PB.buildLTODefaultPipeline(OL, Conf.DebugPassManager, ExportSummary); + MPM.run(Mod, MAM); + + // FIXME (davide): verify the output. +} + +static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM, + std::string PipelineDesc, + std::string AAPipelineDesc, + bool DisableVerify) { + PassBuilder PB(TM); + AAManager AA; + + // Parse a custom AA pipeline if asked to. 
+ if (!AAPipelineDesc.empty()) + if (auto Err = PB.parseAAPipeline(AA, AAPipelineDesc)) + report_fatal_error("unable to parse AA pipeline description '" + + AAPipelineDesc + "': " + toString(std::move(Err))); + + LoopAnalysisManager LAM; + FunctionAnalysisManager FAM; + CGSCCAnalysisManager CGAM; + ModuleAnalysisManager MAM; + + // Register the AA manager first so that our version is the one used. + FAM.registerPass([&] { return std::move(AA); }); + + // Register all the basic analyses with the managers. + PB.registerModuleAnalyses(MAM); + PB.registerCGSCCAnalyses(CGAM); + PB.registerFunctionAnalyses(FAM); + PB.registerLoopAnalyses(LAM); + PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); + + ModulePassManager MPM; + + // Always verify the input. + MPM.addPass(VerifierPass()); + + // Now, add all the passes we've been requested to. + if (auto Err = PB.parsePassPipeline(MPM, PipelineDesc)) + report_fatal_error("unable to parse pass pipeline description '" + + PipelineDesc + "': " + toString(std::move(Err))); + + if (!DisableVerify) + MPM.addPass(VerifierPass()); + MPM.run(Mod, MAM); +} + +static void runOldPMPasses(Config &Conf, Module &Mod, TargetMachine *TM, + bool IsThinLTO, ModuleSummaryIndex *ExportSummary, + const ModuleSummaryIndex *ImportSummary) { + legacy::PassManager passes; + passes.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); + + PassManagerBuilder PMB; + PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple())); + PMB.Inliner = createFunctionInliningPass(); + PMB.ExportSummary = ExportSummary; + PMB.ImportSummary = ImportSummary; + // Unconditionally verify input since it is not verified before this + // point and has unknown origin. 
+ PMB.VerifyInput = true; + PMB.VerifyOutput = !Conf.DisableVerify; + PMB.LoopVectorize = true; + PMB.SLPVectorize = true; + PMB.OptLevel = Conf.OptLevel; + PMB.PGOSampleUse = Conf.SampleProfile; + PMB.EnablePGOCSInstrGen = Conf.RunCSIRInstr; + if (!Conf.RunCSIRInstr && !Conf.CSIRProfile.empty()) { + PMB.EnablePGOCSInstrUse = true; + PMB.PGOInstrUse = Conf.CSIRProfile; + } + if (IsThinLTO) + PMB.populateThinLTOPassManager(passes); + else + PMB.populateLTOPassManager(passes); + passes.run(Mod); +} + +bool opt(Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, + bool IsThinLTO, ModuleSummaryIndex *ExportSummary, + const ModuleSummaryIndex *ImportSummary) { + // FIXME: Plumb the combined index into the new pass manager. + if (!Conf.OptPipeline.empty()) + runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline, + Conf.DisableVerify); + else if (Conf.UseNewPM) + runNewPMPasses(Conf, Mod, TM, Conf.OptLevel, IsThinLTO, ExportSummary, + ImportSummary); + else + runOldPMPasses(Conf, Mod, TM, IsThinLTO, ExportSummary, ImportSummary); + return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod); +} + +void codegen(Config &Conf, TargetMachine *TM, AddStreamFn AddStream, + unsigned Task, Module &Mod) { + if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod)) + return; + + std::unique_ptr<ToolOutputFile> DwoOut; + SmallString<1024> DwoFile(Conf.SplitDwarfOutput); + if (!Conf.DwoDir.empty()) { + std::error_code EC; + if (auto EC = llvm::sys::fs::create_directories(Conf.DwoDir)) + report_fatal_error("Failed to create directory " + Conf.DwoDir + ": " + + EC.message()); + + DwoFile = Conf.DwoDir; + sys::path::append(DwoFile, std::to_string(Task) + ".dwo"); + TM->Options.MCOptions.SplitDwarfFile = DwoFile.str().str(); + } else + TM->Options.MCOptions.SplitDwarfFile = Conf.SplitDwarfFile; + + if (!DwoFile.empty()) { + std::error_code EC; + DwoOut = std::make_unique<ToolOutputFile>(DwoFile, EC, sys::fs::OF_None); + if (EC) + 
report_fatal_error("Failed to open " + DwoFile + ": " + EC.message()); + } + + auto Stream = AddStream(Task); + legacy::PassManager CodeGenPasses; + if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS, + DwoOut ? &DwoOut->os() : nullptr, + Conf.CGFileType)) + report_fatal_error("Failed to setup codegen"); + CodeGenPasses.run(Mod); + + if (DwoOut) + DwoOut->keep(); +} + +void splitCodeGen(Config &C, TargetMachine *TM, AddStreamFn AddStream, + unsigned ParallelCodeGenParallelismLevel, + std::unique_ptr<Module> Mod) { + ThreadPool CodegenThreadPool(ParallelCodeGenParallelismLevel); + unsigned ThreadCount = 0; + const Target *T = &TM->getTarget(); + + SplitModule( + std::move(Mod), ParallelCodeGenParallelismLevel, + [&](std::unique_ptr<Module> MPart) { + // We want to clone the module in a new context to multi-thread the + // codegen. We do it by serializing partition modules to bitcode + // (while still on the main thread, in order to avoid data races) and + // spinning up new threads which deserialize the partitions into + // separate contexts. + // FIXME: Provide a more direct way to do this in LLVM. + SmallString<0> BC; + raw_svector_ostream BCOS(BC); + WriteBitcodeToFile(*MPart, BCOS); + + // Enqueue the task + CodegenThreadPool.async( + [&](const SmallString<0> &BC, unsigned ThreadId) { + LTOLLVMContext Ctx(C); + Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile( + MemoryBufferRef(StringRef(BC.data(), BC.size()), "ld-temp.o"), + Ctx); + if (!MOrErr) + report_fatal_error("Failed to read bitcode"); + std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get()); + + std::unique_ptr<TargetMachine> TM = + createTargetMachine(C, T, *MPartInCtx); + + codegen(C, TM.get(), AddStream, ThreadId, *MPartInCtx); + }, + // Pass BC using std::move to ensure that it get moved rather than + // copied into the thread's context. 
+ std::move(BC), ThreadCount++); + }, + false); + + // Because the inner lambda (which runs in a worker thread) captures our local + // variables, we need to wait for the worker threads to terminate before we + // can leave the function scope. + CodegenThreadPool.wait(); +} + +Expected<const Target *> initAndLookupTarget(Config &C, Module &Mod) { + if (!C.OverrideTriple.empty()) + Mod.setTargetTriple(C.OverrideTriple); + else if (Mod.getTargetTriple().empty()) + Mod.setTargetTriple(C.DefaultTriple); + + std::string Msg; + const Target *T = TargetRegistry::lookupTarget(Mod.getTargetTriple(), Msg); + if (!T) + return make_error<StringError>(Msg, inconvertibleErrorCode()); + return T; +} + +} + +static Error +finalizeOptimizationRemarks(std::unique_ptr<ToolOutputFile> DiagOutputFile) { + // Make sure we flush the diagnostic remarks file in case the linker doesn't + // call the global destructors before exiting. + if (!DiagOutputFile) + return Error::success(); + DiagOutputFile->keep(); + DiagOutputFile->os().flush(); + return Error::success(); +} + +Error lto::backend(Config &C, AddStreamFn AddStream, + unsigned ParallelCodeGenParallelismLevel, + std::unique_ptr<Module> Mod, + ModuleSummaryIndex &CombinedIndex) { + Expected<const Target *> TOrErr = initAndLookupTarget(C, *Mod); + if (!TOrErr) + return TOrErr.takeError(); + + std::unique_ptr<TargetMachine> TM = createTargetMachine(C, *TOrErr, *Mod); + + // Setup optimization remarks. 
+ auto DiagFileOrErr = lto::setupOptimizationRemarks( + Mod->getContext(), C.RemarksFilename, C.RemarksPasses, C.RemarksFormat, + C.RemarksWithHotness); + if (!DiagFileOrErr) + return DiagFileOrErr.takeError(); + auto DiagnosticOutputFile = std::move(*DiagFileOrErr); + + if (!C.CodeGenOnly) { + if (!opt(C, TM.get(), 0, *Mod, /*IsThinLTO=*/false, + /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr)) + return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); + } + + if (ParallelCodeGenParallelismLevel == 1) { + codegen(C, TM.get(), AddStream, 0, *Mod); + } else { + splitCodeGen(C, TM.get(), AddStream, ParallelCodeGenParallelismLevel, + std::move(Mod)); + } + return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); +} + +static void dropDeadSymbols(Module &Mod, const GVSummaryMapTy &DefinedGlobals, + const ModuleSummaryIndex &Index) { + std::vector<GlobalValue*> DeadGVs; + for (auto &GV : Mod.global_values()) + if (GlobalValueSummary *GVS = DefinedGlobals.lookup(GV.getGUID())) + if (!Index.isGlobalValueLive(GVS)) { + DeadGVs.push_back(&GV); + convertToDeclaration(GV); + } + + // Now that all dead bodies have been dropped, delete the actual objects + // themselves when possible. + for (GlobalValue *GV : DeadGVs) { + GV->removeDeadConstantUsers(); + // Might reference something defined in native object (i.e. dropped a + // non-prevailing IR def, but we need to keep the declaration). + if (GV->use_empty()) + GV->eraseFromParent(); + } +} + +Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream, + Module &Mod, const ModuleSummaryIndex &CombinedIndex, + const FunctionImporter::ImportMapTy &ImportList, + const GVSummaryMapTy &DefinedGlobals, + MapVector<StringRef, BitcodeModule> &ModuleMap) { + Expected<const Target *> TOrErr = initAndLookupTarget(Conf, Mod); + if (!TOrErr) + return TOrErr.takeError(); + + std::unique_ptr<TargetMachine> TM = createTargetMachine(Conf, *TOrErr, Mod); + + // Setup optimization remarks. 
+ auto DiagFileOrErr = lto::setupOptimizationRemarks( + Mod.getContext(), Conf.RemarksFilename, Conf.RemarksPasses, + Conf.RemarksFormat, Conf.RemarksWithHotness, Task); + if (!DiagFileOrErr) + return DiagFileOrErr.takeError(); + auto DiagnosticOutputFile = std::move(*DiagFileOrErr); + + if (Conf.CodeGenOnly) { + codegen(Conf, TM.get(), AddStream, Task, Mod); + return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); + } + + if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(Task, Mod)) + return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); + + renameModuleForThinLTO(Mod, CombinedIndex); + + dropDeadSymbols(Mod, DefinedGlobals, CombinedIndex); + + thinLTOResolvePrevailingInModule(Mod, DefinedGlobals); + + if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod)) + return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); + + if (!DefinedGlobals.empty()) + thinLTOInternalizeModule(Mod, DefinedGlobals); + + if (Conf.PostInternalizeModuleHook && + !Conf.PostInternalizeModuleHook(Task, Mod)) + return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); + + auto ModuleLoader = [&](StringRef Identifier) { + assert(Mod.getContext().isODRUniquingDebugTypes() && + "ODR Type uniquing should be enabled on the context"); + auto I = ModuleMap.find(Identifier); + assert(I != ModuleMap.end()); + return I->second.getLazyModule(Mod.getContext(), + /*ShouldLazyLoadMetadata=*/true, + /*IsImporting*/ true); + }; + + FunctionImporter Importer(CombinedIndex, ModuleLoader); + if (Error Err = Importer.importFunctions(Mod, ImportList).takeError()) + return Err; + + if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod)) + return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); + + if (!opt(Conf, TM.get(), Task, Mod, /*IsThinLTO=*/true, + /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex)) + return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); + + codegen(Conf, TM.get(), 
AddStream, Task, Mod); + return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); +} diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp new file mode 100644 index 0000000000000..8821928928672 --- /dev/null +++ b/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -0,0 +1,722 @@ +//===-LTOCodeGenerator.cpp - LLVM Link Time Optimizer ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Link Time Optimization library. This library is +// intended to be used by linker to optimize code at link time. +// +//===----------------------------------------------------------------------===// + +#include "llvm/LTO/legacy/LTOCodeGenerator.h" + +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/CodeGen/ParallelCG.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/config.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassTimingInfo.h" +#include "llvm/IR/RemarkStreamer.h" +#include "llvm/IR/Verifier.h" +#include "llvm/InitializePasses.h" +#include "llvm/LTO/LTO.h" +#include "llvm/LTO/legacy/LTOModule.h" +#include "llvm/LTO/legacy/UpdateCompilerUsed.h" +#include "llvm/Linker/Linker.h" +#include "llvm/MC/MCAsmInfo.h" +#include 
"llvm/MC/MCContext.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/Internalize.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/ObjCARC.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include <system_error> +using namespace llvm; + +const char* LTOCodeGenerator::getVersionString() { +#ifdef LLVM_VERSION_INFO + return PACKAGE_NAME " version " PACKAGE_VERSION ", " LLVM_VERSION_INFO; +#else + return PACKAGE_NAME " version " PACKAGE_VERSION; +#endif +} + +namespace llvm { +cl::opt<bool> LTODiscardValueNames( + "lto-discard-value-names", + cl::desc("Strip names from Value during LTO (other than GlobalValue)."), +#ifdef NDEBUG + cl::init(true), +#else + cl::init(false), +#endif + cl::Hidden); + +cl::opt<bool> RemarksWithHotness( + "lto-pass-remarks-with-hotness", + cl::desc("With PGO, include profile count in optimization remarks"), + cl::Hidden); + +cl::opt<std::string> + RemarksFilename("lto-pass-remarks-output", + cl::desc("Output filename for pass remarks"), + cl::value_desc("filename")); + +cl::opt<std::string> + RemarksPasses("lto-pass-remarks-filter", + cl::desc("Only record optimization remarks from passes whose " + "names match the given regular expression"), + cl::value_desc("regex")); + +cl::opt<std::string> RemarksFormat( + "lto-pass-remarks-format", + cl::desc("The format used for serializing remarks (default: YAML)"), + cl::value_desc("format"), cl::init("yaml")); + +cl::opt<std::string> LTOStatsFile( + "lto-stats-file", + cl::desc("Save 
statistics to the specified file"), + cl::Hidden); +} + +LTOCodeGenerator::LTOCodeGenerator(LLVMContext &Context) + : Context(Context), MergedModule(new Module("ld-temp.o", Context)), + TheLinker(new Linker(*MergedModule)) { + Context.setDiscardValueNames(LTODiscardValueNames); + Context.enableDebugTypeODRUniquing(); + initializeLTOPasses(); +} + +LTOCodeGenerator::~LTOCodeGenerator() {} + +// Initialize LTO passes. Please keep this function in sync with +// PassManagerBuilder::populateLTOPassManager(), and make sure all LTO +// passes are initialized. +void LTOCodeGenerator::initializeLTOPasses() { + PassRegistry &R = *PassRegistry::getPassRegistry(); + + initializeInternalizeLegacyPassPass(R); + initializeIPSCCPLegacyPassPass(R); + initializeGlobalOptLegacyPassPass(R); + initializeConstantMergeLegacyPassPass(R); + initializeDAHPass(R); + initializeInstructionCombiningPassPass(R); + initializeSimpleInlinerPass(R); + initializePruneEHPass(R); + initializeGlobalDCELegacyPassPass(R); + initializeArgPromotionPass(R); + initializeJumpThreadingPass(R); + initializeSROALegacyPassPass(R); + initializeAttributorLegacyPassPass(R); + initializePostOrderFunctionAttrsLegacyPassPass(R); + initializeReversePostOrderFunctionAttrsLegacyPassPass(R); + initializeGlobalsAAWrapperPassPass(R); + initializeLegacyLICMPassPass(R); + initializeMergedLoadStoreMotionLegacyPassPass(R); + initializeGVNLegacyPassPass(R); + initializeMemCpyOptLegacyPassPass(R); + initializeDCELegacyPassPass(R); + initializeCFGSimplifyPassPass(R); +} + +void LTOCodeGenerator::setAsmUndefinedRefs(LTOModule *Mod) { + const std::vector<StringRef> &undefs = Mod->getAsmUndefinedRefs(); + for (int i = 0, e = undefs.size(); i != e; ++i) + AsmUndefinedRefs.insert(undefs[i]); +} + +bool LTOCodeGenerator::addModule(LTOModule *Mod) { + assert(&Mod->getModule().getContext() == &Context && + "Expected module in same context"); + + bool ret = TheLinker->linkInModule(Mod->takeModule()); + setAsmUndefinedRefs(Mod); + + // We've 
just changed the input, so let's make sure we verify it. + HasVerifiedInput = false; + + return !ret; +} + +void LTOCodeGenerator::setModule(std::unique_ptr<LTOModule> Mod) { + assert(&Mod->getModule().getContext() == &Context && + "Expected module in same context"); + + AsmUndefinedRefs.clear(); + + MergedModule = Mod->takeModule(); + TheLinker = std::make_unique<Linker>(*MergedModule); + setAsmUndefinedRefs(&*Mod); + + // We've just changed the input, so let's make sure we verify it. + HasVerifiedInput = false; +} + +void LTOCodeGenerator::setTargetOptions(const TargetOptions &Options) { + this->Options = Options; +} + +void LTOCodeGenerator::setDebugInfo(lto_debug_model Debug) { + switch (Debug) { + case LTO_DEBUG_MODEL_NONE: + EmitDwarfDebugInfo = false; + return; + + case LTO_DEBUG_MODEL_DWARF: + EmitDwarfDebugInfo = true; + return; + } + llvm_unreachable("Unknown debug format!"); +} + +void LTOCodeGenerator::setOptLevel(unsigned Level) { + OptLevel = Level; + switch (OptLevel) { + case 0: + CGOptLevel = CodeGenOpt::None; + return; + case 1: + CGOptLevel = CodeGenOpt::Less; + return; + case 2: + CGOptLevel = CodeGenOpt::Default; + return; + case 3: + CGOptLevel = CodeGenOpt::Aggressive; + return; + } + llvm_unreachable("Unknown optimization level!"); +} + +bool LTOCodeGenerator::writeMergedModules(StringRef Path) { + if (!determineTarget()) + return false; + + // We always run the verifier once on the merged module. 
+ verifyMergedModuleOnce(); + + // mark which symbols can not be internalized + applyScopeRestrictions(); + + // create output file + std::error_code EC; + ToolOutputFile Out(Path, EC, sys::fs::OF_None); + if (EC) { + std::string ErrMsg = "could not open bitcode file for writing: "; + ErrMsg += Path.str() + ": " + EC.message(); + emitError(ErrMsg); + return false; + } + + // write bitcode to it + WriteBitcodeToFile(*MergedModule, Out.os(), ShouldEmbedUselists); + Out.os().close(); + + if (Out.os().has_error()) { + std::string ErrMsg = "could not write bitcode file: "; + ErrMsg += Path.str() + ": " + Out.os().error().message(); + emitError(ErrMsg); + Out.os().clear_error(); + return false; + } + + Out.keep(); + return true; +} + +bool LTOCodeGenerator::compileOptimizedToFile(const char **Name) { + // make unique temp output file to put generated code + SmallString<128> Filename; + int FD; + + StringRef Extension + (FileType == TargetMachine::CGFT_AssemblyFile ? "s" : "o"); + + std::error_code EC = + sys::fs::createTemporaryFile("lto-llvm", Extension, FD, Filename); + if (EC) { + emitError(EC.message()); + return false; + } + + // generate object file + ToolOutputFile objFile(Filename, FD); + + bool genResult = compileOptimized(&objFile.os()); + objFile.os().close(); + if (objFile.os().has_error()) { + emitError((Twine("could not write object file: ") + Filename + ": " + + objFile.os().error().message()) + .str()); + objFile.os().clear_error(); + sys::fs::remove(Twine(Filename)); + return false; + } + + objFile.keep(); + if (!genResult) { + sys::fs::remove(Twine(Filename)); + return false; + } + + NativeObjectPath = Filename.c_str(); + *Name = NativeObjectPath.c_str(); + return true; +} + +std::unique_ptr<MemoryBuffer> +LTOCodeGenerator::compileOptimized() { + const char *name; + if (!compileOptimizedToFile(&name)) + return nullptr; + + // read .o file into memory buffer + ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = + MemoryBuffer::getFile(name, -1, false); 
+ if (std::error_code EC = BufferOrErr.getError()) { + emitError(EC.message()); + sys::fs::remove(NativeObjectPath); + return nullptr; + } + + // remove temp files + sys::fs::remove(NativeObjectPath); + + return std::move(*BufferOrErr); +} + +bool LTOCodeGenerator::compile_to_file(const char **Name, bool DisableVerify, + bool DisableInline, + bool DisableGVNLoadPRE, + bool DisableVectorization) { + if (!optimize(DisableVerify, DisableInline, DisableGVNLoadPRE, + DisableVectorization)) + return false; + + return compileOptimizedToFile(Name); +} + +std::unique_ptr<MemoryBuffer> +LTOCodeGenerator::compile(bool DisableVerify, bool DisableInline, + bool DisableGVNLoadPRE, bool DisableVectorization) { + if (!optimize(DisableVerify, DisableInline, DisableGVNLoadPRE, + DisableVectorization)) + return nullptr; + + return compileOptimized(); +} + +bool LTOCodeGenerator::determineTarget() { + if (TargetMach) + return true; + + TripleStr = MergedModule->getTargetTriple(); + if (TripleStr.empty()) { + TripleStr = sys::getDefaultTargetTriple(); + MergedModule->setTargetTriple(TripleStr); + } + llvm::Triple Triple(TripleStr); + + // create target machine from info for merged modules + std::string ErrMsg; + MArch = TargetRegistry::lookupTarget(TripleStr, ErrMsg); + if (!MArch) { + emitError(ErrMsg); + return false; + } + + // Construct LTOModule, hand over ownership of module and target. Use MAttr as + // the default set of features. + SubtargetFeatures Features(MAttr); + Features.getDefaultSubtargetFeatures(Triple); + FeatureStr = Features.getString(); + // Set a default CPU for Darwin triples. 
+ if (MCpu.empty() && Triple.isOSDarwin()) { + if (Triple.getArch() == llvm::Triple::x86_64) + MCpu = "core2"; + else if (Triple.getArch() == llvm::Triple::x86) + MCpu = "yonah"; + else if (Triple.getArch() == llvm::Triple::aarch64 || + Triple.getArch() == llvm::Triple::aarch64_32) + MCpu = "cyclone"; + } + + TargetMach = createTargetMachine(); + return true; +} + +std::unique_ptr<TargetMachine> LTOCodeGenerator::createTargetMachine() { + return std::unique_ptr<TargetMachine>(MArch->createTargetMachine( + TripleStr, MCpu, FeatureStr, Options, RelocModel, None, CGOptLevel)); +} + +// If a linkonce global is present in the MustPreserveSymbols, we need to make +// sure we honor this. To force the compiler to not drop it, we add it to the +// "llvm.compiler.used" global. +void LTOCodeGenerator::preserveDiscardableGVs( + Module &TheModule, + llvm::function_ref<bool(const GlobalValue &)> mustPreserveGV) { + std::vector<GlobalValue *> Used; + auto mayPreserveGlobal = [&](GlobalValue &GV) { + if (!GV.isDiscardableIfUnused() || GV.isDeclaration() || + !mustPreserveGV(GV)) + return; + if (GV.hasAvailableExternallyLinkage()) + return emitWarning( + (Twine("Linker asked to preserve available_externally global: '") + + GV.getName() + "'").str()); + if (GV.hasInternalLinkage()) + return emitWarning((Twine("Linker asked to preserve internal global: '") + + GV.getName() + "'").str()); + Used.push_back(&GV); + }; + for (auto &GV : TheModule) + mayPreserveGlobal(GV); + for (auto &GV : TheModule.globals()) + mayPreserveGlobal(GV); + for (auto &GV : TheModule.aliases()) + mayPreserveGlobal(GV); + + if (Used.empty()) + return; + + appendToCompilerUsed(TheModule, Used); +} + +void LTOCodeGenerator::applyScopeRestrictions() { + if (ScopeRestrictionsDone) + return; + + // Declare a callback for the internalize pass that will ask for every + // candidate GlobalValue if it can be internalized or not. 
+ Mangler Mang; + SmallString<64> MangledName; + auto mustPreserveGV = [&](const GlobalValue &GV) -> bool { + // Unnamed globals can't be mangled, but they can't be preserved either. + if (!GV.hasName()) + return false; + + // Need to mangle the GV as the "MustPreserveSymbols" StringSet is filled + // with the linker supplied name, which on Darwin includes a leading + // underscore. + MangledName.clear(); + MangledName.reserve(GV.getName().size() + 1); + Mang.getNameWithPrefix(MangledName, &GV, /*CannotUsePrivateLabel=*/false); + return MustPreserveSymbols.count(MangledName); + }; + + // Preserve linkonce value on linker request + preserveDiscardableGVs(*MergedModule, mustPreserveGV); + + if (!ShouldInternalize) + return; + + if (ShouldRestoreGlobalsLinkage) { + // Record the linkage type of non-local symbols so they can be restored + // prior + // to module splitting. + auto RecordLinkage = [&](const GlobalValue &GV) { + if (!GV.hasAvailableExternallyLinkage() && !GV.hasLocalLinkage() && + GV.hasName()) + ExternalSymbols.insert(std::make_pair(GV.getName(), GV.getLinkage())); + }; + for (auto &GV : *MergedModule) + RecordLinkage(GV); + for (auto &GV : MergedModule->globals()) + RecordLinkage(GV); + for (auto &GV : MergedModule->aliases()) + RecordLinkage(GV); + } + + // Update the llvm.compiler_used globals to force preserving libcalls and + // symbols referenced from asm + updateCompilerUsed(*MergedModule, *TargetMach, AsmUndefinedRefs); + + internalizeModule(*MergedModule, mustPreserveGV); + + MergedModule->addModuleFlag(Module::Error, "LTOPostLink", 1); + + ScopeRestrictionsDone = true; +} + +/// Restore original linkage for symbols that may have been internalized +void LTOCodeGenerator::restoreLinkageForExternals() { + if (!ShouldInternalize || !ShouldRestoreGlobalsLinkage) + return; + + assert(ScopeRestrictionsDone && + "Cannot externalize without internalization!"); + + if (ExternalSymbols.empty()) + return; + + auto externalize = [this](GlobalValue &GV) { + 
if (!GV.hasLocalLinkage() || !GV.hasName()) + return; + + auto I = ExternalSymbols.find(GV.getName()); + if (I == ExternalSymbols.end()) + return; + + GV.setLinkage(I->second); + }; + + llvm::for_each(MergedModule->functions(), externalize); + llvm::for_each(MergedModule->globals(), externalize); + llvm::for_each(MergedModule->aliases(), externalize); +} + +void LTOCodeGenerator::verifyMergedModuleOnce() { + // Only run on the first call. + if (HasVerifiedInput) + return; + HasVerifiedInput = true; + + bool BrokenDebugInfo = false; + if (verifyModule(*MergedModule, &dbgs(), &BrokenDebugInfo)) + report_fatal_error("Broken module found, compilation aborted!"); + if (BrokenDebugInfo) { + emitWarning("Invalid debug info found, debug info will be stripped"); + StripDebugInfo(*MergedModule); + } +} + +void LTOCodeGenerator::finishOptimizationRemarks() { + if (DiagnosticOutputFile) { + DiagnosticOutputFile->keep(); + // FIXME: LTOCodeGenerator dtor is not invoked on Darwin + DiagnosticOutputFile->os().flush(); + } +} + +/// Optimize merged modules using various IPO passes +bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline, + bool DisableGVNLoadPRE, + bool DisableVectorization) { + if (!this->determineTarget()) + return false; + + auto DiagFileOrErr = + lto::setupOptimizationRemarks(Context, RemarksFilename, RemarksPasses, + RemarksFormat, RemarksWithHotness); + if (!DiagFileOrErr) { + errs() << "Error: " << toString(DiagFileOrErr.takeError()) << "\n"; + report_fatal_error("Can't get an output file for the remarks"); + } + DiagnosticOutputFile = std::move(*DiagFileOrErr); + + // Setup output file to emit statistics. 
+ auto StatsFileOrErr = lto::setupStatsFile(LTOStatsFile); + if (!StatsFileOrErr) { + errs() << "Error: " << toString(StatsFileOrErr.takeError()) << "\n"; + report_fatal_error("Can't get an output file for the statistics"); + } + StatsFile = std::move(StatsFileOrErr.get()); + + // We always run the verifier once on the merged module, the `DisableVerify` + // parameter only applies to subsequent verify. + verifyMergedModuleOnce(); + + // Mark which symbols can not be internalized + this->applyScopeRestrictions(); + + // Instantiate the pass manager to organize the passes. + legacy::PassManager passes; + + // Add an appropriate DataLayout instance for this module... + MergedModule->setDataLayout(TargetMach->createDataLayout()); + + passes.add( + createTargetTransformInfoWrapperPass(TargetMach->getTargetIRAnalysis())); + + Triple TargetTriple(TargetMach->getTargetTriple()); + PassManagerBuilder PMB; + PMB.DisableGVNLoadPRE = DisableGVNLoadPRE; + PMB.LoopVectorize = !DisableVectorization; + PMB.SLPVectorize = !DisableVectorization; + if (!DisableInline) + PMB.Inliner = createFunctionInliningPass(); + PMB.LibraryInfo = new TargetLibraryInfoImpl(TargetTriple); + if (Freestanding) + PMB.LibraryInfo->disableAllFunctions(); + PMB.OptLevel = OptLevel; + PMB.VerifyInput = !DisableVerify; + PMB.VerifyOutput = !DisableVerify; + + PMB.populateLTOPassManager(passes); + + // Run our queue of passes all at once now, efficiently. + passes.run(*MergedModule); + + return true; +} + +bool LTOCodeGenerator::compileOptimized(ArrayRef<raw_pwrite_stream *> Out) { + if (!this->determineTarget()) + return false; + + // We always run the verifier once on the merged module. If it has already + // been called in optimize(), this call will return early. + verifyMergedModuleOnce(); + + legacy::PassManager preCodeGenPasses; + + // If the bitcode files contain ARC code and were compiled with optimization, + // the ObjCARCContractPass must be run, so do it unconditionally here. 
+ preCodeGenPasses.add(createObjCARCContractPass()); + preCodeGenPasses.run(*MergedModule); + + // Re-externalize globals that may have been internalized to increase scope + // for splitting + restoreLinkageForExternals(); + + // Do code generation. We need to preserve the module in case the client calls + // writeMergedModules() after compilation, but we only need to allow this at + // parallelism level 1. This is achieved by having splitCodeGen return the + // original module at parallelism level 1 which we then assign back to + // MergedModule. + MergedModule = splitCodeGen(std::move(MergedModule), Out, {}, + [&]() { return createTargetMachine(); }, FileType, + ShouldRestoreGlobalsLinkage); + + // If statistics were requested, save them to the specified file or + // print them out after codegen. + if (StatsFile) + PrintStatisticsJSON(StatsFile->os()); + else if (AreStatisticsEnabled()) + PrintStatistics(); + + reportAndResetTimings(); + + finishOptimizationRemarks(); + + return true; +} + +/// setCodeGenDebugOptions - Set codegen debugging options to aid in debugging +/// LTO problems. +void LTOCodeGenerator::setCodeGenDebugOptions(StringRef Options) { + for (std::pair<StringRef, StringRef> o = getToken(Options); !o.first.empty(); + o = getToken(o.second)) + CodegenOptions.push_back(o.first); +} + +void LTOCodeGenerator::parseCodeGenDebugOptions() { + // if options were requested, set them + if (!CodegenOptions.empty()) { + // ParseCommandLineOptions() expects argv[0] to be program name. + std::vector<const char *> CodegenArgv(1, "libLLVMLTO"); + for (std::string &Arg : CodegenOptions) + CodegenArgv.push_back(Arg.c_str()); + cl::ParseCommandLineOptions(CodegenArgv.size(), CodegenArgv.data()); + } +} + + +void LTOCodeGenerator::DiagnosticHandler(const DiagnosticInfo &DI) { + // Map the LLVM internal diagnostic severity to the LTO diagnostic severity. 
+ lto_codegen_diagnostic_severity_t Severity; + switch (DI.getSeverity()) { + case DS_Error: + Severity = LTO_DS_ERROR; + break; + case DS_Warning: + Severity = LTO_DS_WARNING; + break; + case DS_Remark: + Severity = LTO_DS_REMARK; + break; + case DS_Note: + Severity = LTO_DS_NOTE; + break; + } + // Create the string that will be reported to the external diagnostic handler. + std::string MsgStorage; + raw_string_ostream Stream(MsgStorage); + DiagnosticPrinterRawOStream DP(Stream); + DI.print(DP); + Stream.flush(); + + // If this method has been called it means someone has set up an external + // diagnostic handler. Assert on that. + assert(DiagHandler && "Invalid diagnostic handler"); + (*DiagHandler)(Severity, MsgStorage.c_str(), DiagContext); +} + +namespace { +struct LTODiagnosticHandler : public DiagnosticHandler { + LTOCodeGenerator *CodeGenerator; + LTODiagnosticHandler(LTOCodeGenerator *CodeGenPtr) + : CodeGenerator(CodeGenPtr) {} + bool handleDiagnostics(const DiagnosticInfo &DI) override { + CodeGenerator->DiagnosticHandler(DI); + return true; + } +}; +} + +void +LTOCodeGenerator::setDiagnosticHandler(lto_diagnostic_handler_t DiagHandler, + void *Ctxt) { + this->DiagHandler = DiagHandler; + this->DiagContext = Ctxt; + if (!DiagHandler) + return Context.setDiagnosticHandler(nullptr); + // Register the LTOCodeGenerator stub in the LLVMContext to forward the + // diagnostic to the external DiagHandler. 
+ Context.setDiagnosticHandler(std::make_unique<LTODiagnosticHandler>(this), + true); +} + +namespace { +class LTODiagnosticInfo : public DiagnosticInfo { + const Twine &Msg; +public: + LTODiagnosticInfo(const Twine &DiagMsg, DiagnosticSeverity Severity=DS_Error) + : DiagnosticInfo(DK_Linker, Severity), Msg(DiagMsg) {} + void print(DiagnosticPrinter &DP) const override { DP << Msg; } +}; +} + +void LTOCodeGenerator::emitError(const std::string &ErrMsg) { + if (DiagHandler) + (*DiagHandler)(LTO_DS_ERROR, ErrMsg.c_str(), DiagContext); + else + Context.diagnose(LTODiagnosticInfo(ErrMsg)); +} + +void LTOCodeGenerator::emitWarning(const std::string &ErrMsg) { + if (DiagHandler) + (*DiagHandler)(LTO_DS_WARNING, ErrMsg.c_str(), DiagContext); + else + Context.diagnose(LTODiagnosticInfo(ErrMsg, DS_Warning)); +} diff --git a/llvm/lib/LTO/LTOModule.cpp b/llvm/lib/LTO/LTOModule.cpp new file mode 100644 index 0000000000000..587b332e70649 --- /dev/null +++ b/llvm/lib/LTO/LTOModule.cpp @@ -0,0 +1,678 @@ +//===-- LTOModule.cpp - LLVM Link Time Optimizer --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Link Time Optimization library. This library is +// intended to be used by linker to optimize code at link time. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/LTO/legacy/LTOModule.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Object/IRObjectFile.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Transforms/Utils/GlobalStatus.h" +#include <system_error> +using namespace llvm; +using namespace llvm::object; + +LTOModule::LTOModule(std::unique_ptr<Module> M, MemoryBufferRef MBRef, + llvm::TargetMachine *TM) + : Mod(std::move(M)), MBRef(MBRef), _target(TM) { + SymTab.addModule(Mod.get()); +} + +LTOModule::~LTOModule() {} + +/// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM +/// bitcode. 
+bool LTOModule::isBitcodeFile(const void *Mem, size_t Length) { + Expected<MemoryBufferRef> BCData = IRObjectFile::findBitcodeInMemBuffer( + MemoryBufferRef(StringRef((const char *)Mem, Length), "<mem>")); + return !errorToBool(BCData.takeError()); +} + +bool LTOModule::isBitcodeFile(StringRef Path) { + ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = + MemoryBuffer::getFile(Path); + if (!BufferOrErr) + return false; + + Expected<MemoryBufferRef> BCData = IRObjectFile::findBitcodeInMemBuffer( + BufferOrErr.get()->getMemBufferRef()); + return !errorToBool(BCData.takeError()); +} + +bool LTOModule::isThinLTO() { + Expected<BitcodeLTOInfo> Result = getBitcodeLTOInfo(MBRef); + if (!Result) { + logAllUnhandledErrors(Result.takeError(), errs()); + return false; + } + return Result->IsThinLTO; +} + +bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer, + StringRef TriplePrefix) { + Expected<MemoryBufferRef> BCOrErr = + IRObjectFile::findBitcodeInMemBuffer(Buffer->getMemBufferRef()); + if (errorToBool(BCOrErr.takeError())) + return false; + LLVMContext Context; + ErrorOr<std::string> TripleOrErr = + expectedToErrorOrAndEmitErrors(Context, getBitcodeTargetTriple(*BCOrErr)); + if (!TripleOrErr) + return false; + return StringRef(*TripleOrErr).startswith(TriplePrefix); +} + +std::string LTOModule::getProducerString(MemoryBuffer *Buffer) { + Expected<MemoryBufferRef> BCOrErr = + IRObjectFile::findBitcodeInMemBuffer(Buffer->getMemBufferRef()); + if (errorToBool(BCOrErr.takeError())) + return ""; + LLVMContext Context; + ErrorOr<std::string> ProducerOrErr = expectedToErrorOrAndEmitErrors( + Context, getBitcodeProducerString(*BCOrErr)); + if (!ProducerOrErr) + return ""; + return *ProducerOrErr; +} + +ErrorOr<std::unique_ptr<LTOModule>> +LTOModule::createFromFile(LLVMContext &Context, StringRef path, + const TargetOptions &options) { + ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = + MemoryBuffer::getFile(path); + if (std::error_code EC = BufferOrErr.getError()) { + 
Context.emitError(EC.message()); + return EC; + } + std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrErr.get()); + return makeLTOModule(Buffer->getMemBufferRef(), options, Context, + /* ShouldBeLazy*/ false); +} + +ErrorOr<std::unique_ptr<LTOModule>> +LTOModule::createFromOpenFile(LLVMContext &Context, int fd, StringRef path, + size_t size, const TargetOptions &options) { + return createFromOpenFileSlice(Context, fd, path, size, 0, options); +} + +ErrorOr<std::unique_ptr<LTOModule>> +LTOModule::createFromOpenFileSlice(LLVMContext &Context, int fd, StringRef path, + size_t map_size, off_t offset, + const TargetOptions &options) { + ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = + MemoryBuffer::getOpenFileSlice(sys::fs::convertFDToNativeFile(fd), path, + map_size, offset); + if (std::error_code EC = BufferOrErr.getError()) { + Context.emitError(EC.message()); + return EC; + } + std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrErr.get()); + return makeLTOModule(Buffer->getMemBufferRef(), options, Context, + /* ShouldBeLazy */ false); +} + +ErrorOr<std::unique_ptr<LTOModule>> +LTOModule::createFromBuffer(LLVMContext &Context, const void *mem, + size_t length, const TargetOptions &options, + StringRef path) { + StringRef Data((const char *)mem, length); + MemoryBufferRef Buffer(Data, path); + return makeLTOModule(Buffer, options, Context, /* ShouldBeLazy */ false); +} + +ErrorOr<std::unique_ptr<LTOModule>> +LTOModule::createInLocalContext(std::unique_ptr<LLVMContext> Context, + const void *mem, size_t length, + const TargetOptions &options, StringRef path) { + StringRef Data((const char *)mem, length); + MemoryBufferRef Buffer(Data, path); + // If we own a context, we know this is being used only for symbol extraction, + // not linking. Be lazy in that case. 
+ ErrorOr<std::unique_ptr<LTOModule>> Ret = + makeLTOModule(Buffer, options, *Context, /* ShouldBeLazy */ true); + if (Ret) + (*Ret)->OwnedContext = std::move(Context); + return Ret; +} + +static ErrorOr<std::unique_ptr<Module>> +parseBitcodeFileImpl(MemoryBufferRef Buffer, LLVMContext &Context, + bool ShouldBeLazy) { + // Find the buffer. + Expected<MemoryBufferRef> MBOrErr = + IRObjectFile::findBitcodeInMemBuffer(Buffer); + if (Error E = MBOrErr.takeError()) { + std::error_code EC = errorToErrorCode(std::move(E)); + Context.emitError(EC.message()); + return EC; + } + + if (!ShouldBeLazy) { + // Parse the full file. + return expectedToErrorOrAndEmitErrors(Context, + parseBitcodeFile(*MBOrErr, Context)); + } + + // Parse lazily. + return expectedToErrorOrAndEmitErrors( + Context, + getLazyBitcodeModule(*MBOrErr, Context, true /*ShouldLazyLoadMetadata*/)); +} + +ErrorOr<std::unique_ptr<LTOModule>> +LTOModule::makeLTOModule(MemoryBufferRef Buffer, const TargetOptions &options, + LLVMContext &Context, bool ShouldBeLazy) { + ErrorOr<std::unique_ptr<Module>> MOrErr = + parseBitcodeFileImpl(Buffer, Context, ShouldBeLazy); + if (std::error_code EC = MOrErr.getError()) + return EC; + std::unique_ptr<Module> &M = *MOrErr; + + std::string TripleStr = M->getTargetTriple(); + if (TripleStr.empty()) + TripleStr = sys::getDefaultTargetTriple(); + llvm::Triple Triple(TripleStr); + + // find machine architecture for this module + std::string errMsg; + const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg); + if (!march) + return make_error_code(object::object_error::arch_not_found); + + // construct LTOModule, hand over ownership of module and target + SubtargetFeatures Features; + Features.getDefaultSubtargetFeatures(Triple); + std::string FeatureStr = Features.getString(); + // Set a default CPU for Darwin triples. 
+ std::string CPU; + if (Triple.isOSDarwin()) { + if (Triple.getArch() == llvm::Triple::x86_64) + CPU = "core2"; + else if (Triple.getArch() == llvm::Triple::x86) + CPU = "yonah"; + else if (Triple.getArch() == llvm::Triple::aarch64 || + Triple.getArch() == llvm::Triple::aarch64_32) + CPU = "cyclone"; + } + + TargetMachine *target = + march->createTargetMachine(TripleStr, CPU, FeatureStr, options, None); + + std::unique_ptr<LTOModule> Ret(new LTOModule(std::move(M), Buffer, target)); + Ret->parseSymbols(); + Ret->parseMetadata(); + + return std::move(Ret); +} + +/// Create a MemoryBuffer from a memory range with an optional name. +std::unique_ptr<MemoryBuffer> +LTOModule::makeBuffer(const void *mem, size_t length, StringRef name) { + const char *startPtr = (const char*)mem; + return MemoryBuffer::getMemBuffer(StringRef(startPtr, length), name, false); +} + +/// objcClassNameFromExpression - Get string that the data pointer points to. +bool +LTOModule::objcClassNameFromExpression(const Constant *c, std::string &name) { + if (const ConstantExpr *ce = dyn_cast<ConstantExpr>(c)) { + Constant *op = ce->getOperand(0); + if (GlobalVariable *gvn = dyn_cast<GlobalVariable>(op)) { + Constant *cn = gvn->getInitializer(); + if (ConstantDataArray *ca = dyn_cast<ConstantDataArray>(cn)) { + if (ca->isCString()) { + name = (".objc_class_name_" + ca->getAsCString()).str(); + return true; + } + } + } + } + return false; +} + +/// addObjCClass - Parse i386/ppc ObjC class data structure. 
+void LTOModule::addObjCClass(const GlobalVariable *clgv) { + const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer()); + if (!c) return; + + // second slot in __OBJC,__class is pointer to superclass name + std::string superclassName; + if (objcClassNameFromExpression(c->getOperand(1), superclassName)) { + auto IterBool = + _undefines.insert(std::make_pair(superclassName, NameAndAttributes())); + if (IterBool.second) { + NameAndAttributes &info = IterBool.first->second; + info.name = IterBool.first->first(); + info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; + info.isFunction = false; + info.symbol = clgv; + } + } + + // third slot in __OBJC,__class is pointer to class name + std::string className; + if (objcClassNameFromExpression(c->getOperand(2), className)) { + auto Iter = _defines.insert(className).first; + + NameAndAttributes info; + info.name = Iter->first(); + info.attributes = LTO_SYMBOL_PERMISSIONS_DATA | + LTO_SYMBOL_DEFINITION_REGULAR | LTO_SYMBOL_SCOPE_DEFAULT; + info.isFunction = false; + info.symbol = clgv; + _symbols.push_back(info); + } +} + +/// addObjCCategory - Parse i386/ppc ObjC category data structure. +void LTOModule::addObjCCategory(const GlobalVariable *clgv) { + const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer()); + if (!c) return; + + // second slot in __OBJC,__category is pointer to target class name + std::string targetclassName; + if (!objcClassNameFromExpression(c->getOperand(1), targetclassName)) + return; + + auto IterBool = + _undefines.insert(std::make_pair(targetclassName, NameAndAttributes())); + + if (!IterBool.second) + return; + + NameAndAttributes &info = IterBool.first->second; + info.name = IterBool.first->first(); + info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; + info.isFunction = false; + info.symbol = clgv; +} + +/// addObjCClassRef - Parse i386/ppc ObjC class list data structure. 
+void LTOModule::addObjCClassRef(const GlobalVariable *clgv) { + std::string targetclassName; + if (!objcClassNameFromExpression(clgv->getInitializer(), targetclassName)) + return; + + auto IterBool = + _undefines.insert(std::make_pair(targetclassName, NameAndAttributes())); + + if (!IterBool.second) + return; + + NameAndAttributes &info = IterBool.first->second; + info.name = IterBool.first->first(); + info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; + info.isFunction = false; + info.symbol = clgv; +} + +void LTOModule::addDefinedDataSymbol(ModuleSymbolTable::Symbol Sym) { + SmallString<64> Buffer; + { + raw_svector_ostream OS(Buffer); + SymTab.printSymbolName(OS, Sym); + Buffer.c_str(); + } + + const GlobalValue *V = Sym.get<GlobalValue *>(); + addDefinedDataSymbol(Buffer, V); +} + +void LTOModule::addDefinedDataSymbol(StringRef Name, const GlobalValue *v) { + // Add to list of defined symbols. + addDefinedSymbol(Name, v, false); + + if (!v->hasSection() /* || !isTargetDarwin */) + return; + + // Special case i386/ppc ObjC data structures in magic sections: + // The issue is that the old ObjC object format did some strange + // contortions to avoid real linker symbols. For instance, the + // ObjC class data structure is allocated statically in the executable + // that defines that class. That data structures contains a pointer to + // its superclass. But instead of just initializing that part of the + // struct to the address of its superclass, and letting the static and + // dynamic linkers do the rest, the runtime works by having that field + // instead point to a C-string that is the name of the superclass. + // At runtime the objc initialization updates that pointer and sets + // it to point to the actual super class. As far as the linker + // knows it is just a pointer to a string. But then someone wanted the + // linker to issue errors at build time if the superclass was not found. 
+ // So they figured out a way in mach-o object format to use an absolute + // symbols (.objc_class_name_Foo = 0) and a floating reference + // (.reference .objc_class_name_Bar) to cause the linker into erroring when + // a class was missing. + // The following synthesizes the implicit .objc_* symbols for the linker + // from the ObjC data structures generated by the front end. + + // special case if this data blob is an ObjC class definition + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(v)) { + StringRef Section = GV->getSection(); + if (Section.startswith("__OBJC,__class,")) { + addObjCClass(GV); + } + + // special case if this data blob is an ObjC category definition + else if (Section.startswith("__OBJC,__category,")) { + addObjCCategory(GV); + } + + // special case if this data blob is the list of referenced classes + else if (Section.startswith("__OBJC,__cls_refs,")) { + addObjCClassRef(GV); + } + } +} + +void LTOModule::addDefinedFunctionSymbol(ModuleSymbolTable::Symbol Sym) { + SmallString<64> Buffer; + { + raw_svector_ostream OS(Buffer); + SymTab.printSymbolName(OS, Sym); + Buffer.c_str(); + } + + const Function *F = cast<Function>(Sym.get<GlobalValue *>()); + addDefinedFunctionSymbol(Buffer, F); +} + +void LTOModule::addDefinedFunctionSymbol(StringRef Name, const Function *F) { + // add to list of defined symbols + addDefinedSymbol(Name, F, true); +} + +void LTOModule::addDefinedSymbol(StringRef Name, const GlobalValue *def, + bool isFunction) { + // set alignment part log2() can have rounding errors + uint32_t align = def->getAlignment(); + uint32_t attr = align ? 
countTrailingZeros(align) : 0; + + // set permissions part + if (isFunction) { + attr |= LTO_SYMBOL_PERMISSIONS_CODE; + } else { + const GlobalVariable *gv = dyn_cast<GlobalVariable>(def); + if (gv && gv->isConstant()) + attr |= LTO_SYMBOL_PERMISSIONS_RODATA; + else + attr |= LTO_SYMBOL_PERMISSIONS_DATA; + } + + // set definition part + if (def->hasWeakLinkage() || def->hasLinkOnceLinkage()) + attr |= LTO_SYMBOL_DEFINITION_WEAK; + else if (def->hasCommonLinkage()) + attr |= LTO_SYMBOL_DEFINITION_TENTATIVE; + else + attr |= LTO_SYMBOL_DEFINITION_REGULAR; + + // set scope part + if (def->hasLocalLinkage()) + // Ignore visibility if linkage is local. + attr |= LTO_SYMBOL_SCOPE_INTERNAL; + else if (def->hasHiddenVisibility()) + attr |= LTO_SYMBOL_SCOPE_HIDDEN; + else if (def->hasProtectedVisibility()) + attr |= LTO_SYMBOL_SCOPE_PROTECTED; + else if (def->canBeOmittedFromSymbolTable()) + attr |= LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN; + else + attr |= LTO_SYMBOL_SCOPE_DEFAULT; + + if (def->hasComdat()) + attr |= LTO_SYMBOL_COMDAT; + + if (isa<GlobalAlias>(def)) + attr |= LTO_SYMBOL_ALIAS; + + auto Iter = _defines.insert(Name).first; + + // fill information structure + NameAndAttributes info; + StringRef NameRef = Iter->first(); + info.name = NameRef; + assert(NameRef.data()[NameRef.size()] == '\0'); + info.attributes = attr; + info.isFunction = isFunction; + info.symbol = def; + + // add to table of symbols + _symbols.push_back(info); +} + +/// addAsmGlobalSymbol - Add a global symbol from module-level ASM to the +/// defined list. 
+void LTOModule::addAsmGlobalSymbol(StringRef name, + lto_symbol_attributes scope) { + auto IterBool = _defines.insert(name); + + // only add new define if not already defined + if (!IterBool.second) + return; + + NameAndAttributes &info = _undefines[IterBool.first->first()]; + + if (info.symbol == nullptr) { + // FIXME: This is trying to take care of module ASM like this: + // + // module asm ".zerofill __FOO, __foo, _bar_baz_qux, 0" + // + // but is gross and its mother dresses it funny. Have the ASM parser give us + // more details for this type of situation so that we're not guessing so + // much. + + // fill information structure + info.name = IterBool.first->first(); + info.attributes = + LTO_SYMBOL_PERMISSIONS_DATA | LTO_SYMBOL_DEFINITION_REGULAR | scope; + info.isFunction = false; + info.symbol = nullptr; + + // add to table of symbols + _symbols.push_back(info); + return; + } + + if (info.isFunction) + addDefinedFunctionSymbol(info.name, cast<Function>(info.symbol)); + else + addDefinedDataSymbol(info.name, info.symbol); + + _symbols.back().attributes &= ~LTO_SYMBOL_SCOPE_MASK; + _symbols.back().attributes |= scope; +} + +/// addAsmGlobalSymbolUndef - Add a global symbol from module-level ASM to the +/// undefined list. +void LTOModule::addAsmGlobalSymbolUndef(StringRef name) { + auto IterBool = _undefines.insert(std::make_pair(name, NameAndAttributes())); + + _asm_undefines.push_back(IterBool.first->first()); + + // we already have the symbol + if (!IterBool.second) + return; + + uint32_t attr = LTO_SYMBOL_DEFINITION_UNDEFINED; + attr |= LTO_SYMBOL_SCOPE_DEFAULT; + NameAndAttributes &info = IterBool.first->second; + info.name = IterBool.first->first(); + info.attributes = attr; + info.isFunction = false; + info.symbol = nullptr; +} + +/// Add a symbol which isn't defined just yet to a list to be resolved later. 
+void LTOModule::addPotentialUndefinedSymbol(ModuleSymbolTable::Symbol Sym, + bool isFunc) { + SmallString<64> name; + { + raw_svector_ostream OS(name); + SymTab.printSymbolName(OS, Sym); + name.c_str(); + } + + auto IterBool = _undefines.insert(std::make_pair(name, NameAndAttributes())); + + // we already have the symbol + if (!IterBool.second) + return; + + NameAndAttributes &info = IterBool.first->second; + + info.name = IterBool.first->first(); + + const GlobalValue *decl = Sym.dyn_cast<GlobalValue *>(); + + if (decl->hasExternalWeakLinkage()) + info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF; + else + info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; + + info.isFunction = isFunc; + info.symbol = decl; +} + +void LTOModule::parseSymbols() { + for (auto Sym : SymTab.symbols()) { + auto *GV = Sym.dyn_cast<GlobalValue *>(); + uint32_t Flags = SymTab.getSymbolFlags(Sym); + if (Flags & object::BasicSymbolRef::SF_FormatSpecific) + continue; + + bool IsUndefined = Flags & object::BasicSymbolRef::SF_Undefined; + + if (!GV) { + SmallString<64> Buffer; + { + raw_svector_ostream OS(Buffer); + SymTab.printSymbolName(OS, Sym); + Buffer.c_str(); + } + StringRef Name(Buffer); + + if (IsUndefined) + addAsmGlobalSymbolUndef(Name); + else if (Flags & object::BasicSymbolRef::SF_Global) + addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_DEFAULT); + else + addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_INTERNAL); + continue; + } + + auto *F = dyn_cast<Function>(GV); + if (IsUndefined) { + addPotentialUndefinedSymbol(Sym, F != nullptr); + continue; + } + + if (F) { + addDefinedFunctionSymbol(Sym); + continue; + } + + if (isa<GlobalVariable>(GV)) { + addDefinedDataSymbol(Sym); + continue; + } + + assert(isa<GlobalAlias>(GV)); + addDefinedDataSymbol(Sym); + } + + // make symbols for all undefines + for (StringMap<NameAndAttributes>::iterator u =_undefines.begin(), + e = _undefines.end(); u != e; ++u) { + // If this symbol also has a definition, then don't make an undefine because + // it is a 
tentative definition. + if (_defines.count(u->getKey())) continue; + NameAndAttributes info = u->getValue(); + _symbols.push_back(info); + } +} + +/// parseMetadata - Parse metadata from the module +void LTOModule::parseMetadata() { + raw_string_ostream OS(LinkerOpts); + + // Linker Options + if (NamedMDNode *LinkerOptions = + getModule().getNamedMetadata("llvm.linker.options")) { + for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) { + MDNode *MDOptions = LinkerOptions->getOperand(i); + for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) { + MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii)); + OS << " " << MDOption->getString(); + } + } + } + + // Globals - we only need to do this for COFF. + const Triple TT(_target->getTargetTriple()); + if (!TT.isOSBinFormatCOFF()) + return; + Mangler M; + for (const NameAndAttributes &Sym : _symbols) { + if (!Sym.symbol) + continue; + emitLinkerFlagsForGlobalCOFF(OS, Sym.symbol, TT, M); + } +} + +lto::InputFile *LTOModule::createInputFile(const void *buffer, + size_t buffer_size, const char *path, + std::string &outErr) { + StringRef Data((const char *)buffer, buffer_size); + MemoryBufferRef BufferRef(Data, path); + + Expected<std::unique_ptr<lto::InputFile>> ObjOrErr = + lto::InputFile::create(BufferRef); + + if (ObjOrErr) + return ObjOrErr->release(); + + outErr = std::string(path) + + ": Could not read LTO input file: " + toString(ObjOrErr.takeError()); + return nullptr; +} + +size_t LTOModule::getDependentLibraryCount(lto::InputFile *input) { + return input->getDependentLibraries().size(); +} + +const char *LTOModule::getDependentLibrary(lto::InputFile *input, size_t index, + size_t *size) { + StringRef S = input->getDependentLibraries()[index]; + *size = S.size(); + return S.data(); +} diff --git a/llvm/lib/LTO/SummaryBasedOptimizations.cpp b/llvm/lib/LTO/SummaryBasedOptimizations.cpp new file mode 100644 index 0000000000000..6db495de003b6 --- /dev/null +++ 
b/llvm/lib/LTO/SummaryBasedOptimizations.cpp @@ -0,0 +1,85 @@ +//==-SummaryBasedOptimizations.cpp - Optimizations based on ThinLTO summary-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements optimizations that are based on the module summaries. +// These optimizations are performed during the thinlink phase of the +// compilation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/LTO/SummaryBasedOptimizations.h" +#include "llvm/Analysis/SyntheticCountsUtils.h" +#include "llvm/IR/ModuleSummaryIndex.h" + +using namespace llvm; + +static cl::opt<bool> ThinLTOSynthesizeEntryCounts( + "thinlto-synthesize-entry-counts", cl::init(false), cl::Hidden, + cl::desc("Synthesize entry counts based on the summary")); + +extern cl::opt<int> InitialSyntheticCount; + +static void initializeCounts(ModuleSummaryIndex &Index) { + auto Root = Index.calculateCallGraphRoot(); + // Root is a fake node. All its successors are the actual roots of the + // callgraph. + // FIXME: This initializes the entry counts of only the root nodes. This makes + // sense when compiling a binary with ThinLTO, but for libraries any of the + // non-root nodes could be called from outside. 
+ for (auto &C : Root.calls()) { + auto &V = C.first; + for (auto &GVS : V.getSummaryList()) { + auto S = GVS.get()->getBaseObject(); + auto *F = cast<FunctionSummary>(S); + F->setEntryCount(InitialSyntheticCount); + } + } +} + +void llvm::computeSyntheticCounts(ModuleSummaryIndex &Index) { + if (!ThinLTOSynthesizeEntryCounts) + return; + + using Scaled64 = ScaledNumber<uint64_t>; + initializeCounts(Index); + auto GetCallSiteRelFreq = [](FunctionSummary::EdgeTy &Edge) { + return Scaled64(Edge.second.RelBlockFreq, -CalleeInfo::ScaleShift); + }; + auto GetEntryCount = [](ValueInfo V) { + if (V.getSummaryList().size()) { + auto S = V.getSummaryList().front().get()->getBaseObject(); + auto *F = cast<FunctionSummary>(S); + return F->entryCount(); + } else { + return UINT64_C(0); + } + }; + auto AddToEntryCount = [](ValueInfo V, Scaled64 New) { + if (!V.getSummaryList().size()) + return; + for (auto &GVS : V.getSummaryList()) { + auto S = GVS.get()->getBaseObject(); + auto *F = cast<FunctionSummary>(S); + F->setEntryCount( + SaturatingAdd(F->entryCount(), New.template toInt<uint64_t>())); + } + }; + + auto GetProfileCount = [&](ValueInfo V, FunctionSummary::EdgeTy &Edge) { + auto RelFreq = GetCallSiteRelFreq(Edge); + Scaled64 EC(GetEntryCount(V), 0); + return RelFreq * EC; + }; + // After initializing the counts in initializeCounts above, the counts have to + // be propagated across the combined callgraph. + // SyntheticCountsUtils::propagate takes care of this propagation on any + // callgraph that specialized GraphTraits. 
+ SyntheticCountsUtils<ModuleSummaryIndex *>::propagate(&Index, GetProfileCount, + AddToEntryCount); + Index.setHasSyntheticEntryCounts(); +} diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp new file mode 100644 index 0000000000000..d151de17896f6 --- /dev/null +++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -0,0 +1,1135 @@ +//===-ThinLTOCodeGenerator.cpp - LLVM Link Time Optimizer -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Thin Link Time Optimization library. This library is +// intended to be used by linker to optimize code at link time. +// +//===----------------------------------------------------------------------===// + +#include "llvm/LTO/legacy/ThinLTOCodeGenerator.h" + +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/ModuleSummaryAnalysis.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/Bitcode/BitcodeWriterPass.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/PassTimingInfo.h" +#include "llvm/IR/RemarkStreamer.h" +#include "llvm/IR/Verifier.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/LTO/LTO.h" +#include "llvm/LTO/SummaryBasedOptimizations.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Object/IRObjectFile.h" +#include "llvm/Support/CachePruning.h" +#include 
"llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/SHA1.h" +#include "llvm/Support/SmallVectorMemoryBuffer.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/ThreadPool.h" +#include "llvm/Support/Threading.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/VCSRevision.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/FunctionImport.h" +#include "llvm/Transforms/IPO/Internalize.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/IPO/WholeProgramDevirt.h" +#include "llvm/Transforms/ObjCARC.h" +#include "llvm/Transforms/Utils/FunctionImportUtils.h" + +#include <numeric> + +#if !defined(_MSC_VER) && !defined(__MINGW32__) +#include <unistd.h> +#else +#include <io.h> +#endif + +using namespace llvm; + +#define DEBUG_TYPE "thinlto" + +namespace llvm { +// Flags -discard-value-names, defined in LTOCodeGenerator.cpp +extern cl::opt<bool> LTODiscardValueNames; +extern cl::opt<std::string> RemarksFilename; +extern cl::opt<std::string> RemarksPasses; +extern cl::opt<bool> RemarksWithHotness; +extern cl::opt<std::string> RemarksFormat; +} + +namespace { + +static cl::opt<int> + ThreadCount("threads", cl::init(llvm::heavyweight_hardware_concurrency())); + +// Simple helper to save temporary files for debug. +static void saveTempBitcode(const Module &TheModule, StringRef TempDir, + unsigned count, StringRef Suffix) { + if (TempDir.empty()) + return; + // User asked to save temps, let dump the bitcode file after import. 
+ std::string SaveTempPath = (TempDir + llvm::Twine(count) + Suffix).str(); + std::error_code EC; + raw_fd_ostream OS(SaveTempPath, EC, sys::fs::OF_None); + if (EC) + report_fatal_error(Twine("Failed to open ") + SaveTempPath + + " to save optimized bitcode\n"); + WriteBitcodeToFile(TheModule, OS, /* ShouldPreserveUseListOrder */ true); +} + +static const GlobalValueSummary * +getFirstDefinitionForLinker(const GlobalValueSummaryList &GVSummaryList) { + // If there is any strong definition anywhere, get it. + auto StrongDefForLinker = llvm::find_if( + GVSummaryList, [](const std::unique_ptr<GlobalValueSummary> &Summary) { + auto Linkage = Summary->linkage(); + return !GlobalValue::isAvailableExternallyLinkage(Linkage) && + !GlobalValue::isWeakForLinker(Linkage); + }); + if (StrongDefForLinker != GVSummaryList.end()) + return StrongDefForLinker->get(); + // Get the first *linker visible* definition for this global in the summary + // list. + auto FirstDefForLinker = llvm::find_if( + GVSummaryList, [](const std::unique_ptr<GlobalValueSummary> &Summary) { + auto Linkage = Summary->linkage(); + return !GlobalValue::isAvailableExternallyLinkage(Linkage); + }); + // Extern templates can be emitted as available_externally. + if (FirstDefForLinker == GVSummaryList.end()) + return nullptr; + return FirstDefForLinker->get(); +} + +// Populate map of GUID to the prevailing copy for any multiply defined +// symbols. Currently assume first copy is prevailing, or any strong +// definition. Can be refined with Linker information in the future. 
+static void computePrevailingCopies( + const ModuleSummaryIndex &Index, + DenseMap<GlobalValue::GUID, const GlobalValueSummary *> &PrevailingCopy) { + auto HasMultipleCopies = [&](const GlobalValueSummaryList &GVSummaryList) { + return GVSummaryList.size() > 1; + }; + + for (auto &I : Index) { + if (HasMultipleCopies(I.second.SummaryList)) + PrevailingCopy[I.first] = + getFirstDefinitionForLinker(I.second.SummaryList); + } +} + +static StringMap<lto::InputFile *> +generateModuleMap(std::vector<std::unique_ptr<lto::InputFile>> &Modules) { + StringMap<lto::InputFile *> ModuleMap; + for (auto &M : Modules) { + assert(ModuleMap.find(M->getName()) == ModuleMap.end() && + "Expect unique Buffer Identifier"); + ModuleMap[M->getName()] = M.get(); + } + return ModuleMap; +} + +static void promoteModule(Module &TheModule, const ModuleSummaryIndex &Index) { + if (renameModuleForThinLTO(TheModule, Index)) + report_fatal_error("renameModuleForThinLTO failed"); +} + +namespace { +class ThinLTODiagnosticInfo : public DiagnosticInfo { + const Twine &Msg; +public: + ThinLTODiagnosticInfo(const Twine &DiagMsg, + DiagnosticSeverity Severity = DS_Error) + : DiagnosticInfo(DK_Linker, Severity), Msg(DiagMsg) {} + void print(DiagnosticPrinter &DP) const override { DP << Msg; } +}; +} + +/// Verify the module and strip broken debug info. +static void verifyLoadedModule(Module &TheModule) { + bool BrokenDebugInfo = false; + if (verifyModule(TheModule, &dbgs(), &BrokenDebugInfo)) + report_fatal_error("Broken module found, compilation aborted!"); + if (BrokenDebugInfo) { + TheModule.getContext().diagnose(ThinLTODiagnosticInfo( + "Invalid debug info found, debug info will be stripped", DS_Warning)); + StripDebugInfo(TheModule); + } +} + +static std::unique_ptr<Module> loadModuleFromInput(lto::InputFile *Input, + LLVMContext &Context, + bool Lazy, + bool IsImporting) { + auto &Mod = Input->getSingleBitcodeModule(); + SMDiagnostic Err; + Expected<std::unique_ptr<Module>> ModuleOrErr = + Lazy ? 
Mod.getLazyModule(Context, + /* ShouldLazyLoadMetadata */ true, IsImporting) + : Mod.parseModule(Context); + if (!ModuleOrErr) { + handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) { + SMDiagnostic Err = SMDiagnostic(Mod.getModuleIdentifier(), + SourceMgr::DK_Error, EIB.message()); + Err.print("ThinLTO", errs()); + }); + report_fatal_error("Can't load module, abort."); + } + if (!Lazy) + verifyLoadedModule(*ModuleOrErr.get()); + return std::move(*ModuleOrErr); +} + +static void +crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index, + StringMap<lto::InputFile*> &ModuleMap, + const FunctionImporter::ImportMapTy &ImportList) { + auto Loader = [&](StringRef Identifier) { + auto &Input = ModuleMap[Identifier]; + return loadModuleFromInput(Input, TheModule.getContext(), + /*Lazy=*/true, /*IsImporting*/ true); + }; + + FunctionImporter Importer(Index, Loader); + Expected<bool> Result = Importer.importFunctions(TheModule, ImportList); + if (!Result) { + handleAllErrors(Result.takeError(), [&](ErrorInfoBase &EIB) { + SMDiagnostic Err = SMDiagnostic(TheModule.getModuleIdentifier(), + SourceMgr::DK_Error, EIB.message()); + Err.print("ThinLTO", errs()); + }); + report_fatal_error("importFunctions failed"); + } + // Verify again after cross-importing. + verifyLoadedModule(TheModule); +} + +static void optimizeModule(Module &TheModule, TargetMachine &TM, + unsigned OptLevel, bool Freestanding, + ModuleSummaryIndex *Index) { + // Populate the PassManager + PassManagerBuilder PMB; + PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple()); + if (Freestanding) + PMB.LibraryInfo->disableAllFunctions(); + PMB.Inliner = createFunctionInliningPass(); + // FIXME: should get it from the bitcode? + PMB.OptLevel = OptLevel; + PMB.LoopVectorize = true; + PMB.SLPVectorize = true; + // Already did this in verifyLoadedModule(). 
+ PMB.VerifyInput = false; + PMB.VerifyOutput = false; + PMB.ImportSummary = Index; + + legacy::PassManager PM; + + // Add the TTI (required to inform the vectorizer about register size for + // instance) + PM.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis())); + + // Add optimizations + PMB.populateThinLTOPassManager(PM); + + PM.run(TheModule); +} + +static void +addUsedSymbolToPreservedGUID(const lto::InputFile &File, + DenseSet<GlobalValue::GUID> &PreservedGUID) { + for (const auto &Sym : File.symbols()) { + if (Sym.isUsed()) + PreservedGUID.insert(GlobalValue::getGUID(Sym.getIRName())); + } +} + +// Convert the PreservedSymbols map from "Name" based to "GUID" based. +static DenseSet<GlobalValue::GUID> +computeGUIDPreservedSymbols(const StringSet<> &PreservedSymbols, + const Triple &TheTriple) { + DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size()); + for (auto &Entry : PreservedSymbols) { + StringRef Name = Entry.first(); + if (TheTriple.isOSBinFormatMachO() && Name.size() > 0 && Name[0] == '_') + Name = Name.drop_front(); + GUIDPreservedSymbols.insert(GlobalValue::getGUID(Name)); + } + return GUIDPreservedSymbols; +} + +std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule, + TargetMachine &TM) { + SmallVector<char, 128> OutputBuffer; + + // CodeGen + { + raw_svector_ostream OS(OutputBuffer); + legacy::PassManager PM; + + // If the bitcode files contain ARC code and were compiled with optimization, + // the ObjCARCContractPass must be run, so do it unconditionally here. + PM.add(createObjCARCContractPass()); + + // Setup the codegen now. + if (TM.addPassesToEmitFile(PM, OS, nullptr, TargetMachine::CGFT_ObjectFile, + /* DisableVerify */ true)) + report_fatal_error("Failed to setup codegen"); + + // Run codegen now. resulting binary is in OutputBuffer. + PM.run(TheModule); + } + return std::make_unique<SmallVectorMemoryBuffer>(std::move(OutputBuffer)); +} + +/// Manage caching for a single Module. 
+class ModuleCacheEntry { + SmallString<128> EntryPath; + +public: + // Create a cache entry. This compute a unique hash for the Module considering + // the current list of export/import, and offer an interface to query to + // access the content in the cache. + ModuleCacheEntry( + StringRef CachePath, const ModuleSummaryIndex &Index, StringRef ModuleID, + const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, + const GVSummaryMapTy &DefinedGVSummaries, unsigned OptLevel, + bool Freestanding, const TargetMachineBuilder &TMBuilder) { + if (CachePath.empty()) + return; + + if (!Index.modulePaths().count(ModuleID)) + // The module does not have an entry, it can't have a hash at all + return; + + if (all_of(Index.getModuleHash(ModuleID), + [](uint32_t V) { return V == 0; })) + // No hash entry, no caching! + return; + + llvm::lto::Config Conf; + Conf.OptLevel = OptLevel; + Conf.Options = TMBuilder.Options; + Conf.CPU = TMBuilder.MCpu; + Conf.MAttrs.push_back(TMBuilder.MAttr); + Conf.RelocModel = TMBuilder.RelocModel; + Conf.CGOptLevel = TMBuilder.CGOptLevel; + Conf.Freestanding = Freestanding; + SmallString<40> Key; + computeLTOCacheKey(Key, Conf, Index, ModuleID, ImportList, ExportList, + ResolvedODR, DefinedGVSummaries); + + // This choice of file name allows the cache to be pruned (see pruneCache() + // in include/llvm/Support/CachePruning.h). + sys::path::append(EntryPath, CachePath, "llvmcache-" + Key); + } + + // Access the path to this entry in the cache. + StringRef getEntryPath() { return EntryPath; } + + // Try loading the buffer for this cache entry. 
+ ErrorOr<std::unique_ptr<MemoryBuffer>> tryLoadingBuffer() { + if (EntryPath.empty()) + return std::error_code(); + SmallString<64> ResultPath; + Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForRead( + Twine(EntryPath), sys::fs::OF_UpdateAtime, &ResultPath); + if (!FDOrErr) + return errorToErrorCode(FDOrErr.takeError()); + ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getOpenFile( + *FDOrErr, EntryPath, /*FileSize=*/-1, /*RequiresNullTerminator=*/false); + sys::fs::closeFile(*FDOrErr); + return MBOrErr; + } + + // Cache the Produced object file + void write(const MemoryBuffer &OutputBuffer) { + if (EntryPath.empty()) + return; + + // Write to a temporary to avoid race condition + SmallString<128> TempFilename; + SmallString<128> CachePath(EntryPath); + llvm::sys::path::remove_filename(CachePath); + sys::path::append(TempFilename, CachePath, "Thin-%%%%%%.tmp.o"); + + if (auto Err = handleErrors( + llvm::writeFileAtomically(TempFilename, EntryPath, + OutputBuffer.getBuffer()), + [](const llvm::AtomicFileWriteError &E) { + std::string ErrorMsgBuffer; + llvm::raw_string_ostream S(ErrorMsgBuffer); + E.log(S); + + if (E.Error == + llvm::atomic_write_error::failed_to_create_uniq_file) { + errs() << "Error: " << ErrorMsgBuffer << "\n"; + report_fatal_error("ThinLTO: Can't get a temporary file"); + } + })) { + // FIXME + consumeError(std::move(Err)); + } + } +}; + +static std::unique_ptr<MemoryBuffer> +ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index, + StringMap<lto::InputFile *> &ModuleMap, TargetMachine &TM, + const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols, + const GVSummaryMapTy &DefinedGlobals, + const ThinLTOCodeGenerator::CachingOptions &CacheOptions, + bool DisableCodeGen, StringRef SaveTempsDir, + bool Freestanding, unsigned OptLevel, unsigned count) { + + // "Benchmark"-like optimization: single-source case 
+ bool SingleModule = (ModuleMap.size() == 1); + + if (!SingleModule) { + promoteModule(TheModule, Index); + + // Apply summary-based prevailing-symbol resolution decisions. + thinLTOResolvePrevailingInModule(TheModule, DefinedGlobals); + + // Save temps: after promotion. + saveTempBitcode(TheModule, SaveTempsDir, count, ".1.promoted.bc"); + } + + // Be friendly and don't nuke totally the module when the client didn't + // supply anything to preserve. + if (!ExportList.empty() || !GUIDPreservedSymbols.empty()) { + // Apply summary-based internalization decisions. + thinLTOInternalizeModule(TheModule, DefinedGlobals); + } + + // Save internalized bitcode + saveTempBitcode(TheModule, SaveTempsDir, count, ".2.internalized.bc"); + + if (!SingleModule) { + crossImportIntoModule(TheModule, Index, ModuleMap, ImportList); + + // Save temps: after cross-module import. + saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc"); + } + + optimizeModule(TheModule, TM, OptLevel, Freestanding, &Index); + + saveTempBitcode(TheModule, SaveTempsDir, count, ".4.opt.bc"); + + if (DisableCodeGen) { + // Configured to stop before CodeGen, serialize the bitcode and return. + SmallVector<char, 128> OutputBuffer; + { + raw_svector_ostream OS(OutputBuffer); + ProfileSummaryInfo PSI(TheModule); + auto Index = buildModuleSummaryIndex(TheModule, nullptr, &PSI); + WriteBitcodeToFile(TheModule, OS, true, &Index); + } + return std::make_unique<SmallVectorMemoryBuffer>(std::move(OutputBuffer)); + } + + return codegenModule(TheModule, TM); +} + +/// Resolve prevailing symbols. Record resolutions in the \p ResolvedODR map +/// for caching, and in the \p Index for application during the ThinLTO +/// backends. This is needed for correctness for exported symbols (ensure +/// at least one copy kept) and a compile-time optimization (to drop duplicate +/// copies when possible). 
+static void resolvePrevailingInIndex( + ModuleSummaryIndex &Index, + StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> + &ResolvedODR, + const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols, + const DenseMap<GlobalValue::GUID, const GlobalValueSummary *> + &PrevailingCopy) { + + auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) { + const auto &Prevailing = PrevailingCopy.find(GUID); + // Not in map means that there was only one copy, which must be prevailing. + if (Prevailing == PrevailingCopy.end()) + return true; + return Prevailing->second == S; + }; + + auto recordNewLinkage = [&](StringRef ModuleIdentifier, + GlobalValue::GUID GUID, + GlobalValue::LinkageTypes NewLinkage) { + ResolvedODR[ModuleIdentifier][GUID] = NewLinkage; + }; + + thinLTOResolvePrevailingInIndex(Index, isPrevailing, recordNewLinkage, + GUIDPreservedSymbols); +} + +// Initialize the TargetMachine builder for a given Triple +static void initTMBuilder(TargetMachineBuilder &TMBuilder, + const Triple &TheTriple) { + // Set a default CPU for Darwin triples (copied from LTOCodeGenerator). + // FIXME this looks pretty terrible... 
+ if (TMBuilder.MCpu.empty() && TheTriple.isOSDarwin()) { + if (TheTriple.getArch() == llvm::Triple::x86_64) + TMBuilder.MCpu = "core2"; + else if (TheTriple.getArch() == llvm::Triple::x86) + TMBuilder.MCpu = "yonah"; + else if (TheTriple.getArch() == llvm::Triple::aarch64 || + TheTriple.getArch() == llvm::Triple::aarch64_32) + TMBuilder.MCpu = "cyclone"; + } + TMBuilder.TheTriple = std::move(TheTriple); +} + +} // end anonymous namespace + +void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) { + MemoryBufferRef Buffer(Data, Identifier); + + auto InputOrError = lto::InputFile::create(Buffer); + if (!InputOrError) + report_fatal_error("ThinLTO cannot create input file: " + + toString(InputOrError.takeError())); + + auto TripleStr = (*InputOrError)->getTargetTriple(); + Triple TheTriple(TripleStr); + + if (Modules.empty()) + initTMBuilder(TMBuilder, Triple(TheTriple)); + else if (TMBuilder.TheTriple != TheTriple) { + if (!TMBuilder.TheTriple.isCompatibleWith(TheTriple)) + report_fatal_error("ThinLTO modules with incompatible triples not " + "supported"); + initTMBuilder(TMBuilder, Triple(TMBuilder.TheTriple.merge(TheTriple))); + } + + Modules.emplace_back(std::move(*InputOrError)); +} + +void ThinLTOCodeGenerator::preserveSymbol(StringRef Name) { + PreservedSymbols.insert(Name); +} + +void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) { + // FIXME: At the moment, we don't take advantage of this extra information, + // we're conservatively considering cross-references as preserved. + // CrossReferencedSymbols.insert(Name); + PreservedSymbols.insert(Name); +} + +// TargetMachine factory +std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const { + std::string ErrMsg; + const Target *TheTarget = + TargetRegistry::lookupTarget(TheTriple.str(), ErrMsg); + if (!TheTarget) { + report_fatal_error("Can't load target for this Triple: " + ErrMsg); + } + + // Use MAttr as the default set of features. 
+ SubtargetFeatures Features(MAttr); + Features.getDefaultSubtargetFeatures(TheTriple); + std::string FeatureStr = Features.getString(); + + return std::unique_ptr<TargetMachine>( + TheTarget->createTargetMachine(TheTriple.str(), MCpu, FeatureStr, Options, + RelocModel, None, CGOptLevel)); +} + +/** + * Produce the combined summary index from all the bitcode files: + * "thin-link". + */ +std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() { + std::unique_ptr<ModuleSummaryIndex> CombinedIndex = + std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false); + uint64_t NextModuleId = 0; + for (auto &Mod : Modules) { + auto &M = Mod->getSingleBitcodeModule(); + if (Error Err = + M.readSummary(*CombinedIndex, Mod->getName(), NextModuleId++)) { + // FIXME diagnose + logAllUnhandledErrors( + std::move(Err), errs(), + "error: can't create module summary index for buffer: "); + return nullptr; + } + } + return CombinedIndex; +} + +struct IsExported { + const StringMap<FunctionImporter::ExportSetTy> &ExportLists; + const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols; + + IsExported(const StringMap<FunctionImporter::ExportSetTy> &ExportLists, + const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) + : ExportLists(ExportLists), GUIDPreservedSymbols(GUIDPreservedSymbols) {} + + bool operator()(StringRef ModuleIdentifier, GlobalValue::GUID GUID) const { + const auto &ExportList = ExportLists.find(ModuleIdentifier); + return (ExportList != ExportLists.end() && + ExportList->second.count(GUID)) || + GUIDPreservedSymbols.count(GUID); + } +}; + +struct IsPrevailing { + const DenseMap<GlobalValue::GUID, const GlobalValueSummary *> &PrevailingCopy; + IsPrevailing(const DenseMap<GlobalValue::GUID, const GlobalValueSummary *> + &PrevailingCopy) + : PrevailingCopy(PrevailingCopy) {} + + bool operator()(GlobalValue::GUID GUID, const GlobalValueSummary *S) const { + const auto &Prevailing = PrevailingCopy.find(GUID); + // Not in map means that there was only 
one copy, which must be prevailing. + if (Prevailing == PrevailingCopy.end()) + return true; + return Prevailing->second == S; + }; +}; + +static void computeDeadSymbolsInIndex( + ModuleSummaryIndex &Index, + const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) { + // We have no symbols resolution available. And can't do any better now in the + // case where the prevailing symbol is in a native object. It can be refined + // with linker information in the future. + auto isPrevailing = [&](GlobalValue::GUID G) { + return PrevailingType::Unknown; + }; + computeDeadSymbolsWithConstProp(Index, GUIDPreservedSymbols, isPrevailing, + /* ImportEnabled = */ true); +} + +/** + * Perform promotion and renaming of exported internal functions. + * Index is updated to reflect linkage changes from weak resolution. + */ +void ThinLTOCodeGenerator::promote(Module &TheModule, ModuleSummaryIndex &Index, + const lto::InputFile &File) { + auto ModuleCount = Index.modulePaths().size(); + auto ModuleIdentifier = TheModule.getModuleIdentifier(); + + // Collect for each module the list of function it defines (GUID -> Summary). + StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries; + Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); + + // Convert the preserved symbols set from string to GUID + auto GUIDPreservedSymbols = computeGUIDPreservedSymbols( + PreservedSymbols, Triple(TheModule.getTargetTriple())); + + // Add used symbol to the preserved symbols. + addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols); + + // Compute "dead" symbols, we don't want to import/export these! 
+ computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols); + + // Generate import/export list + StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount); + StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount); + ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, + ExportLists); + + DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy; + computePrevailingCopies(Index, PrevailingCopy); + + // Resolve prevailing symbols + StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR; + resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols, + PrevailingCopy); + + thinLTOResolvePrevailingInModule( + TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]); + + // Promote the exported values in the index, so that they are promoted + // in the module. + thinLTOInternalizeAndPromoteInIndex( + Index, IsExported(ExportLists, GUIDPreservedSymbols), + IsPrevailing(PrevailingCopy)); + + promoteModule(TheModule, Index); +} + +/** + * Perform cross-module importing for the module identified by ModuleIdentifier. + */ +void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule, + ModuleSummaryIndex &Index, + const lto::InputFile &File) { + auto ModuleMap = generateModuleMap(Modules); + auto ModuleCount = Index.modulePaths().size(); + + // Collect for each module the list of function it defines (GUID -> Summary). + StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount); + Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); + + // Convert the preserved symbols set from string to GUID + auto GUIDPreservedSymbols = computeGUIDPreservedSymbols( + PreservedSymbols, Triple(TheModule.getTargetTriple())); + + addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols); + + // Compute "dead" symbols, we don't want to import/export these! 
+ computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols); + + // Generate import/export list + StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount); + StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount); + ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, + ExportLists); + auto &ImportList = ImportLists[TheModule.getModuleIdentifier()]; + + crossImportIntoModule(TheModule, Index, ModuleMap, ImportList); +} + +/** + * Compute the list of summaries needed for importing into module. + */ +void ThinLTOCodeGenerator::gatherImportedSummariesForModule( + Module &TheModule, ModuleSummaryIndex &Index, + std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex, + const lto::InputFile &File) { + auto ModuleCount = Index.modulePaths().size(); + auto ModuleIdentifier = TheModule.getModuleIdentifier(); + + // Collect for each module the list of function it defines (GUID -> Summary). + StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount); + Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); + + // Convert the preserved symbols set from string to GUID + auto GUIDPreservedSymbols = computeGUIDPreservedSymbols( + PreservedSymbols, Triple(TheModule.getTargetTriple())); + + addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols); + + // Compute "dead" symbols, we don't want to import/export these! + computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols); + + // Generate import/export list + StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount); + StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount); + ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, + ExportLists); + + llvm::gatherImportedSummariesForModule( + ModuleIdentifier, ModuleToDefinedGVSummaries, + ImportLists[ModuleIdentifier], ModuleToSummariesForIndex); +} + +/** + * Emit the list of files needed for importing into module. 
+ */ +void ThinLTOCodeGenerator::emitImports(Module &TheModule, StringRef OutputName, + ModuleSummaryIndex &Index, + const lto::InputFile &File) { + auto ModuleCount = Index.modulePaths().size(); + auto ModuleIdentifier = TheModule.getModuleIdentifier(); + + // Collect for each module the list of function it defines (GUID -> Summary). + StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount); + Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); + + // Convert the preserved symbols set from string to GUID + auto GUIDPreservedSymbols = computeGUIDPreservedSymbols( + PreservedSymbols, Triple(TheModule.getTargetTriple())); + + addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols); + + // Compute "dead" symbols, we don't want to import/export these! + computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols); + + // Generate import/export list + StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount); + StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount); + ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, + ExportLists); + + std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex; + llvm::gatherImportedSummariesForModule( + ModuleIdentifier, ModuleToDefinedGVSummaries, + ImportLists[ModuleIdentifier], ModuleToSummariesForIndex); + + std::error_code EC; + if ((EC = EmitImportsFiles(ModuleIdentifier, OutputName, + ModuleToSummariesForIndex))) + report_fatal_error(Twine("Failed to open ") + OutputName + + " to save imports lists\n"); +} + +/** + * Perform internalization. Runs promote and internalization together. + * Index is updated to reflect linkage changes. 
+ */ +void ThinLTOCodeGenerator::internalize(Module &TheModule, + ModuleSummaryIndex &Index, + const lto::InputFile &File) { + initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple())); + auto ModuleCount = Index.modulePaths().size(); + auto ModuleIdentifier = TheModule.getModuleIdentifier(); + + // Convert the preserved symbols set from string to GUID + auto GUIDPreservedSymbols = + computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple); + + addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols); + + // Collect for each module the list of function it defines (GUID -> Summary). + StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount); + Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); + + // Compute "dead" symbols, we don't want to import/export these! + computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols); + + // Generate import/export list + StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount); + StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount); + ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, + ExportLists); + auto &ExportList = ExportLists[ModuleIdentifier]; + + // Be friendly and don't nuke totally the module when the client didn't + // supply anything to preserve. + if (ExportList.empty() && GUIDPreservedSymbols.empty()) + return; + + DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy; + computePrevailingCopies(Index, PrevailingCopy); + + // Resolve prevailing symbols + StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR; + resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols, + PrevailingCopy); + + // Promote the exported values in the index, so that they are promoted + // in the module. 
+ thinLTOInternalizeAndPromoteInIndex( + Index, IsExported(ExportLists, GUIDPreservedSymbols), + IsPrevailing(PrevailingCopy)); + + promoteModule(TheModule, Index); + + // Internalization + thinLTOResolvePrevailingInModule( + TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]); + + thinLTOInternalizeModule(TheModule, + ModuleToDefinedGVSummaries[ModuleIdentifier]); +} + +/** + * Perform post-importing ThinLTO optimizations. + */ +void ThinLTOCodeGenerator::optimize(Module &TheModule) { + initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple())); + + // Optimize now + optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding, + nullptr); +} + +/// Write out the generated object file, either from CacheEntryPath or from +/// OutputBuffer, preferring hard-link when possible. +/// Returns the path to the generated file in SavedObjectsDirectoryPath. +std::string +ThinLTOCodeGenerator::writeGeneratedObject(int count, StringRef CacheEntryPath, + const MemoryBuffer &OutputBuffer) { + auto ArchName = TMBuilder.TheTriple.getArchName(); + SmallString<128> OutputPath(SavedObjectsDirectoryPath); + llvm::sys::path::append(OutputPath, + Twine(count) + "." + ArchName + ".thinlto.o"); + OutputPath.c_str(); // Ensure the string is null terminated. + if (sys::fs::exists(OutputPath)) + sys::fs::remove(OutputPath); + + // We don't return a memory buffer to the linker, just a list of files. + if (!CacheEntryPath.empty()) { + // Cache is enabled, hard-link the entry (or copy if hard-link fails). + auto Err = sys::fs::create_hard_link(CacheEntryPath, OutputPath); + if (!Err) + return OutputPath.str(); + // Hard linking failed, try to copy. + Err = sys::fs::copy_file(CacheEntryPath, OutputPath); + if (!Err) + return OutputPath.str(); + // Copy failed (could be because the CacheEntry was removed from the cache + // in the meantime by another process), fall back and try to write down the + // buffer to the output. 
+ errs() << "error: can't link or copy from cached entry '" << CacheEntryPath + << "' to '" << OutputPath << "'\n"; + } + // No cache entry, just write out the buffer. + std::error_code Err; + raw_fd_ostream OS(OutputPath, Err, sys::fs::OF_None); + if (Err) + report_fatal_error("Can't open output '" + OutputPath + "'\n"); + OS << OutputBuffer.getBuffer(); + return OutputPath.str(); +} + +// Main entry point for the ThinLTO processing +void ThinLTOCodeGenerator::run() { + // Prepare the resulting object vector + assert(ProducedBinaries.empty() && "The generator should not be reused"); + if (SavedObjectsDirectoryPath.empty()) + ProducedBinaries.resize(Modules.size()); + else { + sys::fs::create_directories(SavedObjectsDirectoryPath); + bool IsDir; + sys::fs::is_directory(SavedObjectsDirectoryPath, IsDir); + if (!IsDir) + report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath + "'"); + ProducedBinaryFiles.resize(Modules.size()); + } + + if (CodeGenOnly) { + // Perform only parallel codegen and return. + ThreadPool Pool; + int count = 0; + for (auto &Mod : Modules) { + Pool.async([&](int count) { + LLVMContext Context; + Context.setDiscardValueNames(LTODiscardValueNames); + + // Parse module now + auto TheModule = loadModuleFromInput(Mod.get(), Context, false, + /*IsImporting*/ false); + + // CodeGen + auto OutputBuffer = codegenModule(*TheModule, *TMBuilder.create()); + if (SavedObjectsDirectoryPath.empty()) + ProducedBinaries[count] = std::move(OutputBuffer); + else + ProducedBinaryFiles[count] = + writeGeneratedObject(count, "", *OutputBuffer); + }, count++); + } + + return; + } + + // Sequential linking phase + auto Index = linkCombinedIndex(); + + // Save temps: index. 
+ if (!SaveTempsDir.empty()) { + auto SaveTempPath = SaveTempsDir + "index.bc"; + std::error_code EC; + raw_fd_ostream OS(SaveTempPath, EC, sys::fs::OF_None); + if (EC) + report_fatal_error(Twine("Failed to open ") + SaveTempPath + + " to save optimized bitcode\n"); + WriteIndexToFile(*Index, OS); + } + + + // Prepare the module map. + auto ModuleMap = generateModuleMap(Modules); + auto ModuleCount = Modules.size(); + + // Collect for each module the list of function it defines (GUID -> Summary). + StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount); + Index->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); + + // Convert the preserved symbols set from string to GUID, this is needed for + // computing the caching hash and the internalization. + auto GUIDPreservedSymbols = + computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple); + + // Add used symbol from inputs to the preserved symbols. + for (const auto &M : Modules) + addUsedSymbolToPreservedGUID(*M, GUIDPreservedSymbols); + + // Compute "dead" symbols, we don't want to import/export these! + computeDeadSymbolsInIndex(*Index, GUIDPreservedSymbols); + + // Synthesize entry counts for functions in the combined index. + computeSyntheticCounts(*Index); + + // Perform index-based WPD. This will return immediately if there are + // no index entries in the typeIdMetadata map (e.g. if we are instead + // performing IR-based WPD in hybrid regular/thin LTO mode). + std::map<ValueInfo, std::vector<VTableSlotSummary>> LocalWPDTargetsMap; + std::set<GlobalValue::GUID> ExportedGUIDs; + runWholeProgramDevirtOnIndex(*Index, ExportedGUIDs, LocalWPDTargetsMap); + for (auto GUID : ExportedGUIDs) + GUIDPreservedSymbols.insert(GUID); + + // Collect the import/export lists for all modules from the call-graph in the + // combined index. 
+ StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount); + StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount); + ComputeCrossModuleImport(*Index, ModuleToDefinedGVSummaries, ImportLists, + ExportLists); + + // We use a std::map here to be able to have a defined ordering when + // producing a hash for the cache entry. + // FIXME: we should be able to compute the caching hash for the entry based + // on the index, and nuke this map. + StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR; + + DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy; + computePrevailingCopies(*Index, PrevailingCopy); + + // Resolve prevailing symbols, this has to be computed early because it + // impacts the caching. + resolvePrevailingInIndex(*Index, ResolvedODR, GUIDPreservedSymbols, + PrevailingCopy); + + // Use global summary-based analysis to identify symbols that can be + // internalized (because they aren't exported or preserved as per callback). + // Changes are made in the index, consumed in the ThinLTO backends. + updateIndexWPDForExports(*Index, + IsExported(ExportLists, GUIDPreservedSymbols), + LocalWPDTargetsMap); + thinLTOInternalizeAndPromoteInIndex( + *Index, IsExported(ExportLists, GUIDPreservedSymbols), + IsPrevailing(PrevailingCopy)); + + // Make sure that every module has an entry in the ExportLists, ImportList, + // GVSummary and ResolvedODR maps to enable threaded access to these maps + // below. + for (auto &Module : Modules) { + auto ModuleIdentifier = Module->getName(); + ExportLists[ModuleIdentifier]; + ImportLists[ModuleIdentifier]; + ResolvedODR[ModuleIdentifier]; + ModuleToDefinedGVSummaries[ModuleIdentifier]; + } + + // Compute the ordering we will process the inputs: the rough heuristic here + // is to sort them per size so that the largest module get schedule as soon as + // possible. This is purely a compile-time optimization. 
+ std::vector<int> ModulesOrdering; + ModulesOrdering.resize(Modules.size()); + std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0); + llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) { + auto LSize = + Modules[LeftIndex]->getSingleBitcodeModule().getBuffer().size(); + auto RSize = + Modules[RightIndex]->getSingleBitcodeModule().getBuffer().size(); + return LSize > RSize; + }); + + // Parallel optimizer + codegen + { + ThreadPool Pool(ThreadCount); + for (auto IndexCount : ModulesOrdering) { + auto &Mod = Modules[IndexCount]; + Pool.async([&](int count) { + auto ModuleIdentifier = Mod->getName(); + auto &ExportList = ExportLists[ModuleIdentifier]; + + auto &DefinedGVSummaries = ModuleToDefinedGVSummaries[ModuleIdentifier]; + + // The module may be cached, this helps handling it. + ModuleCacheEntry CacheEntry(CacheOptions.Path, *Index, ModuleIdentifier, + ImportLists[ModuleIdentifier], ExportList, + ResolvedODR[ModuleIdentifier], + DefinedGVSummaries, OptLevel, Freestanding, + TMBuilder); + auto CacheEntryPath = CacheEntry.getEntryPath(); + + { + auto ErrOrBuffer = CacheEntry.tryLoadingBuffer(); + LLVM_DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss") + << " '" << CacheEntryPath << "' for buffer " + << count << " " << ModuleIdentifier << "\n"); + + if (ErrOrBuffer) { + // Cache Hit! 
+ if (SavedObjectsDirectoryPath.empty()) + ProducedBinaries[count] = std::move(ErrOrBuffer.get()); + else + ProducedBinaryFiles[count] = writeGeneratedObject( + count, CacheEntryPath, *ErrOrBuffer.get()); + return; + } + } + + LLVMContext Context; + Context.setDiscardValueNames(LTODiscardValueNames); + Context.enableDebugTypeODRUniquing(); + auto DiagFileOrErr = lto::setupOptimizationRemarks( + Context, RemarksFilename, RemarksPasses, RemarksFormat, + RemarksWithHotness, count); + if (!DiagFileOrErr) { + errs() << "Error: " << toString(DiagFileOrErr.takeError()) << "\n"; + report_fatal_error("ThinLTO: Can't get an output file for the " + "remarks"); + } + + // Parse module now + auto TheModule = loadModuleFromInput(Mod.get(), Context, false, + /*IsImporting*/ false); + + // Save temps: original file. + saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc"); + + auto &ImportList = ImportLists[ModuleIdentifier]; + // Run the main process now, and generates a binary + auto OutputBuffer = ProcessThinLTOModule( + *TheModule, *Index, ModuleMap, *TMBuilder.create(), ImportList, + ExportList, GUIDPreservedSymbols, + ModuleToDefinedGVSummaries[ModuleIdentifier], CacheOptions, + DisableCodeGen, SaveTempsDir, Freestanding, OptLevel, count); + + // Commit to the cache (if enabled) + CacheEntry.write(*OutputBuffer); + + if (SavedObjectsDirectoryPath.empty()) { + // We need to generated a memory buffer for the linker. + if (!CacheEntryPath.empty()) { + // When cache is enabled, reload from the cache if possible. + // Releasing the buffer from the heap and reloading it from the + // cache file with mmap helps us to lower memory pressure. + // The freed memory can be used for the next input file. + // The final binary link will read from the VFS cache (hopefully!) + // or from disk (if the memory pressure was too high). 
+ auto ReloadedBufferOrErr = CacheEntry.tryLoadingBuffer(); + if (auto EC = ReloadedBufferOrErr.getError()) { + // On error, keep the preexisting buffer and print a diagnostic. + errs() << "error: can't reload cached file '" << CacheEntryPath + << "': " << EC.message() << "\n"; + } else { + OutputBuffer = std::move(*ReloadedBufferOrErr); + } + } + ProducedBinaries[count] = std::move(OutputBuffer); + return; + } + ProducedBinaryFiles[count] = writeGeneratedObject( + count, CacheEntryPath, *OutputBuffer); + }, IndexCount); + } + } + + pruneCache(CacheOptions.Path, CacheOptions.Policy); + + // If statistics were requested, print them out now. + if (llvm::AreStatisticsEnabled()) + llvm::PrintStatistics(); + reportAndResetTimings(); +} diff --git a/llvm/lib/LTO/UpdateCompilerUsed.cpp b/llvm/lib/LTO/UpdateCompilerUsed.cpp new file mode 100644 index 0000000000000..6434f902088d1 --- /dev/null +++ b/llvm/lib/LTO/UpdateCompilerUsed.cpp @@ -0,0 +1,132 @@ +//==-LTOInternalize.cpp - LLVM Link Time Optimizer Internalization Utility -==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a helper to run the internalization part of LTO. +// +//===----------------------------------------------------------------------===// + +#include "llvm/LTO/legacy/UpdateCompilerUsed.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Mangler.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +using namespace llvm; + +namespace { + +// Helper class that collects AsmUsed and user supplied libcalls. 
+class PreserveLibCallsAndAsmUsed { +public: + PreserveLibCallsAndAsmUsed(const StringSet<> &AsmUndefinedRefs, + const TargetMachine &TM, + std::vector<GlobalValue *> &LLVMUsed) + : AsmUndefinedRefs(AsmUndefinedRefs), TM(TM), LLVMUsed(LLVMUsed) {} + + void findInModule(Module &TheModule) { + initializeLibCalls(TheModule); + for (Function &F : TheModule) + findLibCallsAndAsm(F); + for (GlobalVariable &GV : TheModule.globals()) + findLibCallsAndAsm(GV); + for (GlobalAlias &GA : TheModule.aliases()) + findLibCallsAndAsm(GA); + } + +private: + // Inputs + const StringSet<> &AsmUndefinedRefs; + const TargetMachine &TM; + + // Temps + llvm::Mangler Mangler; + StringSet<> Libcalls; + + // Output + std::vector<GlobalValue *> &LLVMUsed; + + // Collect names of runtime library functions. User-defined functions with the + // same names are added to llvm.compiler.used to prevent them from being + // deleted by optimizations. + void initializeLibCalls(const Module &TheModule) { + TargetLibraryInfoImpl TLII(Triple(TM.getTargetTriple())); + TargetLibraryInfo TLI(TLII); + + // TargetLibraryInfo has info on C runtime library calls on the current + // target. + for (unsigned I = 0, E = static_cast<unsigned>(LibFunc::NumLibFuncs); + I != E; ++I) { + LibFunc F = static_cast<LibFunc>(I); + if (TLI.has(F)) + Libcalls.insert(TLI.getName(F)); + } + + SmallPtrSet<const TargetLowering *, 1> TLSet; + + for (const Function &F : TheModule) { + const TargetLowering *Lowering = + TM.getSubtargetImpl(F)->getTargetLowering(); + + if (Lowering && TLSet.insert(Lowering).second) + // TargetLowering has info on library calls that CodeGen expects to be + // available, both from the C runtime and compiler-rt. 
+ for (unsigned I = 0, E = static_cast<unsigned>(RTLIB::UNKNOWN_LIBCALL); + I != E; ++I) + if (const char *Name = + Lowering->getLibcallName(static_cast<RTLIB::Libcall>(I))) + Libcalls.insert(Name); + } + } + + void findLibCallsAndAsm(GlobalValue &GV) { + // There are no restrictions to apply to declarations. + if (GV.isDeclaration()) + return; + + // There is nothing more restrictive than private linkage. + if (GV.hasPrivateLinkage()) + return; + + // Conservatively append user-supplied runtime library functions (supplied + // either directly, or via a function alias) to llvm.compiler.used. These + // could be internalized and deleted by optimizations like -globalopt, + // causing problems when later optimizations add new library calls (e.g., + // llvm.memset => memset and printf => puts). + // Leave it to the linker to remove any dead code (e.g. with -dead_strip). + GlobalValue *FuncAliasee = nullptr; + if (isa<GlobalAlias>(GV)) { + auto *A = cast<GlobalAlias>(&GV); + FuncAliasee = dyn_cast<Function>(A->getAliasee()); + } + if ((isa<Function>(GV) || FuncAliasee) && Libcalls.count(GV.getName())) { + LLVMUsed.push_back(&GV); + return; + } + + SmallString<64> Buffer; + TM.getNameWithPrefix(Buffer, &GV, Mangler); + if (AsmUndefinedRefs.count(Buffer)) + LLVMUsed.push_back(&GV); + } +}; + +} // namespace anonymous + +void llvm::updateCompilerUsed(Module &TheModule, const TargetMachine &TM, + const StringSet<> &AsmUndefinedRefs) { + std::vector<GlobalValue *> UsedValues; + PreserveLibCallsAndAsmUsed(AsmUndefinedRefs, TM, UsedValues) + .findInModule(TheModule); + + if (UsedValues.empty()) + return; + + appendToCompilerUsed(TheModule, UsedValues); +} |
