diff options
Diffstat (limited to 'llvm/lib/LTO')
-rw-r--r-- | llvm/lib/LTO/Caching.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/LTO/LTO.cpp | 185 | ||||
-rw-r--r-- | llvm/lib/LTO/LTOBackend.cpp | 106 | ||||
-rw-r--r-- | llvm/lib/LTO/LTOCodeGenerator.cpp | 20 | ||||
-rw-r--r-- | llvm/lib/LTO/LTOModule.cpp | 14 | ||||
-rw-r--r-- | llvm/lib/LTO/ThinLTOCodeGenerator.cpp | 63 | ||||
-rw-r--r-- | llvm/lib/LTO/UpdateCompilerUsed.cpp | 1 |
7 files changed, 275 insertions, 116 deletions
diff --git a/llvm/lib/LTO/Caching.cpp b/llvm/lib/LTO/Caching.cpp index 12dcd182de2d0..46cac3fb18302 100644 --- a/llvm/lib/LTO/Caching.cpp +++ b/llvm/lib/LTO/Caching.cpp @@ -144,7 +144,7 @@ Expected<NativeObjectCache> lto::localCache(StringRef CacheDirectoryPath, // This CacheStream will move the temporary file into the cache when done. return std::make_unique<CacheStream>( std::make_unique<raw_fd_ostream>(Temp->FD, /* ShouldClose */ false), - AddBuffer, std::move(*Temp), EntryPath.str(), Task); + AddBuffer, std::move(*Temp), std::string(EntryPath.str()), Task); }; }; } diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 297b11de17a90..6e1e3998e490e 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -12,6 +12,8 @@ #include "llvm/LTO/LTO.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/StackSafetyAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Bitcode/BitcodeReader.h" @@ -21,10 +23,10 @@ #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMRemarkStreamer.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Metadata.h" -#include "llvm/IR/RemarkStreamer.h" #include "llvm/LTO/LTOBackend.h" #include "llvm/LTO/SummaryBasedOptimizations.h" #include "llvm/Linker/IRMover.h" @@ -39,6 +41,7 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/Threading.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/VCSRevision.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -96,22 +99,12 @@ void llvm::computeLTOCacheKey( }; auto AddUnsigned = [&](unsigned I) { uint8_t Data[4]; - Data[0] = I; - Data[1] = I >> 8; - Data[2] = I >> 16; - Data[3] = I >> 24; + support::endian::write32le(Data, I); 
Hasher.update(ArrayRef<uint8_t>{Data, 4}); }; auto AddUint64 = [&](uint64_t I) { uint8_t Data[8]; - Data[0] = I; - Data[1] = I >> 8; - Data[2] = I >> 16; - Data[3] = I >> 24; - Data[4] = I >> 32; - Data[5] = I >> 40; - Data[6] = I >> 48; - Data[7] = I >> 56; + support::endian::write64le(Data, I); Hasher.update(ArrayRef<uint8_t>{Data, 8}); }; AddString(Conf.CPU); @@ -147,8 +140,17 @@ void llvm::computeLTOCacheKey( // Include the hash for the current module auto ModHash = Index.getModuleHash(ModuleID); Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash))); + + std::vector<uint64_t> ExportsGUID; + ExportsGUID.reserve(ExportList.size()); for (const auto &VI : ExportList) { auto GUID = VI.getGUID(); + ExportsGUID.push_back(GUID); + } + + // Sort the export list elements GUIDs. + llvm::sort(ExportsGUID); + for (uint64_t GUID : ExportsGUID) { // The export list can impact the internalization, be conservative here Hasher.update(ArrayRef<uint8_t>((uint8_t *)&GUID, sizeof(GUID))); } @@ -156,12 +158,23 @@ void llvm::computeLTOCacheKey( // Include the hash for every module we import functions from. The set of // imported symbols for each module may affect code generation and is // sensitive to link order, so include that as well. 
- for (auto &Entry : ImportList) { - auto ModHash = Index.getModuleHash(Entry.first()); + using ImportMapIteratorTy = FunctionImporter::ImportMapTy::const_iterator; + std::vector<ImportMapIteratorTy> ImportModulesVector; + ImportModulesVector.reserve(ImportList.size()); + + for (ImportMapIteratorTy It = ImportList.begin(); It != ImportList.end(); + ++It) { + ImportModulesVector.push_back(It); + } + llvm::sort(ImportModulesVector, + [](const ImportMapIteratorTy &Lhs, const ImportMapIteratorTy &Rhs) + -> bool { return Lhs->getKey() < Rhs->getKey(); }); + for (const ImportMapIteratorTy &EntryIt : ImportModulesVector) { + auto ModHash = Index.getModuleHash(EntryIt->first()); Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash))); - AddUint64(Entry.second.size()); - for (auto &Fn : Entry.second) + AddUint64(EntryIt->second.size()); + for (auto &Fn : EntryIt->second) AddUint64(Fn); } @@ -513,7 +526,7 @@ void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms, assert(!GlobalRes.Prevailing && "Multiple prevailing defs are not allowed"); GlobalRes.Prevailing = true; - GlobalRes.IRName = Sym.getIRName(); + GlobalRes.IRName = std::string(Sym.getIRName()); } else if (!GlobalRes.Prevailing && GlobalRes.IRName.empty()) { // Sometimes it can be two copies of symbol in a module and prevailing // symbol can have no IR name. That might happen if symbol is defined in @@ -521,7 +534,7 @@ void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms, // the same symbol we want to use IR name of the prevailing symbol. // Otherwise, if we haven't seen a prevailing symbol, set the name so that // we can later use it to check if there is any prevailing copy in IR. 
- GlobalRes.IRName = Sym.getIRName(); + GlobalRes.IRName = std::string(Sym.getIRName()); } // Set the partition to external if we know it is re-defined by the linker @@ -611,6 +624,7 @@ Error LTO::addModule(InputFile &Input, unsigned ModI, if (LTOInfo->IsThinLTO) return addThinLTO(BM, ModSyms, ResI, ResE); + RegularLTO.EmptyCombinedModule = false; Expected<RegularLTOState::AddedModule> ModOrErr = addRegularLTO(BM, ModSyms, ResI, ResE); if (!ModOrErr) @@ -762,10 +776,11 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, if (Sym.isCommon()) { // FIXME: We should figure out what to do about commons defined by asm. // For now they aren't reported correctly by ModuleSymbolTable. - auto &CommonRes = RegularLTO.Commons[Sym.getIRName()]; + auto &CommonRes = RegularLTO.Commons[std::string(Sym.getIRName())]; CommonRes.Size = std::max(CommonRes.Size, Sym.getCommonSize()); - CommonRes.Align = - std::max(CommonRes.Align, MaybeAlign(Sym.getCommonAlignment())); + MaybeAlign SymAlign(Sym.getCommonAlignment()); + if (SymAlign) + CommonRes.Align = max(*SymAlign, CommonRes.Align); CommonRes.Prevailing |= Res.Prevailing; } @@ -781,8 +796,15 @@ Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod, bool LivenessFromIndex) { std::vector<GlobalValue *> Keep; for (GlobalValue *GV : Mod.Keep) { - if (LivenessFromIndex && !ThinLTO.CombinedIndex.isGUIDLive(GV->getGUID())) + if (LivenessFromIndex && !ThinLTO.CombinedIndex.isGUIDLive(GV->getGUID())) { + if (Function *F = dyn_cast<Function>(GV)) { + OptimizationRemarkEmitter ORE(F); + ORE.emit(OptimizationRemark(DEBUG_TYPE, "deadfunction", F) + << ore::NV("Function", F) + << " not added to the combined module "); + } continue; + } if (!GV->hasAvailableExternallyLinkage()) { Keep.push_back(GV); @@ -849,12 +871,28 @@ Error LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, "Expected at most one ThinLTO module per bitcode file", inconvertibleErrorCode()); + if (!Conf.ThinLTOModulesToCompile.empty()) { + 
if (!ThinLTO.ModulesToCompile) + ThinLTO.ModulesToCompile = ModuleMapType(); + // This is a fuzzy name matching where only modules with name containing the + // specified switch values are going to be compiled. + for (const std::string &Name : Conf.ThinLTOModulesToCompile) { + if (BM.getModuleIdentifier().contains(Name)) { + ThinLTO.ModulesToCompile->insert({BM.getModuleIdentifier(), BM}); + llvm::errs() << "[ThinLTO] Selecting " << BM.getModuleIdentifier() + << " to compile\n"; + } + } + } + return Error::success(); } unsigned LTO::getMaxTasks() const { CalledGetMaxTasks = true; - return RegularLTO.ParallelCodeGenParallelismLevel + ThinLTO.ModuleMap.size(); + auto ModuleCount = ThinLTO.ModulesToCompile ? ThinLTO.ModulesToCompile->size() + : ThinLTO.ModuleMap.size(); + return RegularLTO.ParallelCodeGenParallelismLevel + ModuleCount; } // If only some of the modules were split, we cannot correctly handle @@ -931,17 +969,6 @@ Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) { return StatsFileOrErr.takeError(); std::unique_ptr<ToolOutputFile> StatsFile = std::move(StatsFileOrErr.get()); - // Finalize linking of regular LTO modules containing summaries now that - // we have computed liveness information. - for (auto &M : RegularLTO.ModsWithSummaries) - if (Error Err = linkRegularLTO(std::move(M), - /*LivenessFromIndex=*/true)) - return Err; - - // Ensure we don't have inconsistently split LTO units with type tests. - if (Error Err = checkPartiallySplit()) - return Err; - Error Result = runRegularLTO(AddStream); if (!Result) Result = runThinLTO(AddStream, Cache, GUIDPreservedSymbols); @@ -953,6 +980,27 @@ Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) { } Error LTO::runRegularLTO(AddStreamFn AddStream) { + // Setup optimization remarks. 
+ auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks( + RegularLTO.CombinedModule->getContext(), Conf.RemarksFilename, + Conf.RemarksPasses, Conf.RemarksFormat, Conf.RemarksWithHotness); + if (!DiagFileOrErr) + return DiagFileOrErr.takeError(); + + // Finalize linking of regular LTO modules containing summaries now that + // we have computed liveness information. + for (auto &M : RegularLTO.ModsWithSummaries) + if (Error Err = linkRegularLTO(std::move(M), + /*LivenessFromIndex=*/true)) + return Err; + + // Ensure we don't have inconsistently split LTO units with type tests. + // FIXME: this checks both LTO and ThinLTO. It happens to work as we take + // this path in both cases but eventually this should be split into two and + // do the ThinLTO checks in `runThinLTO`. + if (Error Err = checkPartiallySplit()) + return Err; + // Make sure commons have the right size/alignment: we kept the largest from // all the prevailing when adding the inputs, and we apply it here. const DataLayout &DL = RegularLTO.CombinedModule->getDataLayout(); @@ -982,6 +1030,11 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { } } + // If allowed, upgrade public vcall visibility metadata to linkage unit + // visibility before whole program devirtualization in the optimizer. 
+ updateVCallVisibilityInModule(*RegularLTO.CombinedModule, + Conf.HasWholeProgramVisibility); + if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(0, *RegularLTO.CombinedModule)) return Error::success(); @@ -1012,8 +1065,15 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { !Conf.PostInternalizeModuleHook(0, *RegularLTO.CombinedModule)) return Error::success(); } - return backend(Conf, AddStream, RegularLTO.ParallelCodeGenParallelismLevel, - std::move(RegularLTO.CombinedModule), ThinLTO.CombinedIndex); + + if (!RegularLTO.EmptyCombinedModule || Conf.AlwaysEmitRegularLTOObj) { + if (Error Err = backend( + Conf, AddStream, RegularLTO.ParallelCodeGenParallelismLevel, + std::move(RegularLTO.CombinedModule), ThinLTO.CombinedIndex)) + return Err; + } + + return finalizeOptimizationRemarks(std::move(*DiagFileOrErr)); } static const char *libcallRoutineNames[] = { @@ -1063,12 +1123,12 @@ class InProcessThinBackend : public ThinBackendProc { public: InProcessThinBackend( const Config &Conf, ModuleSummaryIndex &CombinedIndex, - unsigned ThinLTOParallelismLevel, + ThreadPoolStrategy ThinLTOParallelism, const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries, AddStreamFn AddStream, NativeObjectCache Cache) : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries), - BackendThreadPool(ThinLTOParallelismLevel), - AddStream(std::move(AddStream)), Cache(std::move(Cache)) { + BackendThreadPool(ThinLTOParallelism), AddStream(std::move(AddStream)), + Cache(std::move(Cache)) { for (auto &Name : CombinedIndex.cfiFunctionDefs()) CfiFunctionDefs.insert( GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name))); @@ -1133,6 +1193,9 @@ public: &ResolvedODR, const GVSummaryMapTy &DefinedGlobals, MapVector<StringRef, BitcodeModule> &ModuleMap) { + if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled) + timeTraceProfilerInitialize(Conf.TimeTraceGranularity, + "thin backend"); Error E = runThinLTOBackendThread( AddStream, Cache, Task, BM, CombinedIndex, ImportList, 
ExportList, ResolvedODR, DefinedGlobals, ModuleMap); @@ -1143,6 +1206,8 @@ public: else Err = std::move(E); } + if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled) + timeTraceProfilerFinishThread(); }, BM, std::ref(CombinedIndex), std::ref(ImportList), std::ref(ExportList), std::ref(ResolvedODR), std::ref(DefinedGlobals), std::ref(ModuleMap)); @@ -1159,13 +1224,13 @@ public: }; } // end anonymous namespace -ThinBackend lto::createInProcessThinBackend(unsigned ParallelismLevel) { +ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism) { return [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex, const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries, AddStreamFn AddStream, NativeObjectCache Cache) { return std::make_unique<InProcessThinBackend>( - Conf, CombinedIndex, ParallelismLevel, ModuleToDefinedGVSummaries, - AddStream, Cache); + Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries, AddStream, + Cache); }; } @@ -1186,7 +1251,7 @@ std::string lto::getThinLTOOutputFile(const std::string &Path, llvm::errs() << "warning: could not create directory '" << ParentPath << "': " << EC.message() << '\n'; } - return NewPath.str(); + return std::string(NewPath.str()); } namespace { @@ -1215,7 +1280,7 @@ public: MapVector<StringRef, BitcodeModule> &ModuleMap) override { StringRef ModulePath = BM.getModuleIdentifier(); std::string NewModulePath = - getThinLTOOutputFile(ModulePath, OldPrefix, NewPrefix); + getThinLTOOutputFile(std::string(ModulePath), OldPrefix, NewPrefix); if (LinkedObjectsFile) *LinkedObjectsFile << NewModulePath << '\n'; @@ -1239,7 +1304,7 @@ public: } if (OnWrite) - OnWrite(ModulePath); + OnWrite(std::string(ModulePath)); return Error::success(); } @@ -1264,6 +1329,11 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, if (ThinLTO.ModuleMap.empty()) return Error::success(); + if (ThinLTO.ModulesToCompile && ThinLTO.ModulesToCompile->empty()) { + llvm::errs() << "warning: [ThinLTO] No module 
compiled\n"; + return Error::success(); + } + if (Conf.CombinedIndexHook && !Conf.CombinedIndexHook(ThinLTO.CombinedIndex, GUIDPreservedSymbols)) return Error::success(); @@ -1299,6 +1369,11 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, std::set<GlobalValue::GUID> ExportedGUIDs; + // If allowed, upgrade public vcall visibility to linkage unit visibility in + // the summaries before whole program devirtualization below. + updateVCallVisibilityInIndex(ThinLTO.CombinedIndex, + Conf.HasWholeProgramVisibility); + // Perform index-based WPD. This will return immediately if there are // no index entries in the typeIdMetadata map (e.g. if we are instead // performing IR-based WPD in hybrid regular/thin LTO mode). @@ -1359,14 +1434,19 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, thinLTOResolvePrevailingInIndex(ThinLTO.CombinedIndex, isPrevailing, recordNewLinkage, GUIDPreservedSymbols); + generateParamAccessSummary(ThinLTO.CombinedIndex); + std::unique_ptr<ThinBackendProc> BackendProc = ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, AddStream, Cache); + auto &ModuleMap = + ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for combined // module and parallel code generation partitions. 
unsigned Task = RegularLTO.ParallelCodeGenParallelismLevel; - for (auto &Mod : ThinLTO.ModuleMap) { + for (auto &Mod : ModuleMap) { if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first], ExportLists[Mod.first], ResolvedODR[Mod.first], ThinLTO.ModuleMap)) @@ -1377,11 +1457,10 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, return BackendProc->wait(); } -Expected<std::unique_ptr<ToolOutputFile>> -lto::setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename, - StringRef RemarksPasses, StringRef RemarksFormat, - bool RemarksWithHotness, int Count) { - std::string Filename = RemarksFilename; +Expected<std::unique_ptr<ToolOutputFile>> lto::setupLLVMOptimizationRemarks( + LLVMContext &Context, StringRef RemarksFilename, StringRef RemarksPasses, + StringRef RemarksFormat, bool RemarksWithHotness, int Count) { + std::string Filename = std::string(RemarksFilename); // For ThinLTO, file.opt.<format> becomes // file.opt.<format>.thin.<num>.<format>. if (!Filename.empty() && Count != -1) @@ -1389,7 +1468,7 @@ lto::setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename, (Twine(Filename) + ".thin." + llvm::utostr(Count) + "." 
+ RemarksFormat) .str(); - auto ResultOrErr = llvm::setupOptimizationRemarks( + auto ResultOrErr = llvm::setupLLVMOptimizationRemarks( Context, Filename, RemarksPasses, RemarksFormat, RemarksWithHotness); if (Error E = ResultOrErr.takeError()) return std::move(E); diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index dcde7277b8202..0c395f9bbf280 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -16,18 +16,20 @@ #include "llvm/LTO/LTOBackend.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/IR/LLVMRemarkStreamer.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/PassManager.h" -#include "llvm/IR/RemarkStreamer.h" #include "llvm/IR/Verifier.h" #include "llvm/LTO/LTO.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/ModuleSymbolTable.h" #include "llvm/Passes/PassBuilder.h" +#include "llvm/Passes/PassPlugin.h" #include "llvm/Passes/StandardInstrumentations.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" @@ -61,8 +63,10 @@ Error Config::addSaveTemps(std::string OutputFileName, std::error_code EC; ResolutionFile = std::make_unique<raw_fd_ostream>( OutputFileName + "resolution.txt", EC, sys::fs::OpenFlags::OF_Text); - if (EC) + if (EC) { + ResolutionFile.reset(); return errorCodeToError(EC); + } auto setHook = [&](std::string PathSuffix, ModuleHookFn &Hook) { // Keep track of the hook provided by the linker, which also needs to run. 
@@ -125,6 +129,29 @@ Error Config::addSaveTemps(std::string OutputFileName, return Error::success(); } +#define HANDLE_EXTENSION(Ext) \ + llvm::PassPluginLibraryInfo get##Ext##PluginInfo(); +#include "llvm/Support/Extension.def" + +static void RegisterPassPlugins(ArrayRef<std::string> PassPlugins, + PassBuilder &PB) { +#define HANDLE_EXTENSION(Ext) \ + get##Ext##PluginInfo().RegisterPassBuilderCallbacks(PB); +#include "llvm/Support/Extension.def" + + // Load requested pass plugins and let them register pass builder callbacks + for (auto &PluginFN : PassPlugins) { + auto PassPlugin = PassPlugin::Load(PluginFN); + if (!PassPlugin) { + errs() << "Failed to load passes from '" << PluginFN + << "'. Request ignored.\n"; + continue; + } + + PassPlugin->registerPassBuilderCallbacks(PB); + } +} + namespace { std::unique_ptr<TargetMachine> @@ -179,6 +206,8 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, if (auto Err = PB.parseAAPipeline(AA, "default")) report_fatal_error("Error parsing default AA pipeline"); + RegisterPassPlugins(Conf.PassPlugins, PB); + LoopAnalysisManager LAM(Conf.DebugPassManager); FunctionAnalysisManager FAM(Conf.DebugPassManager); CGSCCAnalysisManager CGAM(Conf.DebugPassManager); @@ -203,16 +232,16 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, default: llvm_unreachable("Invalid optimization level"); case 0: - OL = PassBuilder::O0; + OL = PassBuilder::OptimizationLevel::O0; break; case 1: - OL = PassBuilder::O1; + OL = PassBuilder::OptimizationLevel::O1; break; case 2: - OL = PassBuilder::O2; + OL = PassBuilder::OptimizationLevel::O2; break; case 3: - OL = PassBuilder::O3; + OL = PassBuilder::OptimizationLevel::O3; break; } @@ -226,8 +255,8 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, // FIXME (davide): verify the output. 
} -static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM, - std::string PipelineDesc, +static void runNewPMCustomPasses(const Config &Conf, Module &Mod, + TargetMachine *TM, std::string PipelineDesc, std::string AAPipelineDesc, bool DisableVerify) { PassBuilder PB(TM); @@ -239,6 +268,8 @@ static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM, report_fatal_error("unable to parse AA pipeline description '" + AAPipelineDesc + "': " + toString(std::move(Err))); + RegisterPassPlugins(Conf.PassPlugins, PB); + LoopAnalysisManager LAM; FunctionAnalysisManager FAM; CGSCCAnalysisManager CGAM; @@ -305,7 +336,7 @@ bool opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, const ModuleSummaryIndex *ImportSummary) { // FIXME: Plumb the combined index into the new pass manager. if (!Conf.OptPipeline.empty()) - runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline, + runNewPMCustomPasses(Conf, Mod, TM, Conf.OptPipeline, Conf.AAPipeline, Conf.DisableVerify); else if (Conf.UseNewPM) runNewPMPasses(Conf, Mod, TM, Conf.OptLevel, IsThinLTO, ExportSummary, @@ -333,7 +364,8 @@ static void EmitBitcodeSection(Module &M, const Config &Conf) { } void codegen(const Config &Conf, TargetMachine *TM, AddStreamFn AddStream, - unsigned Task, Module &Mod) { + unsigned Task, Module &Mod, + const ModuleSummaryIndex &CombinedIndex) { if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod)) return; @@ -349,7 +381,7 @@ void codegen(const Config &Conf, TargetMachine *TM, AddStreamFn AddStream, DwoFile = Conf.DwoDir; sys::path::append(DwoFile, std::to_string(Task) + ".dwo"); - TM->Options.MCOptions.SplitDwarfFile = DwoFile.str().str(); + TM->Options.MCOptions.SplitDwarfFile = std::string(DwoFile); } else TM->Options.MCOptions.SplitDwarfFile = Conf.SplitDwarfFile; @@ -362,6 +394,8 @@ void codegen(const Config &Conf, TargetMachine *TM, AddStreamFn AddStream, auto Stream = AddStream(Task); legacy::PassManager CodeGenPasses; + CodeGenPasses.add( + 
createImmutableModuleSummaryIndexWrapperPass(&CombinedIndex)); if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS, DwoOut ? &DwoOut->os() : nullptr, Conf.CGFileType)) @@ -374,8 +408,10 @@ void codegen(const Config &Conf, TargetMachine *TM, AddStreamFn AddStream, void splitCodeGen(const Config &C, TargetMachine *TM, AddStreamFn AddStream, unsigned ParallelCodeGenParallelismLevel, - std::unique_ptr<Module> Mod) { - ThreadPool CodegenThreadPool(ParallelCodeGenParallelismLevel); + std::unique_ptr<Module> Mod, + const ModuleSummaryIndex &CombinedIndex) { + ThreadPool CodegenThreadPool( + heavyweight_hardware_concurrency(ParallelCodeGenParallelismLevel)); unsigned ThreadCount = 0; const Target *T = &TM->getTarget(); @@ -406,7 +442,8 @@ void splitCodeGen(const Config &C, TargetMachine *TM, AddStreamFn AddStream, std::unique_ptr<TargetMachine> TM = createTargetMachine(C, T, *MPartInCtx); - codegen(C, TM.get(), AddStream, ThreadId, *MPartInCtx); + codegen(C, TM.get(), AddStream, ThreadId, *MPartInCtx, + CombinedIndex); }, // Pass BC using std::move to ensure that it get moved rather than // copied into the thread's context. @@ -434,8 +471,8 @@ Expected<const Target *> initAndLookupTarget(const Config &C, Module &Mod) { } } -static Error -finalizeOptimizationRemarks(std::unique_ptr<ToolOutputFile> DiagOutputFile) { +Error lto::finalizeOptimizationRemarks( + std::unique_ptr<ToolOutputFile> DiagOutputFile) { // Make sure we flush the diagnostic remarks file in case the linker doesn't // call the global destructors before exiting. if (!DiagOutputFile) @@ -455,27 +492,19 @@ Error lto::backend(const Config &C, AddStreamFn AddStream, std::unique_ptr<TargetMachine> TM = createTargetMachine(C, *TOrErr, *Mod); - // Setup optimization remarks. 
- auto DiagFileOrErr = lto::setupOptimizationRemarks( - Mod->getContext(), C.RemarksFilename, C.RemarksPasses, C.RemarksFormat, - C.RemarksWithHotness); - if (!DiagFileOrErr) - return DiagFileOrErr.takeError(); - auto DiagnosticOutputFile = std::move(*DiagFileOrErr); - if (!C.CodeGenOnly) { if (!opt(C, TM.get(), 0, *Mod, /*IsThinLTO=*/false, /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr)) - return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); + return Error::success(); } if (ParallelCodeGenParallelismLevel == 1) { - codegen(C, TM.get(), AddStream, 0, *Mod); + codegen(C, TM.get(), AddStream, 0, *Mod, CombinedIndex); } else { splitCodeGen(C, TM.get(), AddStream, ParallelCodeGenParallelismLevel, - std::move(Mod)); + std::move(Mod), CombinedIndex); } - return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); + return Error::success(); } static void dropDeadSymbols(Module &Mod, const GVSummaryMapTy &DefinedGlobals, @@ -511,22 +540,32 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream, std::unique_ptr<TargetMachine> TM = createTargetMachine(Conf, *TOrErr, Mod); // Setup optimization remarks. - auto DiagFileOrErr = lto::setupOptimizationRemarks( + auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks( Mod.getContext(), Conf.RemarksFilename, Conf.RemarksPasses, Conf.RemarksFormat, Conf.RemarksWithHotness, Task); if (!DiagFileOrErr) return DiagFileOrErr.takeError(); auto DiagnosticOutputFile = std::move(*DiagFileOrErr); + // Set the partial sample profile ratio in the profile summary module flag of + // the module, if applicable. 
+ Mod.setPartialSampleProfileRatio(CombinedIndex); + if (Conf.CodeGenOnly) { - codegen(Conf, TM.get(), AddStream, Task, Mod); + codegen(Conf, TM.get(), AddStream, Task, Mod, CombinedIndex); return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); } if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(Task, Mod)) return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); - renameModuleForThinLTO(Mod, CombinedIndex); + // When linking an ELF shared object, dso_local should be dropped. We + // conservatively do this for -fpic. + bool ClearDSOLocalOnDeclarations = + TM->getTargetTriple().isOSBinFormatELF() && + TM->getRelocationModel() != Reloc::Static && + Mod.getPIELevel() == PIELevel::Default; + renameModuleForThinLTO(Mod, CombinedIndex, ClearDSOLocalOnDeclarations); dropDeadSymbols(Mod, DefinedGlobals, CombinedIndex); @@ -552,7 +591,8 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream, /*IsImporting*/ true); }; - FunctionImporter Importer(CombinedIndex, ModuleLoader); + FunctionImporter Importer(CombinedIndex, ModuleLoader, + ClearDSOLocalOnDeclarations); if (Error Err = Importer.importFunctions(Mod, ImportList).takeError()) return Err; @@ -563,6 +603,6 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream, /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex)) return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); - codegen(Conf, TM.get(), AddStream, Task, Mod); + codegen(Conf, TM.get(), AddStream, Task, Mod, CombinedIndex); return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); } diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp index 5fef14230a9bb..25ab1404b4e12 100644 --- a/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -29,11 +29,11 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LLVMRemarkStreamer.h" 
#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassTimingInfo.h" -#include "llvm/IR/RemarkStreamer.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" #include "llvm/LTO/LTO.h" @@ -57,6 +57,7 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include <system_error> @@ -133,10 +134,12 @@ void LTOCodeGenerator::initializeLTOPasses() { initializeSimpleInlinerPass(R); initializePruneEHPass(R); initializeGlobalDCELegacyPassPass(R); + initializeOpenMPOptLegacyPassPass(R); initializeArgPromotionPass(R); initializeJumpThreadingPass(R); initializeSROALegacyPassPass(R); initializeAttributorLegacyPassPass(R); + initializeAttributorCGSCCLegacyPassPass(R); initializePostOrderFunctionAttrsLegacyPassPass(R); initializeReversePostOrderFunctionAttrsLegacyPassPass(R); initializeGlobalsAAWrapperPassPass(R); @@ -526,8 +529,8 @@ bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline, return false; auto DiagFileOrErr = - lto::setupOptimizationRemarks(Context, RemarksFilename, RemarksPasses, - RemarksFormat, RemarksWithHotness); + lto::setupLLVMOptimizationRemarks(Context, RemarksFilename, RemarksPasses, + RemarksFormat, RemarksWithHotness); if (!DiagFileOrErr) { errs() << "Error: " << toString(DiagFileOrErr.takeError()) << "\n"; report_fatal_error("Can't get an output file for the remarks"); @@ -542,6 +545,13 @@ bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline, } StatsFile = std::move(StatsFileOrErr.get()); + // Currently there is no support for enabling whole program visibility via a + // linker option in the old LTO API, but this call allows it to be specified + // via the internal option. 
Must be done before WPD invoked via the optimizer + // pipeline run below. + updateVCallVisibilityInModule(*MergedModule, + /* WholeProgramVisibilityEnabledInLTO */ false); + // We always run the verifier once on the merged module, the `DisableVerify` // parameter only applies to subsequent verify. verifyMergedModuleOnce(); @@ -622,9 +632,9 @@ bool LTOCodeGenerator::compileOptimized(ArrayRef<raw_pwrite_stream *> Out) { return true; } -void LTOCodeGenerator::setCodeGenDebugOptions(ArrayRef<const char *> Options) { +void LTOCodeGenerator::setCodeGenDebugOptions(ArrayRef<StringRef> Options) { for (StringRef Option : Options) - CodegenOptions.push_back(Option); + CodegenOptions.push_back(Option.str()); } void LTOCodeGenerator::parseCodeGenDebugOptions() { diff --git a/llvm/lib/LTO/LTOModule.cpp b/llvm/lib/LTO/LTOModule.cpp index 587b332e70649..ebe779aea62ea 100644 --- a/llvm/lib/LTO/LTOModule.cpp +++ b/llvm/lib/LTO/LTOModule.cpp @@ -28,6 +28,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/IRObjectFile.h" +#include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Host.h" @@ -413,9 +414,8 @@ void LTOModule::addDefinedFunctionSymbol(StringRef Name, const Function *F) { void LTOModule::addDefinedSymbol(StringRef Name, const GlobalValue *def, bool isFunction) { - // set alignment part log2() can have rounding errors - uint32_t align = def->getAlignment(); - uint32_t attr = align ? countTrailingZeros(align) : 0; + const GlobalObject *go = dyn_cast<GlobalObject>(def); + uint32_t attr = go ? 
Log2(go->getAlign().valueOrOne()) : 0; // set permissions part if (isFunction) { @@ -676,3 +676,11 @@ const char *LTOModule::getDependentLibrary(lto::InputFile *input, size_t index, *size = S.size(); return S.data(); } + +Expected<uint32_t> LTOModule::getMachOCPUType() const { + return MachO::getCPUType(Triple(Mod->getTargetTriple())); +} + +Expected<uint32_t> LTOModule::getMachOCPUSubType() const { + return MachO::getCPUSubType(Triple(Mod->getTargetTriple())); +} diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp index a9e27832917cb..d0a1e1889c610 100644 --- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -27,10 +27,10 @@ #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LLVMRemarkStreamer.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/PassTimingInfo.h" -#include "llvm/IR/RemarkStreamer.h" #include "llvm/IR/Verifier.h" #include "llvm/IRReader/IRReader.h" #include "llvm/LTO/LTO.h" @@ -48,7 +48,6 @@ #include "llvm/Support/ThreadPool.h" #include "llvm/Support/Threading.h" #include "llvm/Support/ToolOutputFile.h" -#include "llvm/Support/VCSRevision.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/FunctionImport.h" @@ -81,8 +80,10 @@ extern cl::opt<std::string> RemarksFormat; namespace { -static cl::opt<int> - ThreadCount("threads", cl::init(llvm::heavyweight_hardware_concurrency())); +// Default to using all available threads in the system, but using only one +// thread per core, as indicated by the usage of +// heavyweight_hardware_concurrency() below. +static cl::opt<int> ThreadCount("threads", cl::init(0)); // Simple helper to save temporary files for debug. 
static void saveTempBitcode(const Module &TheModule, StringRef TempDir, @@ -151,8 +152,9 @@ generateModuleMap(std::vector<std::unique_ptr<lto::InputFile>> &Modules) { return ModuleMap; } -static void promoteModule(Module &TheModule, const ModuleSummaryIndex &Index) { - if (renameModuleForThinLTO(TheModule, Index)) +static void promoteModule(Module &TheModule, const ModuleSummaryIndex &Index, + bool ClearDSOLocalOnDeclarations) { + if (renameModuleForThinLTO(TheModule, Index, ClearDSOLocalOnDeclarations)) report_fatal_error("renameModuleForThinLTO failed"); } @@ -204,15 +206,16 @@ static std::unique_ptr<Module> loadModuleFromInput(lto::InputFile *Input, static void crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index, - StringMap<lto::InputFile*> &ModuleMap, - const FunctionImporter::ImportMapTy &ImportList) { + StringMap<lto::InputFile *> &ModuleMap, + const FunctionImporter::ImportMapTy &ImportList, + bool ClearDSOLocalOnDeclarations) { auto Loader = [&](StringRef Identifier) { auto &Input = ModuleMap[Identifier]; return loadModuleFromInput(Input, TheModule.getContext(), /*Lazy=*/true, /*IsImporting*/ true); }; - FunctionImporter Importer(Index, Loader); + FunctionImporter Importer(Index, Loader, ClearDSOLocalOnDeclarations); Expected<bool> Result = Importer.importFunctions(TheModule, ImportList); if (!Result) { handleAllErrors(Result.takeError(), [&](ErrorInfoBase &EIB) { @@ -410,8 +413,15 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index, // "Benchmark"-like optimization: single-source case bool SingleModule = (ModuleMap.size() == 1); + // When linking an ELF shared object, dso_local should be dropped. We + // conservatively do this for -fpic. 
+ bool ClearDSOLocalOnDeclarations = + TM.getTargetTriple().isOSBinFormatELF() && + TM.getRelocationModel() != Reloc::Static && + TheModule.getPIELevel() == PIELevel::Default; + if (!SingleModule) { - promoteModule(TheModule, Index); + promoteModule(TheModule, Index, ClearDSOLocalOnDeclarations); // Apply summary-based prevailing-symbol resolution decisions. thinLTOResolvePrevailingInModule(TheModule, DefinedGlobals); @@ -431,7 +441,8 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index, saveTempBitcode(TheModule, SaveTempsDir, count, ".2.internalized.bc"); if (!SingleModule) { - crossImportIntoModule(TheModule, Index, ModuleMap, ImportList); + crossImportIntoModule(TheModule, Index, ModuleMap, ImportList, + ClearDSOLocalOnDeclarations); // Save temps: after cross-module import. saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc"); @@ -672,7 +683,8 @@ void ThinLTOCodeGenerator::promote(Module &TheModule, ModuleSummaryIndex &Index, Index, IsExported(ExportLists, GUIDPreservedSymbols), IsPrevailing(PrevailingCopy)); - promoteModule(TheModule, Index); + // FIXME Set ClearDSOLocalOnDeclarations. + promoteModule(TheModule, Index, /*ClearDSOLocalOnDeclarations=*/false); } /** @@ -704,7 +716,9 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule, ExportLists); auto &ImportList = ImportLists[TheModule.getModuleIdentifier()]; - crossImportIntoModule(TheModule, Index, ModuleMap, ImportList); + // FIXME Set ClearDSOLocalOnDeclarations. + crossImportIntoModule(TheModule, Index, ModuleMap, ImportList, + /*ClearDSOLocalOnDeclarations=*/false); } /** @@ -831,7 +845,8 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule, Index, IsExported(ExportLists, GUIDPreservedSymbols), IsPrevailing(PrevailingCopy)); - promoteModule(TheModule, Index); + // FIXME Set ClearDSOLocalOnDeclarations. 
+ promoteModule(TheModule, Index, /*ClearDSOLocalOnDeclarations=*/false); // Internalization thinLTOResolvePrevailingInModule( @@ -871,15 +886,15 @@ ThinLTOCodeGenerator::writeGeneratedObject(int count, StringRef CacheEntryPath, // Cache is enabled, hard-link the entry (or copy if hard-link fails). auto Err = sys::fs::create_hard_link(CacheEntryPath, OutputPath); if (!Err) - return OutputPath.str(); + return std::string(OutputPath.str()); // Hard linking failed, try to copy. Err = sys::fs::copy_file(CacheEntryPath, OutputPath); if (!Err) - return OutputPath.str(); + return std::string(OutputPath.str()); // Copy failed (could be because the CacheEntry was removed from the cache // in the meantime by another process), fall back and try to write down the // buffer to the output. - errs() << "error: can't link or copy from cached entry '" << CacheEntryPath + errs() << "remark: can't link or copy from cached entry '" << CacheEntryPath << "' to '" << OutputPath << "'\n"; } // No cache entry, just write out the buffer. @@ -888,7 +903,7 @@ ThinLTOCodeGenerator::writeGeneratedObject(int count, StringRef CacheEntryPath, if (Err) report_fatal_error("Can't open output '" + OutputPath + "'\n"); OS << OutputBuffer.getBuffer(); - return OutputPath.str(); + return std::string(OutputPath.str()); } // Main entry point for the ThinLTO processing @@ -970,6 +985,12 @@ void ThinLTOCodeGenerator::run() { // Synthesize entry counts for functions in the combined index. computeSyntheticCounts(*Index); + // Currently there is no support for enabling whole program visibility via a + // linker option in the old LTO API, but this call allows it to be specified + // via the internal option. Must be done before WPD below. + updateVCallVisibilityInIndex(*Index, + /* WholeProgramVisibilityEnabledInLTO */ false); + // Perform index-based WPD. This will return immediately if there are // no index entries in the typeIdMetadata map (e.g. 
if we are instead // performing IR-based WPD in hybrid regular/thin LTO mode). @@ -1037,7 +1058,7 @@ void ThinLTOCodeGenerator::run() { // Parallel optimizer + codegen { - ThreadPool Pool(ThreadCount); + ThreadPool Pool(heavyweight_hardware_concurrency(ThreadCount)); for (auto IndexCount : ModulesOrdering) { auto &Mod = Modules[IndexCount]; Pool.async([&](int count) { @@ -1074,7 +1095,7 @@ void ThinLTOCodeGenerator::run() { LLVMContext Context; Context.setDiscardValueNames(LTODiscardValueNames); Context.enableDebugTypeODRUniquing(); - auto DiagFileOrErr = lto::setupOptimizationRemarks( + auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks( Context, RemarksFilename, RemarksPasses, RemarksFormat, RemarksWithHotness, count); if (!DiagFileOrErr) { @@ -1113,7 +1134,7 @@ void ThinLTOCodeGenerator::run() { auto ReloadedBufferOrErr = CacheEntry.tryLoadingBuffer(); if (auto EC = ReloadedBufferOrErr.getError()) { // On error, keep the preexisting buffer and print a diagnostic. - errs() << "error: can't reload cached file '" << CacheEntryPath + errs() << "remark: can't reload cached file '" << CacheEntryPath << "': " << EC.message() << "\n"; } else { OutputBuffer = std::move(*ReloadedBufferOrErr); diff --git a/llvm/lib/LTO/UpdateCompilerUsed.cpp b/llvm/lib/LTO/UpdateCompilerUsed.cpp index 6434f902088d1..040e1106523c4 100644 --- a/llvm/lib/LTO/UpdateCompilerUsed.cpp +++ b/llvm/lib/LTO/UpdateCompilerUsed.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Mangler.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; |