Diffstat (limited to 'lib/Transforms')
-rw-r--r--  lib/Transforms/Coroutines/CoroSplit.cpp               |   2
-rw-r--r--  lib/Transforms/Coroutines/Coroutines.cpp              |  12
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp                  |   4
-rw-r--r--  lib/Transforms/IPO/FunctionImport.cpp                 | 107
-rw-r--r--  lib/Transforms/IPO/LowerTypeTests.cpp                 |   3
-rw-r--r--  lib/Transforms/IPO/PartialInlining.cpp                |  66
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp             |   2
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp    |  11
-rw-r--r--  lib/Transforms/Instrumentation/MemorySanitizer.cpp    |   1
-rw-r--r--  lib/Transforms/Instrumentation/SanitizerCoverage.cpp  | 181
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp              |   1
-rw-r--r--  lib/Transforms/Scalar/LowerExpectIntrinsic.cpp        | 162
-rw-r--r--  lib/Transforms/Scalar/RewriteStatepointsForGC.cpp     |  10
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp                        |   7
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp                |  71
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp            |   2
-rw-r--r--  lib/Transforms/Vectorize/SLPVectorizer.cpp            | 151
17 files changed, 521 insertions, 272 deletions
diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp index 613b4a7f03e98..626a891f65c6a 100644 --- a/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/lib/Transforms/Coroutines/CoroSplit.cpp @@ -228,7 +228,7 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape, SmallVector<ReturnInst *, 4> Returns; - CloneFunctionInto(NewF, &F, VMap, /*ModuleLevelChanges=*/false, Returns); + CloneFunctionInto(NewF, &F, VMap, /*ModuleLevelChanges=*/true, Returns); // Remove old returns. for (ReturnInst *Return : Returns) diff --git a/lib/Transforms/Coroutines/Coroutines.cpp b/lib/Transforms/Coroutines/Coroutines.cpp index ea48043f9381f..44e1f9b404eda 100644 --- a/lib/Transforms/Coroutines/Coroutines.cpp +++ b/lib/Transforms/Coroutines/Coroutines.cpp @@ -218,6 +218,8 @@ void coro::Shape::buildFrom(Function &F) { size_t FinalSuspendIndex = 0; clear(*this); SmallVector<CoroFrameInst *, 8> CoroFrames; + SmallVector<CoroSaveInst *, 2> UnusedCoroSaves; + for (Instruction &I : instructions(F)) { if (auto II = dyn_cast<IntrinsicInst>(&I)) { switch (II->getIntrinsicID()) { @@ -229,6 +231,12 @@ void coro::Shape::buildFrom(Function &F) { case Intrinsic::coro_frame: CoroFrames.push_back(cast<CoroFrameInst>(II)); break; + case Intrinsic::coro_save: + // After optimizations, coro_suspends using this coro_save might have + // been removed, remember orphaned coro_saves to remove them later. + if (II->use_empty()) + UnusedCoroSaves.push_back(cast<CoroSaveInst>(II)); + break; case Intrinsic::coro_suspend: CoroSuspends.push_back(cast<CoroSuspendInst>(II)); if (CoroSuspends.back()->isFinal()) { @@ -311,4 +319,8 @@ void coro::Shape::buildFrom(Function &F) { if (HasFinalSuspend && FinalSuspendIndex != CoroSuspends.size() - 1) std::swap(CoroSuspends[FinalSuspendIndex], CoroSuspends.back()); + + // Remove orphaned coro.saves. + for (CoroSaveInst *CoroSave : UnusedCoroSaves) + CoroSave->eraseFromParent(); } diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 28cc81c76d4fb..5cc29a4937986 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -1188,6 +1188,10 @@ static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) { SCCNodes.insert(F); } + // Skip it if the SCC only contains optnone functions. + if (SCCNodes.empty()) + return Changed; + Changed |= addArgumentReturnedAttrs(SCCNodes); Changed |= addReadAttrs(SCCNodes, AARGetter); Changed |= addArgumentAttrs(SCCNodes); diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp index 231487923fada..6d34ab8b0d960 100644 --- a/lib/Transforms/IPO/FunctionImport.cpp +++ b/lib/Transforms/IPO/FunctionImport.cpp @@ -292,8 +292,7 @@ static void computeImportForFunction( static void ComputeImportForModule( const GVSummaryMapTy &DefinedGVSummaries, const ModuleSummaryIndex &Index, FunctionImporter::ImportMapTy &ImportList, - StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr, - const DenseSet<GlobalValue::GUID> *DeadSymbols = nullptr) { + StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) { // Worklist contains the list of function imported in this module, for which // we will analyse the callees and may import further down the callgraph. 
SmallVector<EdgeInfo, 128> Worklist; @@ -301,7 +300,7 @@ static void ComputeImportForModule( // Populate the worklist with the import for the functions in the current // module for (auto &GVSummary : DefinedGVSummaries) { - if (DeadSymbols && DeadSymbols->count(GVSummary.first)) { + if (!Index.isGlobalValueLive(GVSummary.second)) { DEBUG(dbgs() << "Ignores Dead GUID: " << GVSummary.first << "\n"); continue; } @@ -344,15 +343,14 @@ void llvm::ComputeCrossModuleImport( const ModuleSummaryIndex &Index, const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries, StringMap<FunctionImporter::ImportMapTy> &ImportLists, - StringMap<FunctionImporter::ExportSetTy> &ExportLists, - const DenseSet<GlobalValue::GUID> *DeadSymbols) { + StringMap<FunctionImporter::ExportSetTy> &ExportLists) { // For each module that has function defined, compute the import/export lists. for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) { auto &ImportList = ImportLists[DefinedGVSummaries.first()]; DEBUG(dbgs() << "Computing import for Module '" << DefinedGVSummaries.first() << "'\n"); ComputeImportForModule(DefinedGVSummaries.second, Index, ImportList, - &ExportLists, DeadSymbols); + &ExportLists); } // When computing imports we added all GUIDs referenced by anything @@ -414,82 +412,71 @@ void llvm::ComputeCrossModuleImportForModule( #endif } -DenseSet<GlobalValue::GUID> llvm::computeDeadSymbols( - const ModuleSummaryIndex &Index, +void llvm::computeDeadSymbols( + ModuleSummaryIndex &Index, const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) { + assert(!Index.withGlobalValueDeadStripping()); if (!ComputeDead) - return DenseSet<GlobalValue::GUID>(); + return; if (GUIDPreservedSymbols.empty()) // Don't do anything when nothing is live, this is friendly with tests. - return DenseSet<GlobalValue::GUID>(); - DenseSet<ValueInfo> LiveSymbols; + return; + unsigned LiveSymbols = 0; SmallVector<ValueInfo, 128> Worklist; Worklist.reserve(GUIDPreservedSymbols.size() * 2); for (auto GUID : GUIDPreservedSymbols) { ValueInfo VI = Index.getValueInfo(GUID); if (!VI) continue; - DEBUG(dbgs() << "Live root: " << VI.getGUID() << "\n"); - LiveSymbols.insert(VI); - Worklist.push_back(VI); + for (auto &S : VI.getSummaryList()) + S->setLive(true); } + // Add values flagged in the index as live roots to the worklist. - for (const auto &Entry : Index) { - bool IsLiveRoot = llvm::any_of( - Entry.second.SummaryList, - [&](const std::unique_ptr<llvm::GlobalValueSummary> &Summary) { - return Summary->liveRoot(); - }); - if (!IsLiveRoot) - continue; - DEBUG(dbgs() << "Live root (summary): " << Entry.first << "\n"); - Worklist.push_back(ValueInfo(&Entry)); - } + for (const auto &Entry : Index) + for (auto &S : Entry.second.SummaryList) + if (S->isLive()) { + DEBUG(dbgs() << "Live root: " << Entry.first << "\n"); + Worklist.push_back(ValueInfo(&Entry)); + ++LiveSymbols; + break; + } + + // Make value live and add it to the worklist if it was not live before. 
+ // FIXME: we should only make the prevailing copy live here + auto visit = [&](ValueInfo VI) { + for (auto &S : VI.getSummaryList()) + if (S->isLive()) + return; + for (auto &S : VI.getSummaryList()) + S->setLive(true); + ++LiveSymbols; + Worklist.push_back(VI); + }; while (!Worklist.empty()) { auto VI = Worklist.pop_back_val(); - - // FIXME: we should only make the prevailing copy live here for (auto &Summary : VI.getSummaryList()) { - for (auto Ref : Summary->refs()) { - if (LiveSymbols.insert(Ref).second) { - DEBUG(dbgs() << "Marking live (ref): " << Ref.getGUID() << "\n"); - Worklist.push_back(Ref); - } - } - if (auto *FS = dyn_cast<FunctionSummary>(Summary.get())) { - for (auto Call : FS->calls()) { - if (LiveSymbols.insert(Call.first).second) { - DEBUG(dbgs() << "Marking live (call): " << Call.first.getGUID() - << "\n"); - Worklist.push_back(Call.first); - } - } - } + for (auto Ref : Summary->refs()) + visit(Ref); + if (auto *FS = dyn_cast<FunctionSummary>(Summary.get())) + for (auto Call : FS->calls()) + visit(Call.first); if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) { auto AliaseeGUID = AS->getAliasee().getOriginalName(); ValueInfo AliaseeVI = Index.getValueInfo(AliaseeGUID); - if (AliaseeVI && LiveSymbols.insert(AliaseeVI).second) { - DEBUG(dbgs() << "Marking live (alias): " << AliaseeGUID << "\n"); - Worklist.push_back(AliaseeVI); - } + if (AliaseeVI) + visit(AliaseeVI); } } } - DenseSet<GlobalValue::GUID> DeadSymbols; - DeadSymbols.reserve( - std::min(Index.size(), Index.size() - LiveSymbols.size())); - for (auto &Entry : Index) { - if (!LiveSymbols.count(ValueInfo(&Entry))) { - DEBUG(dbgs() << "Marking dead: " << Entry.first << "\n"); - DeadSymbols.insert(Entry.first); - } - } - DEBUG(dbgs() << LiveSymbols.size() << " symbols Live, and " - << DeadSymbols.size() << " symbols Dead \n"); - NumDeadSymbols += DeadSymbols.size(); - NumLiveSymbols += LiveSymbols.size(); - return DeadSymbols; + Index.setWithGlobalValueDeadStripping(); + + unsigned DeadSymbols = Index.size() - LiveSymbols; + DEBUG(dbgs() << LiveSymbols << " symbols Live, and " << DeadSymbols + << " symbols Dead \n"); + NumDeadSymbols += DeadSymbols; + NumLiveSymbols += LiveSymbols; } /// Compute the set of summaries needed for a ThinLTO backend compilation of diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp index ca4ee92f971a1..7bec50d9d25f8 100644 --- a/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/lib/Transforms/IPO/LowerTypeTests.cpp @@ -1442,9 +1442,8 @@ bool LowerTypeTestsModule::lower() { for (auto &P : *ExportSummary) { for (auto &S : P.second.SummaryList) { auto *FS = dyn_cast<FunctionSummary>(S.get()); - if (!FS) + if (!FS || !ExportSummary->isGlobalValueLive(FS)) continue; - // FIXME: Only add live functions. 
for (GlobalValue::GUID G : FS->type_tests()) for (Metadata *MD : MetadataByGUID[G]) AddTypeIdUse(MD).IsExported = true; diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index bc0967448cddf..ea805efc66b79 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -68,6 +68,10 @@ static cl::opt<int> cl::desc("Relative frequency of outline region to " "the entry block")); +static cl::opt<unsigned> ExtraOutliningPenalty( + "partial-inlining-extra-penalty", cl::init(0), cl::Hidden, + cl::desc("A debug option to add additional penalty to the computed one.")); + namespace { struct FunctionOutliningInfo { @@ -83,7 +87,7 @@ struct FunctionOutliningInfo { SmallVector<BasicBlock *, 4> Entries; // The return block that is not included in the outlined region. BasicBlock *ReturnBlock; - // The dominating block of the region ot be outlined. + // The dominating block of the region to be outlined. BasicBlock *NonReturnBlock; // The set of blocks in Entries that that are predecessors to ReturnBlock SmallVector<BasicBlock *, 4> ReturnBlockPreds; @@ -407,11 +411,23 @@ BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq( if (hasProfileData(F, OI)) return OutlineRegionRelFreq; - // When profile data is not available, we need to be very - // conservative in estimating the overall savings. We need to make sure - // the outline region relative frequency is not below the threshold - // specified by the option. - OutlineRegionRelFreq = std::max(OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100)); + // When profile data is not available, we need to be conservative in + // estimating the overall savings. Static branch prediction can usually + // guess the branch direction right (taken/non-taken), but the guessed + // branch probability is usually not biased enough. In case when the + // outlined region is predicted to be likely, its probability needs + // to be made higher (more biased) to not under-estimate the cost of + // function outlining. On the other hand, if the outlined region + // is predicted to be less likely, the predicted probablity is usually + // higher than the actual. For instance, the actual probability of the + // less likely target is only 5%, but the guessed probablity can be + // 40%. In the latter case, there is no need for further adjustement. + // FIXME: add an option for this. 
+ if (OutlineRegionRelFreq < BranchProbability(45, 100)) + return OutlineRegionRelFreq; + + OutlineRegionRelFreq = std::max( + OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100)); return OutlineRegionRelFreq; } @@ -496,6 +512,26 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) { if (isa<DbgInfoIntrinsic>(I)) continue; + switch (I->getOpcode()) { + case Instruction::BitCast: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::Alloca: + continue; + case Instruction::GetElementPtr: + if (cast<GetElementPtrInst>(I)->hasAllZeroIndices()) + continue; + default: + break; + } + + IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(I); + if (IntrInst) { + if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start || + IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) + continue; + } + if (CallInst *CI = dyn_cast<CallInst>(I)) { InlineCost += getCallsiteCost(CallSite(CI), DL); continue; @@ -519,7 +555,13 @@ std::tuple<int, int, int> PartialInlinerImpl::computeOutliningCosts( Function *F, const FunctionOutliningInfo *OI, Function *OutlinedFunction, BasicBlock *OutliningCallBB) { // First compute the cost of the outlined region 'OI' in the original - // function 'F': + // function 'F'. + // FIXME: The code extractor (outliner) can now do code sinking/hoisting + // to reduce outlining cost. The hoisted/sunk code currently do not + // incur any runtime cost so it is still OK to compare the outlined + // function cost with the outlined region in the original function. + // If this ever changes, we will need to introduce new extractor api + // to pass the information. int OutlinedRegionCost = 0; for (BasicBlock &BB : *F) { if (&BB != OI->ReturnBlock && @@ -542,8 +584,14 @@ std::tuple<int, int, int> PartialInlinerImpl::computeOutliningCosts( assert(OutlinedFunctionCost >= OutlinedRegionCost && "Outlined function cost should be no less than the outlined region"); - int OutliningRuntimeOverhead = - OutliningFuncCallCost + (OutlinedFunctionCost - OutlinedRegionCost); + // The code extractor introduces a new root and exit stub blocks with + // additional unconditional branches. Those branches will be eliminated + // later with bb layout. 
The cost should be adjusted accordingly: + OutlinedFunctionCost -= 2 * InlineConstants::InstrCost; + + int OutliningRuntimeOverhead = OutliningFuncCallCost + + (OutlinedFunctionCost - OutlinedRegionCost) + + ExtraOutliningPenalty; return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead, OutlinedRegionCost); diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 9fd3a9021a270..16fba32e98056 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -157,7 +157,7 @@ static cl::opt<bool> static cl::opt<bool> EnableGVNSink( "enable-gvn-sink", cl::init(false), cl::Hidden, - cl::desc("Enable the GVN sinking pass (default = on)")); + cl::desc("Enable the GVN sinking pass (default = off)")); PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 2c2b7317a1c09..c0798e164c39f 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -4508,13 +4508,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Builder->CreateAnd(A, B), Op1); - // ~x < ~y --> y < x - // ~x < cst --> ~cst < x + // ~X < ~Y --> Y < X + // ~X < C --> X > ~C if (match(Op0, m_Not(m_Value(A)))) { if (match(Op1, m_Not(m_Value(B)))) return new ICmpInst(I.getPredicate(), B, A); - if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1)) - return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A); + + const APInt *C; + if (match(Op1, m_APInt(C))) + return new ICmpInst(I.getSwappedPredicate(), A, + ConstantInt::get(Op1->getType(), ~(*C))); } Instruction *AddI = nullptr; diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index ff753c20a94af..df4ee9969c02f 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2087,6 +2087,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { switch (I.getNumArgOperands()) { case 3: assert(isa<ConstantInt>(I.getArgOperand(2)) && "Invalid rounding mode"); + LLVM_FALLTHROUGH; case 2: CopyOp = I.getArgOperand(0); ConvertOp = I.getArgOperand(1); diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 325b64cd8b436..8aa40d1759de6 100644 --- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -57,6 +57,11 @@ static const char *const SanCovTracePCGuardName = "__sanitizer_cov_trace_pc_guard"; static const char *const SanCovTracePCGuardInitName = "__sanitizer_cov_trace_pc_guard_init"; +static const char *const SanCov8bitCountersInitName = + "__sanitizer_cov_8bit_counters_init"; + +static const char *const SanCovGuardsSectionName = "sancov_guards"; +static const char *const SanCovCountersSectionName = "sancov_counters"; static cl::opt<int> ClCoverageLevel( "sanitizer-coverage-level", @@ -64,14 +69,18 @@ static cl::opt<int> ClCoverageLevel( "3: all blocks and critical edges"), cl::Hidden, cl::init(0)); -static cl::opt<bool> ClExperimentalTracePC("sanitizer-coverage-trace-pc", - cl::desc("Experimental pc tracing"), - cl::Hidden, cl::init(false)); +static cl::opt<bool> ClTracePC("sanitizer-coverage-trace-pc", + cl::desc("Experimental pc tracing"), cl::Hidden, + cl::init(false)); static cl::opt<bool> 
ClTracePCGuard("sanitizer-coverage-trace-pc-guard", cl::desc("pc tracing with a guard"), cl::Hidden, cl::init(false)); +static cl::opt<bool> ClInline8bitCounters("sanitizer-coverage-inline-8bit-counters", + cl::desc("increments 8-bit counter for every edge"), + cl::Hidden, cl::init(false)); + static cl::opt<bool> ClCMPTracing("sanitizer-coverage-trace-compares", cl::desc("Tracing of CMP and similar instructions"), @@ -123,9 +132,10 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) { Options.TraceCmp |= ClCMPTracing; Options.TraceDiv |= ClDIVTracing; Options.TraceGep |= ClGEPTracing; - Options.TracePC |= ClExperimentalTracePC; + Options.TracePC |= ClTracePC; Options.TracePCGuard |= ClTracePCGuard; - if (!Options.TracePCGuard && !Options.TracePC) + Options.Inline8bitCounters |= ClInline8bitCounters; + if (!Options.TracePCGuard && !Options.TracePC && !Options.Inline8bitCounters) Options.TracePCGuard = true; // TracePCGuard is default. Options.NoPrune |= !ClPruneBlocks; return Options; @@ -159,11 +169,22 @@ private: void InjectTraceForSwitch(Function &F, ArrayRef<Instruction *> SwitchTraceTargets); bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks); - void CreateFunctionGuardArray(size_t NumGuards, Function &F); + GlobalVariable *CreateFunctionLocalArrayInSection(size_t NumElements, + Function &F, Type *Ty, + const char *Section); + void CreateFunctionLocalArrays(size_t NumGuards, Function &F); void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx); - StringRef getSanCovTracePCGuardSection() const; - StringRef getSanCovTracePCGuardSectionStart() const; - StringRef getSanCovTracePCGuardSectionEnd() const; + void CreateInitCallForSection(Module &M, const char *InitFunctionName, + Type *Ty, const std::string &Section); + + void SetNoSanitizeMetadata(Instruction *I) { + I->setMetadata(I->getModule()->getMDKindID("nosanitize"), + MDNode::get(*C, None)); + } + + std::string getSectionName(const std::string &Section) const; + std::string getSectionStart(const std::string &Section) const; + std::string getSectionEnd(const std::string &Section) const; Function *SanCovTracePCIndir; Function *SanCovTracePC, *SanCovTracePCGuard; Function *SanCovTraceCmpFunction[4]; @@ -171,20 +192,48 @@ private: Function *SanCovTraceGepFunction; Function *SanCovTraceSwitchFunction; InlineAsm *EmptyAsm; - Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy; + Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy, + *Int8Ty, *Int8PtrTy; Module *CurModule; Triple TargetTriple; LLVMContext *C; const DataLayout *DL; GlobalVariable *FunctionGuardArray; // for trace-pc-guard. - bool HasSancovGuardsSection; + GlobalVariable *Function8bitCounterArray; // for inline-8bit-counters. 
SanitizerCoverageOptions Options; }; } // namespace +void SanitizerCoverageModule::CreateInitCallForSection( + Module &M, const char *InitFunctionName, Type *Ty, + const std::string &Section) { + IRBuilder<> IRB(M.getContext()); + Function *CtorFunc; + GlobalVariable *SecStart = + new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, nullptr, + getSectionStart(Section)); + SecStart->setVisibility(GlobalValue::HiddenVisibility); + GlobalVariable *SecEnd = + new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, + nullptr, getSectionEnd(Section)); + SecEnd->setVisibility(GlobalValue::HiddenVisibility); + + std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions( + M, SanCovModuleCtorName, InitFunctionName, {Ty, Ty}, + {IRB.CreatePointerCast(SecStart, Ty), IRB.CreatePointerCast(SecEnd, Ty)}); + + if (TargetTriple.supportsCOMDAT()) { + // Use comdat to dedup CtorFunc. + CtorFunc->setComdat(M.getOrInsertComdat(SanCovModuleCtorName)); + appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority, CtorFunc); + } else { + appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority); + } +} + bool SanitizerCoverageModule::runOnModule(Module &M) { if (Options.CoverageType == SanitizerCoverageOptions::SCK_None) return false; @@ -192,15 +241,18 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { DL = &M.getDataLayout(); CurModule = &M; TargetTriple = Triple(M.getTargetTriple()); - HasSancovGuardsSection = false; + FunctionGuardArray = nullptr; + Function8bitCounterArray = nullptr; IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits()); IntptrPtrTy = PointerType::getUnqual(IntptrTy); Type *VoidTy = Type::getVoidTy(*C); IRBuilder<> IRB(*C); Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty()); Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); + Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty()); Int64Ty = IRB.getInt64Ty(); Int32Ty = IRB.getInt32Ty(); + Int8Ty = IRB.getInt8Ty(); SanCovTracePCIndir = checkSanitizerInterfaceFunction( M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy)); @@ -243,34 +295,13 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { for (auto &F : M) runOnFunction(F); - // Create variable for module (compilation unit) name - if (Options.TracePCGuard) { - if (HasSancovGuardsSection) { - Function *CtorFunc; - GlobalVariable *SecStart = new GlobalVariable( - M, Int32PtrTy, false, GlobalVariable::ExternalLinkage, nullptr, - getSanCovTracePCGuardSectionStart()); - SecStart->setVisibility(GlobalValue::HiddenVisibility); - GlobalVariable *SecEnd = new GlobalVariable( - M, Int32PtrTy, false, GlobalVariable::ExternalLinkage, nullptr, - getSanCovTracePCGuardSectionEnd()); - SecEnd->setVisibility(GlobalValue::HiddenVisibility); - - std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions( - M, SanCovModuleCtorName, SanCovTracePCGuardInitName, - {Int32PtrTy, Int32PtrTy}, - {IRB.CreatePointerCast(SecStart, Int32PtrTy), - IRB.CreatePointerCast(SecEnd, Int32PtrTy)}); - - if (TargetTriple.supportsCOMDAT()) { - // Use comdat to dedup CtorFunc. 
- CtorFunc->setComdat(M.getOrInsertComdat(SanCovModuleCtorName)); - appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority, CtorFunc); - } else { - appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority); - } - } - } + if (FunctionGuardArray) + CreateInitCallForSection(M, SanCovTracePCGuardInitName, Int32PtrTy, + SanCovGuardsSectionName); + if (Function8bitCounterArray) + CreateInitCallForSection(M, SanCov8bitCountersInitName, Int8PtrTy, + SanCovCountersSectionName); + return true; } @@ -393,17 +424,26 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) { InjectTraceForGep(F, GepTraceTargets); return true; } -void SanitizerCoverageModule::CreateFunctionGuardArray(size_t NumGuards, - Function &F) { - if (!Options.TracePCGuard) return; - HasSancovGuardsSection = true; - ArrayType *ArrayOfInt32Ty = ArrayType::get(Int32Ty, NumGuards); - FunctionGuardArray = new GlobalVariable( - *CurModule, ArrayOfInt32Ty, false, GlobalVariable::PrivateLinkage, - Constant::getNullValue(ArrayOfInt32Ty), "__sancov_gen_"); + +GlobalVariable *SanitizerCoverageModule::CreateFunctionLocalArrayInSection( + size_t NumElements, Function &F, Type *Ty, const char *Section) { + ArrayType *ArrayTy = ArrayType::get(Ty, NumElements); + auto Array = new GlobalVariable( + *CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage, + Constant::getNullValue(ArrayTy), "__sancov_gen_"); if (auto Comdat = F.getComdat()) - FunctionGuardArray->setComdat(Comdat); - FunctionGuardArray->setSection(getSanCovTracePCGuardSection()); + Array->setComdat(Comdat); + Array->setSection(getSectionName(Section)); + return Array; +} +void SanitizerCoverageModule::CreateFunctionLocalArrays(size_t NumGuards, + Function &F) { + if (Options.TracePCGuard) + FunctionGuardArray = CreateFunctionLocalArrayInSection( + NumGuards, F, Int32Ty, SanCovGuardsSectionName); + if (Options.Inline8bitCounters) + Function8bitCounterArray = CreateFunctionLocalArrayInSection( + NumGuards, F, Int8Ty, SanCovCountersSectionName); } bool SanitizerCoverageModule::InjectCoverage(Function &F, @@ -413,11 +453,11 @@ bool SanitizerCoverageModule::InjectCoverage(Function &F, case SanitizerCoverageOptions::SCK_None: return false; case SanitizerCoverageOptions::SCK_Function: - CreateFunctionGuardArray(1, F); + CreateFunctionLocalArrays(1, F); InjectCoverageAtBlock(F, F.getEntryBlock(), 0); return true; default: { - CreateFunctionGuardArray(AllBlocks.size(), F); + CreateFunctionLocalArrays(AllBlocks.size(), F); for (size_t i = 0, N = AllBlocks.size(); i < N; i++) InjectCoverageAtBlock(F, *AllBlocks[i], i); return true; @@ -436,7 +476,7 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls( Function &F, ArrayRef<Instruction *> IndirCalls) { if (IndirCalls.empty()) return; - assert(Options.TracePC || Options.TracePCGuard); + assert(Options.TracePC || Options.TracePCGuard || Options.Inline8bitCounters); for (auto I : IndirCalls) { IRBuilder<> IRB(I); CallSite CS(I); @@ -564,8 +604,8 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, if (Options.TracePC) { IRB.CreateCall(SanCovTracePC); // gets the PC using GET_CALLER_PC. IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. 
- } else { - assert(Options.TracePCGuard); + } + if (Options.TracePCGuard) { auto GuardPtr = IRB.CreateIntToPtr( IRB.CreateAdd(IRB.CreatePointerCast(FunctionGuardArray, IntptrTy), ConstantInt::get(IntptrTy, Idx * 4)), @@ -573,26 +613,39 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, IRB.CreateCall(SanCovTracePCGuard, GuardPtr); IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. } + if (Options.Inline8bitCounters) { + auto CounterPtr = IRB.CreateGEP( + Function8bitCounterArray, + {ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)}); + auto Load = IRB.CreateLoad(CounterPtr); + auto Inc = IRB.CreateAdd(Load, ConstantInt::get(Int8Ty, 1)); + auto Store = IRB.CreateStore(Inc, CounterPtr); + SetNoSanitizeMetadata(Load); + SetNoSanitizeMetadata(Store); + } } -StringRef SanitizerCoverageModule::getSanCovTracePCGuardSection() const { +std::string +SanitizerCoverageModule::getSectionName(const std::string &Section) const { if (TargetTriple.getObjectFormat() == Triple::COFF) return ".SCOV$M"; if (TargetTriple.isOSBinFormatMachO()) - return "__DATA,__sancov_guards"; - return "__sancov_guards"; + return "__DATA,__" + Section; + return "__" + Section; } -StringRef SanitizerCoverageModule::getSanCovTracePCGuardSectionStart() const { +std::string +SanitizerCoverageModule::getSectionStart(const std::string &Section) const { if (TargetTriple.isOSBinFormatMachO()) - return "\1section$start$__DATA$__sancov_guards"; - return "__start___sancov_guards"; + return "\1section$start$__DATA$__" + Section; + return "__start___" + Section; } -StringRef SanitizerCoverageModule::getSanCovTracePCGuardSectionEnd() const { +std::string +SanitizerCoverageModule::getSectionEnd(const std::string &Section) const { if (TargetTriple.isOSBinFormatMachO()) - return "\1section$end$__DATA$__sancov_guards"; - return "__stop___sancov_guards"; + return "\1section$end$__DATA$__" + Section; + return "__stop___" + Section; } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 3953198fe6052..9a7882211bac6 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1823,6 +1823,7 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { // An IV counter must preserve its type. if (IncI->getNumOperands() == 2) break; + LLVM_FALLTHROUGH; default: return nullptr; } diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp index 930696b036c00..7d8da9b453f9f 100644 --- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp +++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -83,6 +84,149 @@ static bool handleSwitchExpect(SwitchInst &SI) { return true; } +/// Handler for PHINodes that define the value argument to an +/// @llvm.expect call. +/// +/// If the operand of the phi has a constant value and it 'contradicts' +/// with the expected value of phi def, then the corresponding incoming +/// edge of the phi is unlikely to be taken. Using that information, +/// the branch probability info for the originating branch can be inferred. 
+static void handlePhiDef(CallInst *Expect) { + Value &Arg = *Expect->getArgOperand(0); + ConstantInt *ExpectedValue = cast<ConstantInt>(Expect->getArgOperand(1)); + const APInt &ExpectedPhiValue = ExpectedValue->getValue(); + + // Walk up in backward a list of instructions that + // have 'copy' semantics by 'stripping' the copies + // until a PHI node or an instruction of unknown kind + // is reached. Negation via xor is also handled. + // + // C = PHI(...); + // B = C; + // A = B; + // D = __builtin_expect(A, 0); + // + Value *V = &Arg; + SmallVector<Instruction *, 4> Operations; + while (!isa<PHINode>(V)) { + if (ZExtInst *ZExt = dyn_cast<ZExtInst>(V)) { + V = ZExt->getOperand(0); + Operations.push_back(ZExt); + continue; + } + + if (SExtInst *SExt = dyn_cast<SExtInst>(V)) { + V = SExt->getOperand(0); + Operations.push_back(SExt); + continue; + } + + BinaryOperator *BinOp = dyn_cast<BinaryOperator>(V); + if (!BinOp || BinOp->getOpcode() != Instruction::Xor) + return; + + ConstantInt *CInt = dyn_cast<ConstantInt>(BinOp->getOperand(1)); + if (!CInt) + return; + + V = BinOp->getOperand(0); + Operations.push_back(BinOp); + } + + // Executes the recorded operations on input 'Value'. + auto ApplyOperations = [&](const APInt &Value) { + APInt Result = Value; + for (auto Op : llvm::reverse(Operations)) { + switch (Op->getOpcode()) { + case Instruction::Xor: + Result ^= cast<ConstantInt>(Op->getOperand(1))->getValue(); + break; + case Instruction::ZExt: + Result = Result.zext(Op->getType()->getIntegerBitWidth()); + break; + case Instruction::SExt: + Result = Result.sext(Op->getType()->getIntegerBitWidth()); + break; + default: + llvm_unreachable("Unexpected operation"); + } + } + return Result; + }; + + auto *PhiDef = dyn_cast<PHINode>(V); + + // Get the first dominating conditional branch of the operand + // i's incoming block. + auto GetDomConditional = [&](unsigned i) -> BranchInst * { + BasicBlock *BB = PhiDef->getIncomingBlock(i); + BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (BI && BI->isConditional()) + return BI; + BB = BB->getSinglePredecessor(); + if (!BB) + return nullptr; + BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (!BI || BI->isUnconditional()) + return nullptr; + return BI; + }; + + // Now walk through all Phi operands to find phi oprerands with values + // conflicting with the expected phi output value. Any such operand + // indicates the incoming edge to that operand is unlikely. + for (unsigned i = 0, e = PhiDef->getNumIncomingValues(); i != e; ++i) { + + Value *PhiOpnd = PhiDef->getIncomingValue(i); + ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd); + if (!CI) + continue; + + // Not an interesting case when IsUnlikely is false -- we can not infer + // anything useful when the operand value matches the expected phi + // output. + if (ExpectedPhiValue == ApplyOperations(CI->getValue())) + continue; + + BranchInst *BI = GetDomConditional(i); + if (!BI) + continue; + + MDBuilder MDB(PhiDef->getContext()); + + // There are two situations in which an operand of the PhiDef comes + // from a given successor of a branch instruction BI. + // 1) When the incoming block of the operand is the successor block; + // 2) When the incoming block is BI's enclosing block and the + // successor is the PhiDef's enclosing block. + // + // Returns true if the operand which comes from OpndIncomingBB + // comes from outgoing edge of BI that leads to Succ block. 
+ auto *OpndIncomingBB = PhiDef->getIncomingBlock(i); + auto IsOpndComingFromSuccessor = [&](BasicBlock *Succ) { + if (OpndIncomingBB == Succ) + // If this successor is the incoming block for this + // Phi operand, then this successor does lead to the Phi. + return true; + if (OpndIncomingBB == BI->getParent() && Succ == PhiDef->getParent()) + // Otherwise, if the edge is directly from the branch + // to the Phi, this successor is the one feeding this + // Phi operand. + return true; + return false; + }; + + if (IsOpndComingFromSuccessor(BI->getSuccessor(1))) + BI->setMetadata( + LLVMContext::MD_prof, + MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight)); + else if (IsOpndComingFromSuccessor(BI->getSuccessor(0))) + BI->setMetadata( + LLVMContext::MD_prof, + MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight)); + } +} + // Handle both BranchInst and SelectInst. template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) { @@ -99,25 +243,31 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) { ICmpInst *CmpI = dyn_cast<ICmpInst>(BSI.getCondition()); CmpInst::Predicate Predicate; - uint64_t ValueComparedTo = 0; + ConstantInt *CmpConstOperand = nullptr; if (!CmpI) { CI = dyn_cast<CallInst>(BSI.getCondition()); Predicate = CmpInst::ICMP_NE; - ValueComparedTo = 0; } else { Predicate = CmpI->getPredicate(); if (Predicate != CmpInst::ICMP_NE && Predicate != CmpInst::ICMP_EQ) return false; - ConstantInt *CmpConstOperand = dyn_cast<ConstantInt>(CmpI->getOperand(1)); + + CmpConstOperand = dyn_cast<ConstantInt>(CmpI->getOperand(1)); if (!CmpConstOperand) return false; - ValueComparedTo = CmpConstOperand->getZExtValue(); CI = dyn_cast<CallInst>(CmpI->getOperand(0)); } if (!CI) return false; + uint64_t ValueComparedTo = 0; + if (CmpConstOperand) { + if (CmpConstOperand->getBitWidth() > 64) + return false; + ValueComparedTo = CmpConstOperand->getZExtValue(); + } + Function *Fn = CI->getCalledFunction(); if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect) return false; @@ -181,6 +331,10 @@ static bool lowerExpectIntrinsic(Function &F) { Function *Fn = CI->getCalledFunction(); if (Fn && Fn->getIntrinsicID() == Intrinsic::expect) { + // Before erasing the llvm.expect, walk backward to find + // phi that define llvm.expect's first arg, and + // infer branch probability: + handlePhiDef(CI); Value *Exp = CI->getArgOperand(0); CI->replaceAllUsesWith(Exp); CI->eraseFromParent(); diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 77b2bd84f9b61..350b50ffcdd41 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// // -// Rewrite an existing set of gc.statepoints such that they make potential -// relocations performed by the garbage collector explicit in the IR. +// Rewrite call/invoke instructions so as to make potential relocations +// performed by the garbage collector explicit in the IR. // //===----------------------------------------------------------------------===// @@ -2094,9 +2094,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // live in the IR. We'll remove all of these when done. SmallVector<CallInst *, 64> Holders; - // Insert a dummy call with all of the arguments to the vm_state we'll need - // for the actual safepoint insertion. 
This ensures reference arguments in - // the deopt argument list are considered live through the safepoint (and + // Insert a dummy call with all of the deopt operands we'll need for the + // actual safepoint insertion as arguments. This ensures reference operands + // in the deopt argument list are considered live through the safepoint (and // thus makes sure they get relocated.) for (CallSite CS : ToUpdate) { SmallVector<Value *, 64> DeoptValues; diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 6e113bccff94c..fb1b5813fd79f 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -3698,7 +3698,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { int Idx = 0, Size = Offsets.Splits.size(); for (;;) { auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8); - auto *PartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace()); + auto *LoadPartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace()); + auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace()); // Either lookup a split load or create one. LoadInst *PLoad; @@ -3709,7 +3710,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { PLoad = IRB.CreateAlignedLoad( getAdjustedPtr(IRB, DL, LoadBasePtr, APInt(DL.getPointerSizeInBits(), PartOffset), - PartPtrTy, LoadBasePtr->getName() + "."), + LoadPartPtrTy, LoadBasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); } @@ -3719,7 +3720,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { StoreInst *PStore = IRB.CreateAlignedStore( PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, APInt(DL.getPointerSizeInBits(), PartOffset), - PartPtrTy, StoreBasePtr->getName() + "."), + StorePartPtrTy, StoreBasePtr->getName() + "."), getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); // Now build a new slice for the alloca. diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 1ec3d0d496374..1c1a75c111e96 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -37,10 +37,10 @@ using namespace llvm; /// See comments in Cloning.h. -BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, - ValueToValueMapTy &VMap, +BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix, Function *F, - ClonedCodeInfo *CodeInfo) { + ClonedCodeInfo *CodeInfo, + DebugInfoFinder *DIFinder) { DenseMap<const MDNode *, MDNode *> Cache; BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); @@ -50,10 +50,11 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, // Loop over all instructions, and copy them over. 
for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) { + + if (DIFinder && F->getParent() && II->getDebugLoc()) + DIFinder->processLocation(*F->getParent(), II->getDebugLoc().get()); + Instruction *NewInst = II->clone(); - if (F && F->getSubprogram()) - DebugLoc::reparentDebugInfo(*NewInst, BB->getParent()->getSubprogram(), - F->getSubprogram(), Cache); if (II->hasName()) NewInst->setName(II->getName()+NameSuffix); NewBB->getInstList().push_back(NewInst); @@ -122,31 +123,38 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttributes(), OldAttrs.getRetAttributes(), NewArgAttrs)); + bool MustCloneSP = + OldFunc->getParent() && OldFunc->getParent() == NewFunc->getParent(); + DISubprogram *SP = OldFunc->getSubprogram(); + if (SP) { + assert(!MustCloneSP || ModuleLevelChanges); + // Add mappings for some DebugInfo nodes that we don't want duplicated + // even if they're distinct. + auto &MD = VMap.MD(); + MD[SP->getUnit()].reset(SP->getUnit()); + MD[SP->getType()].reset(SP->getType()); + MD[SP->getFile()].reset(SP->getFile()); + // If we're not cloning into the same module, no need to clone the + // subprogram + if (!MustCloneSP) + MD[SP].reset(SP); + } + SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; OldFunc->getAllMetadata(MDs); for (auto MD : MDs) { - MDNode *NewMD; - bool MustCloneSP = - (MD.first == LLVMContext::MD_dbg && OldFunc->getParent() && - OldFunc->getParent() == NewFunc->getParent()); - if (MustCloneSP) { - auto *SP = cast<DISubprogram>(MD.second); - NewMD = DISubprogram::getDistinct( - NewFunc->getContext(), SP->getScope(), SP->getName(), - SP->getLinkageName(), SP->getFile(), SP->getLine(), SP->getType(), - SP->isLocalToUnit(), SP->isDefinition(), SP->getScopeLine(), - SP->getContainingType(), SP->getVirtuality(), SP->getVirtualIndex(), - SP->getThisAdjustment(), SP->getFlags(), SP->isOptimized(), - SP->getUnit(), SP->getTemplateParams(), SP->getDeclaration(), - SP->getVariables(), SP->getThrownTypes()); - } else - NewMD = - MapMetadata(MD.second, VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, - TypeMapper, Materializer); - NewFunc->addMetadata(MD.first, *NewMD); + NewFunc->addMetadata( + MD.first, + *MapMetadata(MD.second, VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer)); } + // When we remap instructions, we want to avoid duplicating inlined + // DISubprograms, so record all subprograms we find as we duplicate + // instructions and then freeze them in the MD map. + DebugInfoFinder DIFinder; + // Loop over all of the basic blocks in the function, cloning them as // appropriate. Note that we save BE this way in order to handle cloning of // recursive functions into themselves. @@ -156,7 +164,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, const BasicBlock &BB = *BI; // Create a new basic block and copy instructions into it! - BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo); + BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo, + SP ? &DIFinder : nullptr); // Add basic block mapping. VMap[&BB] = CBB; @@ -178,6 +187,12 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, Returns.push_back(RI); } + for (DISubprogram *ISP : DIFinder.subprograms()) { + if (ISP != SP) { + VMap.MD()[ISP].reset(ISP); + } + } + // Loop over all of the instructions in the function, fixing up operand // references as we go. 
This uses VMap to do all the hard work. for (Function::iterator BB = @@ -226,7 +241,7 @@ Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap, } SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. - CloneFunctionInto(NewF, F, VMap, /*ModuleLevelChanges=*/false, Returns, "", + CloneFunctionInto(NewF, F, VMap, F->getSubprogram() != nullptr, Returns, "", CodeInfo); return NewF; diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 8b9a64c220ccd..799eef21dc4ea 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4779,6 +4779,7 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { scalarizeInstruction(&I, true); break; } + LLVM_FALLTHROUGH; case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: @@ -7396,6 +7397,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, // likely. return Cost / getReciprocalPredBlockProb(); } + LLVM_FALLTHROUGH; case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index e6f78e6b94a34..d1349535f2982 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -259,6 +259,7 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst, if (hasVectorInstrinsicScalarOpd(ID, 1)) { return (CI->getArgOperand(1) == Scalar); } + LLVM_FALLTHROUGH; } default: return false; @@ -4749,56 +4750,18 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P, return nullptr; } -namespace { -/// Tracks instructons and its children. -class WeakTrackingVHWithLevel final : public CallbackVH { - /// Operand index of the instruction currently beeing analized. - unsigned Level = 0; - /// Is this the instruction that should be vectorized, or are we now - /// processing children (i.e. operands of this instruction) for potential - /// vectorization? - bool IsInitial = true; - -public: - explicit WeakTrackingVHWithLevel() = default; - WeakTrackingVHWithLevel(Value *V) : CallbackVH(V){}; - /// Restart children analysis each time it is repaced by the new instruction. - void allUsesReplacedWith(Value *New) override { - setValPtr(New); - Level = 0; - IsInitial = true; - } - /// Check if the instruction was not deleted during vectorization. - bool isValid() const { return !getValPtr(); } - /// Is the istruction itself must be vectorized? - bool isInitial() const { return IsInitial; } - /// Try to vectorize children. - void clearInitial() { IsInitial = false; } - /// Are all children processed already? - bool isFinal() const { - assert(getValPtr() && - (isa<Instruction>(getValPtr()) && - cast<Instruction>(getValPtr())->getNumOperands() >= Level)); - return getValPtr() && - cast<Instruction>(getValPtr())->getNumOperands() == Level; - } - /// Get next child operation. - Value *nextOperand() { - assert(getValPtr() && isa<Instruction>(getValPtr()) && - cast<Instruction>(getValPtr())->getNumOperands() > Level); - return cast<Instruction>(getValPtr())->getOperand(Level++); - } - virtual ~WeakTrackingVHWithLevel() = default; -}; -} // namespace - -/// \brief Attempt to reduce a horizontal reduction. -/// If it is legal to match a horizontal reduction feeding -/// the phi node P with reduction operators Root in a basic block BB, then check -/// if it can be done. -/// \returns true if a horizontal reduction was matched and reduced. 
-/// \returns false if a horizontal reduction was not matched. -static bool canBeVectorized( +/// Attempt to reduce a horizontal reduction. +/// If it is legal to match a horizontal reduction feeding the phi node \a P +/// with reduction operators \a Root (or one of its operands) in a basic block +/// \a BB, then check if it can be done. If horizontal reduction is not found +/// and root instruction is a binary operation, vectorization of the operands is +/// attempted. +/// \returns true if a horizontal reduction was matched and reduced or operands +/// of one of the binary instruction were vectorized. +/// \returns false if a horizontal reduction was not matched (or not possible) +/// or no vectorization of any binary operation feeding \a Root instruction was +/// performed. +static bool tryToVectorizeHorReductionOrInstOperands( PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R, TargetTransformInfo *TTI, const function_ref<bool(BinaryOperator *, BoUpSLP &)> Vectorize) { @@ -4810,56 +4773,62 @@ static bool canBeVectorized( if (Root->getParent() != BB) return false; - SmallVector<WeakTrackingVHWithLevel, 8> Stack(1, Root); + // Start analysis starting from Root instruction. If horizontal reduction is + // found, try to vectorize it. If it is not a horizontal reduction or + // vectorization is not possible or not effective, and currently analyzed + // instruction is a binary operation, try to vectorize the operands, using + // pre-order DFS traversal order. If the operands were not vectorized, repeat + // the same procedure considering each operand as a possible root of the + // horizontal reduction. + // Interrupt the process if the Root instruction itself was vectorized or all + // sub-trees not higher that RecursionMaxDepth were analyzed/vectorized. + SmallVector<std::pair<WeakTrackingVH, unsigned>, 8> Stack(1, {Root, 0}); SmallSet<Value *, 8> VisitedInstrs; bool Res = false; while (!Stack.empty()) { - Value *V = Stack.back(); - if (!V) { - Stack.pop_back(); + Value *V; + unsigned Level; + std::tie(V, Level) = Stack.pop_back_val(); + if (!V) continue; - } auto *Inst = dyn_cast<Instruction>(V); - if (!Inst || isa<PHINode>(Inst)) { - Stack.pop_back(); + if (!Inst || isa<PHINode>(Inst)) continue; - } - if (Stack.back().isInitial()) { - Stack.back().clearInitial(); - if (auto *BI = dyn_cast<BinaryOperator>(Inst)) { - HorizontalReduction HorRdx; - if (HorRdx.matchAssociativeReduction(P, BI)) { - if (HorRdx.tryToReduce(R, TTI)) { - Res = true; - P = nullptr; - continue; - } - } - if (P) { - Inst = dyn_cast<Instruction>(BI->getOperand(0)); - if (Inst == P) - Inst = dyn_cast<Instruction>(BI->getOperand(1)); - if (!Inst) { - P = nullptr; - continue; - } + if (auto *BI = dyn_cast<BinaryOperator>(Inst)) { + HorizontalReduction HorRdx; + if (HorRdx.matchAssociativeReduction(P, BI)) { + if (HorRdx.tryToReduce(R, TTI)) { + Res = true; + // Set P to nullptr to avoid re-analysis of phi node in + // matchAssociativeReduction function unless this is the root node. + P = nullptr; + continue; } } - P = nullptr; - if (Vectorize(dyn_cast<BinaryOperator>(Inst), R)) { - Res = true; - continue; + if (P) { + Inst = dyn_cast<Instruction>(BI->getOperand(0)); + if (Inst == P) + Inst = dyn_cast<Instruction>(BI->getOperand(1)); + if (!Inst) { + // Set P to nullptr to avoid re-analysis of phi node in + // matchAssociativeReduction function unless this is the root node. 
+ P = nullptr; + continue; + } } } - if (Stack.back().isFinal()) { - Stack.pop_back(); + // Set P to nullptr to avoid re-analysis of phi node in + // matchAssociativeReduction function unless this is the root node. + P = nullptr; + if (Vectorize(dyn_cast<BinaryOperator>(Inst), R)) { + Res = true; continue; } - if (auto *NextV = dyn_cast<Instruction>(Stack.back().nextOperand())) - if (NextV->getParent() == BB && VisitedInstrs.insert(NextV).second && - Stack.size() < RecursionMaxDepth) - Stack.push_back(NextV); + // Try to vectorize operands. + if (++Level < RecursionMaxDepth) + for (auto *Op : Inst->operand_values()) + Stack.emplace_back(Op, Level); } return Res; } @@ -4876,10 +4845,10 @@ bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V, if (!isa<BinaryOperator>(I)) P = nullptr; // Try to match and vectorize a horizontal reduction. - return canBeVectorized(P, I, BB, R, TTI, - [this](BinaryOperator *BI, BoUpSLP &R) -> bool { - return tryToVectorize(BI, R); - }); + return tryToVectorizeHorReductionOrInstOperands( + P, I, BB, R, TTI, [this](BinaryOperator *BI, BoUpSLP &R) -> bool { + return tryToVectorize(BI, R); + }); } bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { |
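The InstCombineCompares hunk above generalizes the old "~x < cst" fold, which only fired on ConstantInt, to any constant matchable as an APInt, and now emits the comparison with the swapped predicate. A standalone sketch that verifies the underlying identity exhaustively over signed 8-bit values (the harness and names are illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  // icmp slt (~X), C  is rewritten to  icmp sgt X, (~C).
  // ~x == -x - 1 is an order-reversing bijection, so the two forms agree.
  for (int x = -128; x < 128; ++x)
    for (int c = -128; c < 128; ++c) {
      auto X = static_cast<int8_t>(x);
      auto C = static_cast<int8_t>(c);
      bool Old = static_cast<int8_t>(~X) < C; // ~X < C
      bool New = X > static_cast<int8_t>(~C); // X > ~C
      assert(Old == New);
    }
  return 0;
}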
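The new handlePhiDef in LowerExpectIntrinsic walks from the argument of @llvm.expect back through copies, zext/sext casts, and xor-with-constant negations to a PHI node, then marks the branches feeding contradicting constants as unlikely. A minimal sketch of the kind of source that now picks up branch weights, assuming Clang/GCC semantics for __builtin_expect (the helper names are hypothetical):

extern int compute();   // hypothetical helpers
extern void rare_path();

int f(bool Cond) {
  // 'T' lowers to a phi of {compute(), 0}. Expecting 0 contradicts the
  // compute() operand, so the pass annotates the branch on 'Cond' with
  // weights that make the compute() edge unlikely.
  int T = Cond ? compute() : 0;
  if (__builtin_expect(T, 0))
    rare_path();
  return T;
}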
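With -sanitizer-coverage-inline-8bit-counters, SanitizerCoverage now emits a private per-function i8 array in the sancov_counters section and, at each coverage point, a load/add-1/store marked nosanitize; the section bounds are later handed to __sanitizer_cov_8bit_counters_init. A rough C++ model of what each instrumented edge executes (the array name and helper are illustrative; the real array is the pass's __sancov_gen_ global):

#include <cstdint>

extern uint8_t EdgeCounters[]; // stands in for the per-function __sancov_gen_ array

inline void CoverageHit(unsigned Idx) {
  // The emitted IR is a plain i8 load, add 1, store; the counter wraps
  // modulo 256, and the nosanitize metadata keeps other sanitizers from
  // instrumenting the instrumentation itself.
  EdgeCounters[Idx] = static_cast<uint8_t>(EdgeCounters[Idx] + 1u);
}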
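computeDeadSymbols no longer returns a DeadSymbols set; it flood-fills a live bit stored on each summary, starting from the preserved GUIDs and the roots already flagged in the index. A stripped-down model of that worklist pass, with refs, calls, and the aliasee flattened into one edge list (a sketch, not the ModuleSummaryIndex API):

#include <vector>

struct Summary {
  bool Live = false;
  std::vector<Summary *> Edges; // refs + calls + aliasee, flattened
};

unsigned markLive(const std::vector<Summary *> &Roots) {
  unsigned LiveSymbols = 0;
  std::vector<Summary *> Worklist;
  // Mirrors the patch's 'visit' lambda: mark on first sight only.
  auto Visit = [&](Summary *S) {
    if (S->Live)
      return;
    S->Live = true;
    ++LiveSymbols;
    Worklist.push_back(S);
  };
  for (Summary *R : Roots)
    Visit(R);
  while (!Worklist.empty()) {
    Summary *S = Worklist.back();
    Worklist.pop_back();
    for (Summary *E : S->Edges)
      Visit(E);
  }
  return LiveSymbols; // dead count = index size - live, as in the DEBUG output
}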
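Several hunks above (MemorySanitizer, IndVarSimplify, LoopVectorize, SLPVectorizer) insert LLVM_FALLTHROUGH at deliberate case fall-throughs so -Wimplicit-fallthrough stays quiet. A self-contained illustration; the macro normally comes from llvm/Support/Compiler.h, and a fallback definition is included here only so the snippet compiles on its own:

#if __cplusplus >= 201703L
#define LLVM_FALLTHROUGH [[fallthrough]]
#else
#define LLVM_FALLTHROUGH
#endif

int classify(int NumArgOperands) {
  int Flags = 0;
  switch (NumArgOperands) {
  case 3:
    Flags |= 4;       // case 3 does its own work...
    LLVM_FALLTHROUGH; // ...and intentionally continues into case 2
  case 2:
    Flags |= 2;
    break;
  default:
    break;
  }
  return Flags;
}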
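Finally, the SLPVectorizer rewrite drops the stateful WeakTrackingVHWithLevel callback class in favor of a plain stack of (value, depth) pairs. The traversal shape, reduced to a generic depth-capped pre-order DFS (node type, names, and the visited-set detail are illustrative, not the patch's exact code):

#include <set>
#include <utility>
#include <vector>

struct Node {
  std::vector<Node *> Operands;
};

// Explicit-stack pre-order DFS with a RecursionMaxDepth-style cap, as in
// tryToVectorizeHorReductionOrInstOperands. TryVectorize stands in for the
// reduction-matching / operand-vectorization attempt.
template <typename Fn>
bool walk(Node *Root, unsigned MaxDepth, Fn TryVectorize) {
  bool Res = false;
  std::vector<std::pair<Node *, unsigned>> Stack{{Root, 0}};
  std::set<Node *> Visited;
  while (!Stack.empty()) {
    auto [N, Level] = Stack.back();
    Stack.pop_back();
    if (!N || !Visited.insert(N).second)
      continue;
    if (TryVectorize(N)) {
      Res = true;
      continue; // do not descend into a subtree that was just vectorized
    }
    if (++Level < MaxDepth)
      for (Node *Op : N->Operands)
        Stack.emplace_back(Op, Level);
  }
  return Res;
}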