Diffstat (limited to 'lib/Transforms')
61 files changed, 1708 insertions, 609 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index a2c8a32dfe866..25db0eff8848e 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -106,9 +106,9 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, AttributeList PAL = F->getAttributes(); // First, determine the new argument list - unsigned ArgIndex = 0; + unsigned ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; - ++I, ++ArgIndex) { + ++I, ++ArgNo) { if (ByValArgsToTransform.count(&*I)) { // Simple byval argument? Just add all the struct element types. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); @@ -120,7 +120,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, } else if (!ArgsToPromote.count(&*I)) { // Unchanged argument Params.push_back(I->getType()); - ArgAttrVec.push_back(PAL.getParamAttributes(ArgIndex)); + ArgAttrVec.push_back(PAL.getParamAttributes(ArgNo)); } else if (I->use_empty()) { // Dead argument (which are always marked as promotable) ++NumArgumentsDead; @@ -214,12 +214,12 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, // Loop over the operands, inserting GEP and loads in the caller as // appropriate. CallSite::arg_iterator AI = CS.arg_begin(); - ArgIndex = 1; + ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; - ++I, ++AI, ++ArgIndex) + ++I, ++AI, ++ArgNo) if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { Args.push_back(*AI); // Unmodified argument - ArgAttrVec.push_back(CallPAL.getAttributes(ArgIndex)); + ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo)); } else if (ByValArgsToTransform.count(&*I)) { // Emit a GEP and load for each element of the struct. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); @@ -280,9 +280,9 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, } // Push any varargs arguments on the list. - for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { + for (; AI != CS.arg_end(); ++AI, ++ArgNo) { Args.push_back(*AI); - ArgAttrVec.push_back(CallPAL.getAttributes(ArgIndex)); + ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo)); } SmallVector<OperandBundleDef, 1> OpBundles; @@ -839,17 +839,12 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter, // avoiding a register copy. 
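A note on the index change in the ArgumentPromotion hunks above: AttributeList addresses attributes through a shifted index space in which 0 (AttributeList::ReturnIndex) refers to the return value and parameters start at index 1, while getParamAttributes() takes a plain 0-based parameter number and applies the shift internally. A minimal sketch of the equivalence, assuming the AttributeList API as it appears in this patch (the helper name is illustrative, not part of the patch):

// Both lookups fetch the attributes of parameter ArgNo. The first goes
// through the raw attribute-index space (return value at 0, parameters
// from 1); the second lets AttributeList apply the +1 shift itself.
static AttributeSet paramAttrsBothWays(const AttributeList &PAL,
                                       unsigned ArgNo) {
  AttributeSet ViaIndex = PAL.getAttributes(ArgNo + 1);  // old style
  AttributeSet ViaArgNo = PAL.getParamAttributes(ArgNo); // new style
  (void)ViaIndex; // identical to ViaArgNo for any in-range ArgNo
  return ViaArgNo;
}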
if (PtrArg->hasStructRetAttr()) { unsigned ArgNo = PtrArg->getArgNo(); - F->setAttributes( - F->getAttributes() - .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet) - .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias)); + F->removeAttribute(ArgNo + 1, Attribute::StructRet); + F->addAttribute(ArgNo + 1, Attribute::NoAlias); for (Use &U : F->uses()) { CallSite CS(U.getUser()); - CS.setAttributes( - CS.getAttributes() - .removeAttribute(F->getContext(), ArgNo + 1, - Attribute::StructRet) - .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias)); + CS.removeAttribute(ArgNo + 1, Attribute::StructRet); + CS.addAttribute(ArgNo + 1, Attribute::NoAlias); } } diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 9648883b7f275..031c3d8a9ebed 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -855,10 +855,11 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) { bool MadeChange = false; for (Function *F : SCCNodes) { - if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy()) + if (F->doesNotAlias(AttributeList::ReturnIndex) || + !F->getReturnType()->isPointerTy()) continue; - F->setDoesNotAlias(0); + F->setDoesNotAlias(AttributeList::ReturnIndex); ++NumNoAlias; MadeChange = true; } diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp index d66411f04cc4a..c7ef2494e3b8b 100644 --- a/lib/Transforms/IPO/FunctionImport.cpp +++ b/lib/Transforms/IPO/FunctionImport.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/Triple.h" +#include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/IntrinsicInst.h" @@ -25,7 +26,6 @@ #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" #include "llvm/Object/IRObjectFile.h" -#include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/SourceMgr.h" diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index ae9d4ce11e0db..f277a51ae659a 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -239,7 +239,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, // we delete a constant array, we may also be holding pointer to one of its // elements (or an element of one of its elements if we're dealing with an // array of arrays) in the worklist. - SmallVector<WeakVH, 8> WorkList(V->user_begin(), V->user_end()); + SmallVector<WeakTrackingVH, 8> WorkList(V->user_begin(), V->user_end()); while (!WorkList.empty()) { Value *UV = WorkList.pop_back_val(); if (!UV) @@ -1792,7 +1792,9 @@ static void makeAllConstantUsesInstructions(Constant *C) { NewU->insertBefore(UI); UI->replaceUsesOfWith(U, NewU); } - U->dropAllReferences(); + // We've replaced all the uses, so destroy the constant. (destroyConstant + // will update value handles and metadata.) 
+ U->destroyConstant(); } } diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt index 9c83f88b2210c..a8b0f32fd785e 100644 --- a/lib/Transforms/IPO/LLVMBuild.txt +++ b/lib/Transforms/IPO/LLVMBuild.txt @@ -20,4 +20,4 @@ type = Library name = IPO parent = Transforms library_name = ipo -required_libraries = Analysis BitWriter Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation +required_libraries = Analysis BitReader BitWriter Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 771770ddc0600..0e478ba607be2 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -207,11 +207,13 @@ private: /// A work queue of functions that may have been modified and should be /// analyzed again. - std::vector<WeakVH> Deferred; + std::vector<WeakTrackingVH> Deferred; /// Checks the rules of order relation introduced among functions set. /// Returns true, if sanity check has been passed, and false if failed. - bool doSanityCheck(std::vector<WeakVH> &Worklist); +#ifndef NDEBUG + bool doSanityCheck(std::vector<WeakTrackingVH> &Worklist); +#endif /// Insert a ComparableFunction into the FnTree, or merge it away if it's /// equal to one that's already present. @@ -283,7 +285,8 @@ ModulePass *llvm::createMergeFunctionsPass() { return new MergeFunctions(); } -bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { +#ifndef NDEBUG +bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) { if (const unsigned Max = NumFunctionsForSanityCheck) { unsigned TripleNumber = 0; bool Valid = true; @@ -291,10 +294,12 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n"; unsigned i = 0; - for (std::vector<WeakVH>::iterator I = Worklist.begin(), E = Worklist.end(); + for (std::vector<WeakTrackingVH>::iterator I = Worklist.begin(), + E = Worklist.end(); I != E && i < Max; ++I, ++i) { unsigned j = i; - for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) { + for (std::vector<WeakTrackingVH>::iterator J = I; J != E && j < Max; + ++J, ++j) { Function *F1 = cast<Function>(*I); Function *F2 = cast<Function>(*J); int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare(); @@ -312,7 +317,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { continue; unsigned k = j; - for (std::vector<WeakVH>::iterator K = J; K != E && k < Max; + for (std::vector<WeakTrackingVH>::iterator K = J; K != E && k < Max; ++k, ++K, ++TripleNumber) { if (K == J) continue; @@ -351,6 +356,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { } return true; } +#endif bool MergeFunctions::runOnModule(Module &M) { if (skipModule(M)) @@ -381,12 +387,12 @@ bool MergeFunctions::runOnModule(Module &M) { // consider merging it. Otherwise it is dropped and never considered again. 
if ((I != S && std::prev(I)->first == I->first) || (std::next(I) != IE && std::next(I)->first == I->first) ) { - Deferred.push_back(WeakVH(I->second)); + Deferred.push_back(WeakTrackingVH(I->second)); } } do { - std::vector<WeakVH> Worklist; + std::vector<WeakTrackingVH> Worklist; Deferred.swap(Worklist); DEBUG(doSanityCheck(Worklist)); @@ -395,7 +401,7 @@ bool MergeFunctions::runOnModule(Module &M) { DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n'); // Insert functions and merge them. - for (WeakVH &I : Worklist) { + for (WeakTrackingVH &I : Worklist) { if (!I) continue; Function *F = cast<Function>(I); diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 78e71c18fe290..1bb9d654ec1cc 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -16,8 +16,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" @@ -31,13 +35,18 @@ using namespace llvm; #define DEBUG_TYPE "partial-inlining" -STATISTIC(NumPartialInlined, "Number of functions partially inlined"); +STATISTIC(NumPartialInlined, + "Number of callsites functions are partially inlined into."); // Command line option to disable partial-inlining. The default is false: static cl::opt<bool> DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial inlining")); +static cl::opt<unsigned> MaxNumInlineBlocks( + "max-num-inline-blocks", cl::init(5), cl::Hidden, + cl::desc("Max number of blocks to be partially inlined")); + // Command line option to set the maximum number of partial inlining allowed // for the module. The default value of -1 means no limit. static cl::opt<int> MaxNumPartialInlining( @@ -45,20 +54,52 @@ static cl::opt<int> MaxNumPartialInlining( cl::desc("Max number of partial inlining transformations. The default is unlimited")); namespace { + +struct FunctionOutliningInfo { + FunctionOutliningInfo() + : Entries(), ReturnBlock(nullptr), NonReturnBlock(nullptr), + ReturnBlockPreds() {} + // Returns the number of blocks to be inlined including all blocks + // in Entries and one return block. + unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; } + + // A set of blocks including the function entry that guard + // the region to be outlined. + SmallVector<BasicBlock *, 4> Entries; + // The return block that is not included in the outlined region. + BasicBlock *ReturnBlock; + // The dominating block of the region to be outlined. 
+ BasicBlock *NonReturnBlock; + // The set of blocks in Entries that are predecessors to ReturnBlock + SmallVector<BasicBlock *, 4> ReturnBlockPreds; +}; + struct PartialInlinerImpl { - PartialInlinerImpl(InlineFunctionInfo IFI) : IFI(std::move(IFI)) {} + PartialInlinerImpl( + std::function<AssumptionCache &(Function &)> *GetAC, + std::function<TargetTransformInfo &(Function &)> *GTTI, + Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI, + ProfileSummaryInfo *ProfSI) + : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {} bool run(Module &M); Function *unswitchFunction(Function *F); + std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F); + private: - InlineFunctionInfo IFI; int NumPartialInlining = 0; + std::function<AssumptionCache &(Function &)> *GetAssumptionCache; + std::function<TargetTransformInfo &(Function &)> *GetTTI; + Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI; + ProfileSummaryInfo *PSI; + bool shouldPartialInline(CallSite CS, OptimizationRemarkEmitter &ORE); bool IsLimitReached() { return (MaxNumPartialInlining != -1 && NumPartialInlining >= MaxNumPartialInlining); } }; + struct PartialInlinerLegacyPass : public ModulePass { static char ID; // Pass identification, replacement for typeid PartialInlinerLegacyPass() : ModulePass(ID) { @@ -67,91 +108,319 @@ struct PartialInlinerLegacyPass : public ModulePass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); } bool runOnModule(Module &M) override { if (skipModule(M)) return false; AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>(); + TargetTransformInfoWrapperPass *TTIWP = + &getAnalysis<TargetTransformInfoWrapperPass>(); + ProfileSummaryInfo *PSI = + getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&ACT](Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }; - InlineFunctionInfo IFI(nullptr, &GetAssumptionCache); - return PartialInlinerImpl(IFI).run(M); + + std::function<TargetTransformInfo &(Function &)> GetTTI = + [&TTIWP](Function &F) -> TargetTransformInfo & { + return TTIWP->getTTI(F); + }; + + return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M); } }; } -Function *PartialInlinerImpl::unswitchFunction(Function *F) { - // First, verify that this function is an unswitching candidate... 
- if (F->hasAddressTaken()) - return nullptr; - +std::unique_ptr<FunctionOutliningInfo> +PartialInlinerImpl::computeOutliningInfo(Function *F) { BasicBlock *EntryBlock = &F->front(); BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator()); if (!BR || BR->isUnconditional()) - return nullptr; + return std::unique_ptr<FunctionOutliningInfo>(); + + // Returns true if Succ is BB's successor + auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) { + return is_contained(successors(BB), Succ); + }; + + auto SuccSize = [](BasicBlock *BB) { + return std::distance(succ_begin(BB), succ_end(BB)); + }; + + auto IsReturnBlock = [](BasicBlock *BB) { + TerminatorInst *TI = BB->getTerminator(); + return isa<ReturnInst>(TI); + }; + + auto GetReturnBlock = [=](BasicBlock *Succ1, BasicBlock *Succ2) { + if (IsReturnBlock(Succ1)) + return std::make_tuple(Succ1, Succ2); + if (IsReturnBlock(Succ2)) + return std::make_tuple(Succ2, Succ1); + + return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr); + }; + + // Detect a triangular shape: + auto GetCommonSucc = [=](BasicBlock *Succ1, BasicBlock *Succ2) { + if (IsSuccessor(Succ1, Succ2)) + return std::make_tuple(Succ1, Succ2); + if (IsSuccessor(Succ2, Succ1)) + return std::make_tuple(Succ2, Succ1); + + return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr); + }; + + std::unique_ptr<FunctionOutliningInfo> OutliningInfo = + llvm::make_unique<FunctionOutliningInfo>(); + + BasicBlock *CurrEntry = EntryBlock; + bool CandidateFound = false; + do { + // The number of blocks to be inlined has already reached + // the limit. When MaxNumInlineBlocks is set to 0 or 1, this + // disables partial inlining for the function. + if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks) + break; + + if (SuccSize(CurrEntry) != 2) + break; + + BasicBlock *Succ1 = *succ_begin(CurrEntry); + BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1); + + BasicBlock *ReturnBlock, *NonReturnBlock; + std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2); + + if (ReturnBlock) { + OutliningInfo->Entries.push_back(CurrEntry); + OutliningInfo->ReturnBlock = ReturnBlock; + OutliningInfo->NonReturnBlock = NonReturnBlock; + CandidateFound = true; + break; + } + + BasicBlock *CommSucc; + BasicBlock *OtherSucc; + std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2); + + if (!CommSucc) + break; + + OutliningInfo->Entries.push_back(CurrEntry); + CurrEntry = OtherSucc; + + } while (true); + + if (!CandidateFound) + return std::unique_ptr<FunctionOutliningInfo>(); + + // Do a sanity check of the entries: there should not + // be any successors (not in the entry set) other than + // {ReturnBlock, NonReturnBlock} + assert(OutliningInfo->Entries[0] == &F->front()); + DenseSet<BasicBlock *> Entries; + for (BasicBlock *E : OutliningInfo->Entries) + Entries.insert(E); + + // Returns true if BB has a predecessor which is not + // in the Entries set. 
+ auto HasNonEntryPred = [Entries](BasicBlock *BB) { + for (auto Pred : predecessors(BB)) { + if (!Entries.count(Pred)) + return true; + } + return false; + }; + auto CheckAndNormalizeCandidate = + [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) { + for (BasicBlock *E : OutliningInfo->Entries) { + for (auto Succ : successors(E)) { + if (Entries.count(Succ)) + continue; + if (Succ == OutliningInfo->ReturnBlock) + OutliningInfo->ReturnBlockPreds.push_back(E); + else if (Succ != OutliningInfo->NonReturnBlock) + return false; + } + // There should not be any outside incoming edges either: + if (HasNonEntryPred(E)) + return false; + } + return true; + }; + + if (!CheckAndNormalizeCandidate(OutliningInfo.get())) + return std::unique_ptr<FunctionOutliningInfo>(); - BasicBlock *ReturnBlock = nullptr; - BasicBlock *NonReturnBlock = nullptr; - unsigned ReturnCount = 0; - for (BasicBlock *BB : successors(EntryBlock)) { - if (isa<ReturnInst>(BB->getTerminator())) { - ReturnBlock = BB; - ReturnCount++; - } else - NonReturnBlock = BB; + // Now further grow the candidate's inlining region by + // peeling off dominating blocks from the outlining region: + while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) { + BasicBlock *Cand = OutliningInfo->NonReturnBlock; + if (SuccSize(Cand) != 2) + break; + + if (HasNonEntryPred(Cand)) + break; + + BasicBlock *Succ1 = *succ_begin(Cand); + BasicBlock *Succ2 = *(succ_begin(Cand) + 1); + + BasicBlock *ReturnBlock, *NonReturnBlock; + std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2); + if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock) + break; + + if (NonReturnBlock->getSinglePredecessor() != Cand) + break; + + // Now grow and update OutliningInfo: + OutliningInfo->Entries.push_back(Cand); + OutliningInfo->NonReturnBlock = NonReturnBlock; + OutliningInfo->ReturnBlockPreds.push_back(Cand); + Entries.insert(Cand); } - if (ReturnCount != 1) + return OutliningInfo; +} + +bool PartialInlinerImpl::shouldPartialInline(CallSite CS, + OptimizationRemarkEmitter &ORE) { + // TODO: more sharing with shouldInline in Inliner.cpp + using namespace ore; + Instruction *Call = CS.getInstruction(); + Function *Callee = CS.getCalledFunction(); + Function *Caller = CS.getCaller(); + auto &CalleeTTI = (*GetTTI)(*Callee); + InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI, + *GetAssumptionCache, GetBFI, PSI); + + if (IC.isAlways()) { + ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call) + << NV("Callee", Callee) + << " should always be fully inlined, not partially"); + return false; + } + + if (IC.isNever()) { + ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) + << NV("Callee", Callee) << " not partially inlined into " + << NV("Caller", Caller) + << " because it should never be inlined (cost=never)"); + return false; + } + + if (!IC) { + ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call) + << NV("Callee", Callee) << " not partially inlined into " + << NV("Caller", Caller) << " because too costly to inline (cost=" + << NV("Cost", IC.getCost()) << ", threshold=" + << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); + return false; + } + + ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call) + << NV("Callee", Callee) << " can be partially inlined into " + << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost()) + << " (threshold=" + << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); + return true; +} + 
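The remarks emitted by shouldPartialInline() above report the inline threshold as getCostDelta() + getCost(). A short sketch of why that sum recovers the threshold, assuming InlineCost's usual contract that the delta is threshold minus cost (the helper name is illustrative):

// InlineCost models the decision as Cost < Threshold and exposes
// getCostDelta() == Threshold - Cost, so the threshold itself can be
// reconstructed for diagnostics without storing it separately.
static int recoverThreshold(const InlineCost &IC) {
  return IC.getCostDelta() + IC.getCost(); // == Threshold
}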
+Function *PartialInlinerImpl::unswitchFunction(Function *F) { + + if (F->hasAddressTaken()) + return nullptr; + + std::unique_ptr<FunctionOutliningInfo> OutliningInfo = + computeOutliningInfo(F); + + if (!OutliningInfo) return nullptr; // Clone the function, so that we can hack away on it. ValueToValueMapTy VMap; Function *DuplicateFunction = CloneFunction(F, VMap); - DuplicateFunction->setLinkage(GlobalValue::InternalLinkage); - BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[EntryBlock]); - BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[ReturnBlock]); - BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[NonReturnBlock]); + BasicBlock *NewReturnBlock = + cast<BasicBlock>(VMap[OutliningInfo->ReturnBlock]); + BasicBlock *NewNonReturnBlock = + cast<BasicBlock>(VMap[OutliningInfo->NonReturnBlock]); + DenseSet<BasicBlock *> NewEntries; + for (BasicBlock *BB : OutliningInfo->Entries) { + NewEntries.insert(cast<BasicBlock>(VMap[BB])); + } // Go ahead and update all uses to the duplicate, so that we can just // use the inliner functionality when we're done hacking. F->replaceAllUsesWith(DuplicateFunction); + auto getFirstPHI = [](BasicBlock *BB) { + BasicBlock::iterator I = BB->begin(); + PHINode *FirstPhi = nullptr; + while (I != BB->end()) { + PHINode *Phi = dyn_cast<PHINode>(I); + if (!Phi) + break; + if (!FirstPhi) { + FirstPhi = Phi; + break; + } + } + return FirstPhi; + }; // Special hackery is needed with PHI nodes that have inputs from more than // one extracted block. For simplicity, just split the PHIs into a two-level // sequence of PHIs, some of which will go in the extracted region, and some // of which will go outside. BasicBlock *PreReturn = NewReturnBlock; - NewReturnBlock = NewReturnBlock->splitBasicBlock( - NewReturnBlock->getFirstNonPHI()->getIterator()); - BasicBlock::iterator I = PreReturn->begin(); - Instruction *Ins = &NewReturnBlock->front(); - while (I != PreReturn->end()) { - PHINode *OldPhi = dyn_cast<PHINode>(I); - if (!OldPhi) - break; - - PHINode *RetPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins); - OldPhi->replaceAllUsesWith(RetPhi); - Ins = NewReturnBlock->getFirstNonPHI(); - - RetPhi->addIncoming(&*I, PreReturn); - RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewEntryBlock), - NewEntryBlock); - OldPhi->removeIncomingValue(NewEntryBlock); - - ++I; + // only split block when necessary: + PHINode *FirstPhi = getFirstPHI(PreReturn); + unsigned NumPredsFromEntries = OutliningInfo->ReturnBlockPreds.size(); + if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) { + + NewReturnBlock = NewReturnBlock->splitBasicBlock( + NewReturnBlock->getFirstNonPHI()->getIterator()); + BasicBlock::iterator I = PreReturn->begin(); + Instruction *Ins = &NewReturnBlock->front(); + while (I != PreReturn->end()) { + PHINode *OldPhi = dyn_cast<PHINode>(I); + if (!OldPhi) + break; + + PHINode *RetPhi = + PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins); + OldPhi->replaceAllUsesWith(RetPhi); + Ins = NewReturnBlock->getFirstNonPHI(); + + RetPhi->addIncoming(&*I, PreReturn); + for (BasicBlock *E : OutliningInfo->ReturnBlockPreds) { + BasicBlock *NewE = cast<BasicBlock>(VMap[E]); + RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE); + OldPhi->removeIncomingValue(NewE); + } + ++I; + } + for (auto E : OutliningInfo->ReturnBlockPreds) { + BasicBlock *NewE = cast<BasicBlock>(VMap[E]); + NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock); + } } - NewEntryBlock->getTerminator()->replaceUsesOfWith(PreReturn, 
NewReturnBlock); + // Returns true if the block is to be partially inlined into the caller + // (i.e. not to be extracted to the out-of-line function) + auto ToBeInlined = [=](BasicBlock *BB) { + return BB == NewReturnBlock || NewEntries.count(BB); + }; // Gather up the blocks that we're going to extract. std::vector<BasicBlock *> ToExtract; ToExtract.push_back(NewNonReturnBlock); for (BasicBlock &BB : *DuplicateFunction) if (&BB != NewEntryBlock && &BB != NewReturnBlock && - &BB != NewNonReturnBlock) + if (!ToBeInlined(&BB) && &BB != NewNonReturnBlock) ToExtract.push_back(&BB); // The CodeExtractor needs a dominator tree. @@ -183,16 +452,22 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) { if (IsLimitReached()) continue; - NumPartialInlining++; OptimizationRemarkEmitter ORE(CS.getCaller()); + if (!shouldPartialInline(CS, ORE)) + continue; + DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); BasicBlock *Block = CS.getParent(); ORE.emit(OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", DLoc, Block) << ore::NV("Callee", F) << " partially inlined into " << ore::NV("Caller", CS.getCaller())); + InlineFunctionInfo IFI(nullptr, GetAssumptionCache); InlineFunction(CS, IFI); + NumPartialInlining++; + // Update stats + NumPartialInlined++; } // Ditch the duplicate, since we're done with it, and rewrite all remaining @@ -200,7 +475,6 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) { DuplicateFunction->replaceAllUsesWith(F); DuplicateFunction->eraseFromParent(); - ++NumPartialInlined; return ExtractedFunction; } @@ -246,6 +520,8 @@ char PartialInlinerLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner", false, false) @@ -256,12 +532,25 @@ ModulePass *llvm::createPartialInliningPass() { PreservedAnalyses PartialInlinerPass::run(Module &M, ModuleAnalysisManager &AM) { auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & { return FAM.getResult<AssumptionAnalysis>(F); }; - InlineFunctionInfo IFI(nullptr, &GetAssumptionCache); - if (PartialInlinerImpl(IFI).run(M)) + + std::function<BlockFrequencyInfo &(Function &)> GetBFI = + [&FAM](Function &F) -> BlockFrequencyInfo & { + return FAM.getResult<BlockFrequencyAnalysis>(F); + }; + + std::function<TargetTransformInfo &(Function &)> GetTTI = + [&FAM](Function &F) -> TargetTransformInfo & { + return FAM.getResult<TargetIRAnalysis>(F); + }; + + ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); + + if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI).run(M)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); } diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 0d5910ebbfcc9..203594572618a 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -38,6 +38,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Vectorize.h" using namespace llvm; @@ -137,14 +138,19 @@ static cl::opt<int> 
PreInlineThreshold( "(default = 75)")); static cl::opt<bool> EnableGVNHoist( - "enable-gvn-hoist", cl::init(true), cl::Hidden, - cl::desc("Enable the GVN hoisting pass (default = on)")); + "enable-gvn-hoist", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN hoisting pass (default = off)")); static cl::opt<bool> DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false), cl::Hidden, cl::desc("Disable shrink-wrap library calls")); +static cl::opt<bool> + EnableSimpleLoopUnswitch("enable-simple-loop-unswitch", cl::init(false), + cl::Hidden, + cl::desc("Enable the simple loop unswitch pass.")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -318,7 +324,10 @@ void PassManagerBuilder::addFunctionSimplificationPasses( // Rotate Loop - disable header duplication at -Oz MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); MPM.add(createLICMPass()); // Hoist loop invariants - MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + if (EnableSimpleLoopUnswitch) + MPM.add(createSimpleLoopUnswitchLegacyPass()); + else + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); MPM.add(createCFGSimplificationPass()); addInstructionCombiningPass(MPM); MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 9801a0a614165..d3a3c24ce7b44 100644 --- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -30,42 +30,11 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; namespace { -// Produce a unique identifier for this module by taking the MD5 sum of the -// names of the module's strong external symbols. This identifier is -// normally guaranteed to be unique, or the program would fail to link due to -// multiply defined symbols. -// -// If the module has no strong external symbols (such a module may still have a -// semantic effect if it performs global initialization), we cannot produce a -// unique identifier for this module, so we return the empty string, which -// causes the entire module to be written as a regular LTO module. -std::string getModuleId(Module *M) { - MD5 Md5; - bool ExportsSymbols = false; - for (auto &GV : M->global_values()) { - if (GV.isDeclaration() || GV.getName().startswith("llvm.") || - !GV.hasExternalLinkage()) - continue; - ExportsSymbols = true; - Md5.update(GV.getName()); - Md5.update(ArrayRef<uint8_t>{0}); - } - - if (!ExportsSymbols) - return ""; - - MD5::MD5Result R; - Md5.final(R); - - SmallString<32> Str; - MD5::stringifyResult(R, Str); - return ("$" + Str).str(); -} - // Promote each local-linkage entity defined by ExportM and used by ImportM by // changing visibility and appending the given ModuleId. void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { @@ -251,7 +220,7 @@ void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) { void splitAndWriteThinLTOBitcode( raw_ostream &OS, raw_ostream *ThinLinkOS, function_ref<AAResults &(Function &)> AARGetter, Module &M) { - std::string ModuleId = getModuleId(&M); + std::string ModuleId = getUniqueModuleId(&M); if (ModuleId.empty()) { // We couldn't generate a module ID for this module, just write it out as a // regular LTO module. 
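The getModuleId() body deleted from ThinLTOBitcodeWriter.cpp above is not gone; it moved to ModuleUtils as getUniqueModuleId() (hence the new ModuleUtils.h include). A minimal usage sketch under that assumption (the wrapper name is illustrative):

#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

// Returns the "$<md5>" identifier derived from the module's strong
// external symbols, or an empty string when the module exports none,
// in which case it must be written as a regular LTO module.
static std::string moduleIdForThinLTOSplit(llvm::Module &M) {
  return llvm::getUniqueModuleId(&M);
}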
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 030461004f568..4f1f194997683 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -861,7 +861,7 @@ static bool checkRippleForAdd(const APInt &Op0KnownZero, // Find the most significant known 0 other than the sign bit. int BitWidth = Op0KnownZero.getBitWidth(); APInt Op0KnownZeroTemp(Op0KnownZero); - Op0KnownZeroTemp.clearBit(BitWidth - 1); + Op0KnownZeroTemp.clearSignBit(); int Op0ZeroPosition = BitWidth - Op0KnownZeroTemp.countLeadingZeros() - 1; int Op1OnePosition = BitWidth - Op1MaybeOne.countLeadingZeros() - 1; @@ -1037,7 +1037,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return replaceInstUsesWith(I, V); if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC)) + I.hasNoUnsignedWrap(), SQ)) return replaceInstUsesWith(I, V); // (A*B)+(A*C) -> A*(B+C) etc @@ -1358,8 +1358,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = - SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), SQ)) return replaceInstUsesWith(I, V); if (isa<Constant>(RHS)) @@ -1550,7 +1549,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return replaceInstUsesWith(I, V); if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC)) + I.hasNoUnsignedWrap(), SQ)) return replaceInstUsesWith(I, V); // (A*B)-(A*C) -> A*(B-C) etc @@ -1756,8 +1755,7 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = - SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), SQ)) return replaceInstUsesWith(I, V); // fsub nsz 0, X ==> fsub nsz -0.0, X diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index a97b5a9ec0bb8..c7092bf3a398b 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1213,7 +1213,7 @@ static Instruction *foldAndToXor(BinaryOperator &I, // (~B | A) & (~A | B) --> ~(A ^ B) // (~B | A) & (B | ~A) --> ~(A ^ B) if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) && - match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Value(B)))) + match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Specific(B)))) return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); return nullptr; @@ -1254,7 +1254,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyAndInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyAndInst(Op0, Op1, SQ)) return replaceInstUsesWith(I, V); // See if we can simplify any instructions used by the instruction whose sole @@ -2039,7 +2039,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyOrInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyOrInst(Op0, Op1, SQ)) return replaceInstUsesWith(I, V); // See if we can simplify any instructions used by the instruction whose sole @@ -2415,7 +2415,7 @@ Instruction 
*InstCombiner::visitXor(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyXorInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyXorInst(Op0, Op1, SQ)) return replaceInstUsesWith(I, V); if (Instruction *NewXor = foldXorToXor(I)) @@ -2433,25 +2433,32 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Value *V = SimplifyBSwap(I)) return replaceInstUsesWith(I, V); + // Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand. + Value *X, *Y; + + // We must eliminate the and/or (one-use) for these transforms to not increase + // the instruction count. + // ~(~X & Y) --> (X | ~Y) + // ~(Y & ~X) --> (X | ~Y) + if (match(&I, m_Not(m_OneUse(m_c_And(m_Not(m_Value(X)), m_Value(Y)))))) { + Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not"); + return BinaryOperator::CreateOr(X, NotY); + } + // ~(~X | Y) --> (X & ~Y) + // ~(Y | ~X) --> (X & ~Y) + if (match(&I, m_Not(m_OneUse(m_c_Or(m_Not(m_Value(X)), m_Value(Y)))))) { + Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not"); + return BinaryOperator::CreateAnd(X, NotY); + } + // Is this a 'not' (~) fed by a binary operator? BinaryOperator *NotOp; if (match(&I, m_Not(m_BinOp(NotOp)))) { if (NotOp->getOpcode() == Instruction::And || NotOp->getOpcode() == Instruction::Or) { - // ~(~X & Y) --> (X | ~Y) - De Morgan's Law - // ~(~X | Y) === (X & ~Y) - De Morgan's Law - if (dyn_castNotVal(NotOp->getOperand(1))) - NotOp->swapOperands(); - if (Value *Op0NotVal = dyn_castNotVal(NotOp->getOperand(0))) { - Value *NotY = Builder->CreateNot( - NotOp->getOperand(1), NotOp->getOperand(1)->getName() + ".not"); - if (NotOp->getOpcode() == Instruction::And) - return BinaryOperator::CreateOr(Op0NotVal, NotY); - return BinaryOperator::CreateAnd(Op0NotVal, NotY); - } - - // ~(X & Y) --> (~X | ~Y) - De Morgan's Law - // ~(X | Y) === (~X & ~Y) - De Morgan's Law + // Apply DeMorgan's Law when inverts are free: + // ~(X & Y) --> (~X | ~Y) + // ~(X | Y) --> (~X & ~Y) if (IsFreeToInvert(NotOp->getOperand(0), NotOp->getOperand(0)->hasOneUse()) && IsFreeToInvert(NotOp->getOperand(1), diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 313ab13b9e2b7..e9286b1bf1750 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -379,7 +379,7 @@ static Value *simplifyX86immShift(const IntrinsicInst &II, for (unsigned i = 0; i != NumSubElts; ++i) { unsigned SubEltIdx = (NumSubElts - 1) - i; auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx)); - Count = Count.shl(BitWidth); + Count <<= BitWidth; Count |= SubElt->getValue().zextOrTrunc(64); } } @@ -1384,17 +1384,17 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) { // Create a mask for bits above (ctlz) or below (cttz) the first known one. bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz; - unsigned NumMaskBits = IsTZ ? Known.One.countTrailingZeros() - : Known.One.countLeadingZeros(); - APInt Mask = IsTZ ? APInt::getLowBitsSet(BitWidth, NumMaskBits) - : APInt::getHighBitsSet(BitWidth, NumMaskBits); + unsigned PossibleZeros = IsTZ ? Known.One.countTrailingZeros() + : Known.One.countLeadingZeros(); + unsigned DefiniteZeros = IsTZ ? Known.Zero.countTrailingOnes() + : Known.Zero.countLeadingOnes(); // If all bits above (ctlz) or below (cttz) the first known one are known // zero, this value is constant. 
// FIXME: This should be in InstSimplify because we're replacing an // instruction with a constant. - if (Mask.isSubsetOf(Known.Zero)) { - auto *C = ConstantInt::get(IT, APInt(BitWidth, NumMaskBits)); + if (PossibleZeros == DefiniteZeros) { + auto *C = ConstantInt::get(IT, DefiniteZeros); return IC.replaceInstUsesWith(II, C); } @@ -1818,8 +1818,8 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) { /// lifting. Instruction *InstCombiner::visitCallInst(CallInst &CI) { auto Args = CI.arg_operands(); - if (Value *V = SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), DL, - &TLI, &DT, &AC)) + if (Value *V = + SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), SQ)) return replaceInstUsesWith(CI, V); if (isFreeCall(&CI, &TLI)) diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index d846a631b96f6..60970775de63b 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -190,8 +190,8 @@ static void computeSignedMinMaxValuesFromKnownBits(const KnownBits &Known, Max = Known.One|UnknownBits; if (UnknownBits.isNegative()) { // Sign bit is unknown - Min.setBit(Min.getBitWidth()-1); - Max.clearBit(Max.getBitWidth()-1); + Min.setSignBit(); + Max.clearSignBit(); } } @@ -4269,8 +4269,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Changed = true; } - if (Value *V = - SimplifyICmpInst(I.getPredicate(), Op0, Op1, DL, &TLI, &DT, &AC, &I)) + if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // comparing -val or val with non-zero is the same as just comparing val @@ -4778,8 +4778,9 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, - I.getFastMathFlags(), DL, &TLI, &DT, &AC, &I)) + if (Value *V = + SimplifyFCmpInst(I.getPredicate(), Op0, Op1, I.getFastMathFlags(), + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // Simplify 'fcmp pred X, X' diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index 776686d3d1171..3be6419a129a4 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -17,9 +17,11 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -27,10 +29,9 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Pass.h" +#include "llvm/Support/Dwarf.h" #include "llvm/Transforms/InstCombine/InstCombineWorklist.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/IR/DIBuilder.h" #define DEBUG_TYPE "instcombine" @@ -193,7 +194,7 @@ private: TargetLibraryInfo &TLI; DominatorTree &DT; const DataLayout &DL; - + const SimplifyQuery SQ; // Optional analyses. When non-null, these can both be used to do better // combining and will be updated to reflect any changes. 
LoopInfo *LI; @@ -203,11 +204,11 @@ private: public: InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder, bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA, - AssumptionCache &AC, TargetLibraryInfo &TLI, - DominatorTree &DT, const DataLayout &DL, LoopInfo *LI) + AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, + const DataLayout &DL, LoopInfo *LI) : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize), ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT), - DL(DL), LI(LI), MadeIRChange(false) {} + DL(DL), SQ(DL, &TLI, &DT, &AC), LI(LI), MadeIRChange(false) {} /// \brief Run the combiner over the entire worklist until it is empty. /// @@ -533,6 +534,12 @@ private: /// value, or null if it didn't simplify. Value *SimplifyUsingDistributiveLaws(BinaryOperator &I); + /// This tries to simplify binary operations by factorizing out common terms + /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). + Value *tryFactorization(InstCombiner::BuilderTy *, BinaryOperator &, + Instruction::BinaryOps, Value *, Value *, Value *, + Value *); + /// \brief Attempts to replace V with a simpler value based on the demanded /// bits. Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownBits &Known, diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index ce66581a491a0..face9d9237ae1 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -179,7 +179,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyMulInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyMulInst(Op0, Op1, SQ)) return replaceInstUsesWith(I, V); if (Value *V = SimplifyUsingDistributiveLaws(I)) @@ -606,8 +606,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (isa<Constant>(Op0)) std::swap(Op0, Op1); - if (Value *V = - SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), SQ)) return replaceInstUsesWith(I, V); bool AllowReassociate = I.hasUnsafeAlgebra(); @@ -1111,7 +1110,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyUDivInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyUDivInst(Op0, Op1, SQ)) return replaceInstUsesWith(I, V); // Handle the integer div common cases @@ -1184,7 +1183,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifySDivInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifySDivInst(Op0, Op1, SQ)) return replaceInstUsesWith(I, V); // Handle the integer div common cases @@ -1296,8 +1295,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(), - DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(), SQ)) return replaceInstUsesWith(I, V); if (isa<Constant>(Op0)) @@ -1481,7 +1479,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyURemInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyURemInst(Op0, Op1, SQ)) 
return replaceInstUsesWith(I, V); if (Instruction *common = commonIRemTransforms(I)) @@ -1524,7 +1522,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifySRemInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifySRemInst(Op0, Op1, SQ)) return replaceInstUsesWith(I, V); // Handle the integer rem common cases @@ -1597,8 +1595,7 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(), - DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(), SQ)) return replaceInstUsesWith(I, V); // Handle cases involving: rem X, (select Cond, Y, Z) diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 85e5b6ba2dc2f..1117c11f4f51d 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -880,7 +880,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // PHINode simplification // Instruction *InstCombiner::visitPHINode(PHINode &PN) { - if (Value *V = SimplifyInstruction(&PN, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyInstruction(&PN, SQ)) return replaceInstUsesWith(PN, V); if (Instruction *Result = FoldPHIArgZextsIntoPHI(PN)) diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 76829c5e457bf..7afb8814fe52d 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1121,8 +1121,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *FalseVal = SI.getFalseValue(); Type *SelType = SI.getType(); - if (Value *V = - SimplifySelectInst(CondVal, TrueVal, FalseVal, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, SQ)) return replaceInstUsesWith(SI, V); if (Instruction *I = canonicalizeSelectToShuffle(SI)) diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index f77d713b9b071..219effce7ba56 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -520,7 +520,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (Value *V = SimplifyShlInst(Op0, Op1, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC)) + I.hasNoUnsignedWrap(), SQ)) return replaceInstUsesWith(I, V); if (Instruction *V = commonShiftTransforms(I)) @@ -618,7 +618,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { return replaceInstUsesWith(I, V); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyLShrInst(Op0, Op1, I.isExact(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyLShrInst(Op0, Op1, I.isExact(), SQ)) return replaceInstUsesWith(I, V); if (Instruction *R = commonShiftTransforms(I)) @@ -702,7 +702,7 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { return replaceInstUsesWith(I, V); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyAShrInst(Op0, Op1, I.isExact(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyAShrInst(Op0, Op1, I.isExact(), SQ)) return replaceInstUsesWith(I, V); if (Instruction *R = commonShiftTransforms(I)) diff --git 
a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 8d0ed8532779e..0195c5e727c9a 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -589,12 +589,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If LHS is non-negative or has all low bits zero, then the upper bits // are all zero. - if (LHSKnown.Zero.isSignBitSet() || LowBits.isSubsetOf(LHSKnown.Zero)) + if (LHSKnown.isNonNegative() || LowBits.isSubsetOf(LHSKnown.Zero)) Known.Zero |= ~LowBits; // If LHS is negative and not all low bits are zero, then the upper bits // are all one. - if (LHSKnown.One.isSignBitSet() && LowBits.intersects(LHSKnown.One)) + if (LHSKnown.isNegative() && LowBits.intersects(LHSKnown.One)) Known.One |= ~LowBits; assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); @@ -607,8 +607,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (DemandedMask.isSignBitSet()) { computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); // If it's known zero, our sign bit is also zero. - if (LHSKnown.Zero.isSignBitSet()) - Known.Zero.setSignBit(); + if (LHSKnown.isNonNegative()) + Known.makeNonNegative(); } break; case Instruction::URem: { @@ -1537,7 +1537,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane); LaneElts = LaneElts.getLoBits(InnerVWidthPerLane); - LaneElts = LaneElts.shl(InnerVWidthPerLane * (2 * Lane + OpNum)); + LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum); UndefElts |= LaneElts; } } diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index e89b400a4afc8..7fc6774f1849c 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -144,8 +144,8 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { } Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { - if (Value *V = SimplifyExtractElementInst( - EI.getVectorOperand(), EI.getIndexOperand(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyExtractElementInst(EI.getVectorOperand(), + EI.getIndexOperand(), SQ)) return replaceInstUsesWith(EI, V); // If vector val is constant with all elements the same, replace EI with @@ -1140,8 +1140,8 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { SmallVector<int, 16> Mask = SVI.getShuffleMask(); Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); - if (auto *V = SimplifyShuffleVectorInst(LHS, RHS, SVI.getMask(), - SVI.getType(), DL, &TLI, &DT, &AC)) + if (auto *V = + SimplifyShuffleVectorInst(LHS, RHS, SVI.getMask(), SVI.getType(), SQ)) return replaceInstUsesWith(SVI, V); bool MadeChange = false; diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 4729c79ca4c3d..1eb98b18bfb50 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -256,7 +256,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = I.getOperand(1); // Does "B op C" simplify? 
- if (Value *V = SimplifyBinOp(Opcode, B, C, DL)) { + if (Value *V = SimplifyBinOp(Opcode, B, C, SQ)) { // It simplifies to V. Form "A op V". I.setOperand(0, A); I.setOperand(1, V); @@ -285,7 +285,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = Op1->getOperand(1); // Does "A op B" simplify? - if (Value *V = SimplifyBinOp(Opcode, A, B, DL)) { + if (Value *V = SimplifyBinOp(Opcode, A, B, SQ)) { // It simplifies to V. Form "V op C". I.setOperand(0, V); I.setOperand(1, C); @@ -313,7 +313,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = I.getOperand(1); // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, DL)) { + if (Value *V = SimplifyBinOp(Opcode, C, A, SQ)) { // It simplifies to V. Form "V op B". I.setOperand(0, V); I.setOperand(1, B); @@ -333,7 +333,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = Op1->getOperand(1); // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, DL)) { + if (Value *V = SimplifyBinOp(Opcode, C, A, SQ)) { // It simplifies to V. Form "B op V". I.setOperand(0, B); I.setOperand(1, V); @@ -498,10 +498,10 @@ getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode, /// This tries to simplify binary operations by factorizing out common terms /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). -static Value *tryFactorization(InstCombiner::BuilderTy *Builder, - const DataLayout &DL, BinaryOperator &I, - Instruction::BinaryOps InnerOpcode, Value *A, - Value *B, Value *C, Value *D) { +Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder, + BinaryOperator &I, + Instruction::BinaryOps InnerOpcode, + Value *A, Value *B, Value *C, Value *D) { assert(A && B && C && D && "All values must be provided"); Value *V = nullptr; @@ -521,7 +521,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder, std::swap(C, D); // Consider forming "A op' (B op D)". // If "B op D" simplifies then it can be formed with no cost. - V = SimplifyBinOp(TopLevelOpcode, B, D, DL); + V = SimplifyBinOp(TopLevelOpcode, B, D, SQ); // If "B op D" doesn't simplify then only go on if both of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. if (!V && LHS->hasOneUse() && RHS->hasOneUse()) @@ -540,7 +540,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder, std::swap(C, D); // Consider forming "(A op C) op' B". // If "A op C" simplifies then it can be formed with no cost. - V = SimplifyBinOp(TopLevelOpcode, A, C, DL); + V = SimplifyBinOp(TopLevelOpcode, A, C, SQ); // If "A op C" doesn't simplify then only go on if both of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. @@ -610,23 +610,23 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { // The instruction has the form "(A op' B) op (C op' D)". Try to factorize // a common term. if (Op0 && Op1 && LHSOpcode == RHSOpcode) - if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, C, D)) + if (Value *V = tryFactorization(Builder, I, LHSOpcode, A, B, C, D)) return V; // The instruction has the form "(A op' B) op (C)". Try to factorize common // term. if (Op0) if (Value *Ident = getIdentityValue(LHSOpcode, RHS)) - if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, RHS, - Ident)) + if (Value *V = + tryFactorization(Builder, I, LHSOpcode, A, B, RHS, Ident)) return V; // The instruction has the form "(B) op (C op' D)". Try to factorize common // term. 
if (Op1) if (Value *Ident = getIdentityValue(RHSOpcode, LHS)) - if (Value *V = tryFactorization(Builder, DL, I, RHSOpcode, LHS, Ident, - C, D)) + if (Value *V = + tryFactorization(Builder, I, RHSOpcode, LHS, Ident, C, D)) return V; } @@ -638,8 +638,8 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' // Do "A op C" and "B op C" both simplify? - if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, DL)) - if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, DL)) { + if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQ)) + if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQ)) { // They do! Return "L op' R". ++NumExpand; C = Builder->CreateBinOp(InnerOpcode, L, R); @@ -655,8 +655,8 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op' // Do "A op B" and "A op C" both simplify? - if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, DL)) - if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, DL)) { + if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQ)) + if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQ)) { // They do! Return "L op' R". ++NumExpand; A = Builder->CreateBinOp(InnerOpcode, L, R); @@ -672,14 +672,14 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { if (SI0->getCondition() == SI1->getCondition()) { Value *SI = nullptr; if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(), - SI1->getFalseValue(), DL, &TLI, &DT, &AC)) + SI1->getFalseValue(), SQ)) SI = Builder->CreateSelect(SI0->getCondition(), Builder->CreateBinOp(TopLevelOpcode, SI0->getTrueValue(), SI1->getTrueValue()), V); if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(), - SI1->getTrueValue(), DL, &TLI, &DT, &AC)) + SI1->getTrueValue(), SQ)) SI = Builder->CreateSelect( SI0->getCondition(), V, Builder->CreateBinOp(TopLevelOpcode, SI0->getFalseValue(), @@ -1399,8 +1399,7 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end()); - if (Value *V = - SimplifyGEPInst(GEP.getSourceElementType(), Ops, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyGEPInst(GEP.getSourceElementType(), Ops, SQ)) return replaceInstUsesWith(GEP, V); Value *PtrOp = GEP.getOperand(0); @@ -1589,7 +1588,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (SO1->getType() != GO1->getType()) return nullptr; - Value* Sum = SimplifyAddInst(GO1, SO1, false, false, DL, &TLI, &DT, &AC); + Value *Sum = SimplifyAddInst(GO1, SO1, false, false, SQ); // Only do the combine when we are sure the cost after the // merge is never more than that before the merge. if (Sum == nullptr) @@ -1949,9 +1948,9 @@ static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo *TLI, return isAllocLikeFn(V, TLI) && V != AI; } -static bool -isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users, - const TargetLibraryInfo *TLI) { +static bool isAllocSiteRemovable(Instruction *AI, + SmallVectorImpl<WeakTrackingVH> &Users, + const TargetLibraryInfo *TLI) { SmallVector<Instruction*, 4> Worklist; Worklist.push_back(AI); @@ -2035,7 +2034,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { // If we have a malloc call which is only used in any amount of comparisons // to null and free calls, delete the calls and replace the comparisons with // true or false as appropriate. 
- SmallVector<WeakVH, 64> Users; + SmallVector<WeakTrackingVH, 64> Users; if (isAllocSiteRemovable(&MI, Users, &TLI)) { for (unsigned i = 0, e = Users.size(); i != e; ++i) { // Lowering all @llvm.objectsize calls first because they may @@ -2304,8 +2303,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { if (!EV.hasIndices()) return replaceInstUsesWith(EV, Agg); - if (Value *V = - SimplifyExtractValueInst(Agg, EV.getIndices(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyExtractValueInst(Agg, EV.getIndices(), SQ)) return replaceInstUsesWith(EV, V); if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) { diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index b866958e3c4b8..b034ccc469338 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -101,6 +101,10 @@ static const char *const kAsanRegisterImageGlobalsName = "__asan_register_image_globals"; static const char *const kAsanUnregisterImageGlobalsName = "__asan_unregister_image_globals"; +static const char *const kAsanRegisterElfGlobalsName = + "__asan_register_elf_globals"; +static const char *const kAsanUnregisterElfGlobalsName = + "__asan_unregister_elf_globals"; static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; static const char *const kAsanInitName = "__asan_init"; @@ -120,8 +124,11 @@ static const char *const kAsanPoisonStackMemoryName = "__asan_poison_stack_memory"; static const char *const kAsanUnpoisonStackMemoryName = "__asan_unpoison_stack_memory"; + +// ASan version script has __asan_* wildcard. Triple underscore prevents a +// linker (gold) warning about attempting to export a local symbol. static const char *const kAsanGlobalsRegisteredFlagName = - "__asan_globals_registered"; + "___asan_globals_registered"; static const char *const kAsanOptionDetectUseAfterReturn = "__asan_option_detect_stack_use_after_return"; @@ -270,6 +277,13 @@ static cl::opt<bool> "code stripping of globals"), cl::Hidden, cl::init(true)); +// This is on by default even though there is a bug in gold: +// https://sourceware.org/bugzilla/show_bug.cgi?id=19002 +static cl::opt<bool> + ClWithComdat("asan-with-comdat", + cl::desc("Place ASan constructors in comdat sections"), + cl::Hidden, cl::init(true)); + // Debug flags. 
static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden, cl::init(0)); @@ -607,10 +621,14 @@ public: private: void initializeCallbacks(Module &M); - bool InstrumentGlobals(IRBuilder<> &IRB, Module &M); + bool InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat); void InstrumentGlobalsCOFF(IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals, ArrayRef<Constant *> MetadataInitializers); + void InstrumentGlobalsELF(IRBuilder<> &IRB, Module &M, + ArrayRef<GlobalVariable *> ExtendedGlobals, + ArrayRef<Constant *> MetadataInitializers, + const std::string &UniqueModuleId); void InstrumentGlobalsMachO(IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals, ArrayRef<Constant *> MetadataInitializers); @@ -621,7 +639,8 @@ private: GlobalVariable *CreateMetadataGlobal(Module &M, Constant *Initializer, StringRef OriginalName); - void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata); + void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata, + StringRef InternalSuffix); IRBuilder<> CreateAsanModuleDtor(Module &M); bool ShouldInstrumentGlobal(GlobalVariable *G); @@ -647,6 +666,11 @@ private: Function *AsanUnregisterGlobals; Function *AsanRegisterImageGlobals; Function *AsanUnregisterImageGlobals; + Function *AsanRegisterElfGlobals; + Function *AsanUnregisterElfGlobals; + + Function *AsanCtorFunction = nullptr; + Function *AsanDtorFunction = nullptr; }; // Stack poisoning does not play well with exception handling. @@ -1431,8 +1455,13 @@ void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit, void AddressSanitizerModule::createInitializerPoisonCalls( Module &M, GlobalValue *ModuleName) { GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); + if (!GV) + return; + + ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer()); + if (!CA) + return; - ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); for (Use &OP : CA->operands()) { if (isa<ConstantAggregateZero>(OP)) continue; ConstantStruct *CS = cast<ConstantStruct>(OP); @@ -1594,12 +1623,22 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) { checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy)); AsanUnregisterImageGlobals->setLinkage(Function::ExternalLinkage); + + AsanRegisterElfGlobals = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(kAsanRegisterElfGlobalsName, IRB.getVoidTy(), + IntptrTy, IntptrTy, IntptrTy)); + AsanRegisterElfGlobals->setLinkage(Function::ExternalLinkage); + + AsanUnregisterElfGlobals = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(kAsanUnregisterElfGlobalsName, IRB.getVoidTy(), + IntptrTy, IntptrTy, IntptrTy)); + AsanUnregisterElfGlobals->setLinkage(Function::ExternalLinkage); } // Put the metadata and the instrumented global in the same group. This ensures // that the metadata is discarded if the instrumented global is discarded. 
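In API terms the grouping described here reduces to giving both globals the same Comdat; a minimal sketch (the InternalSuffix parameter added below only changes which comdat name is used for internal globals):

  Comdat *C = M.getOrInsertComdat(G->getName());
  G->setComdat(C);        // the instrumented global
  Metadata->setComdat(C); // its metadata: discarded together with G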
void AddressSanitizerModule::SetComdatForGlobalMetadata( - GlobalVariable *G, GlobalVariable *Metadata) { + GlobalVariable *G, GlobalVariable *Metadata, StringRef InternalSuffix) { Module &M = *G->getParent(); Comdat *C = G->getComdat(); if (!C) { @@ -1609,7 +1648,15 @@ void AddressSanitizerModule::SetComdatForGlobalMetadata( assert(G->hasLocalLinkage()); G->setName(Twine(kAsanGenPrefix) + "_anon_global"); } - C = M.getOrInsertComdat(G->getName()); + + if (!InternalSuffix.empty() && G->hasLocalLinkage()) { + std::string Name = G->getName(); + Name += InternalSuffix; + C = M.getOrInsertComdat(Name); + } else { + C = M.getOrInsertComdat(G->getName()); + } + // Make this IMAGE_COMDAT_SELECT_NODUPLICATES on COFF. if (TargetTriple.isOSBinFormatCOFF()) C->setSelectionKind(Comdat::NoDuplicates); @@ -1636,11 +1683,10 @@ AddressSanitizerModule::CreateMetadataGlobal(Module &M, Constant *Initializer, } IRBuilder<> AddressSanitizerModule::CreateAsanModuleDtor(Module &M) { - Function *AsanDtorFunction = + AsanDtorFunction = Function::Create(FunctionType::get(Type::getVoidTy(*C), false), GlobalValue::InternalLinkage, kAsanModuleDtorName, &M); BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction); - appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority); return IRBuilder<>(ReturnInst::Create(*C, AsanDtorBB)); } @@ -1665,8 +1711,67 @@ void AddressSanitizerModule::InstrumentGlobalsCOFF( "global metadata will not be padded appropriately"); Metadata->setAlignment(SizeOfGlobalStruct); - SetComdatForGlobalMetadata(G, Metadata); + SetComdatForGlobalMetadata(G, Metadata, ""); + } +} + +void AddressSanitizerModule::InstrumentGlobalsELF( + IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals, + ArrayRef<Constant *> MetadataInitializers, + const std::string &UniqueModuleId) { + assert(ExtendedGlobals.size() == MetadataInitializers.size()); + + SmallVector<GlobalValue *, 16> MetadataGlobals(ExtendedGlobals.size()); + for (size_t i = 0; i < ExtendedGlobals.size(); i++) { + GlobalVariable *G = ExtendedGlobals[i]; + GlobalVariable *Metadata = + CreateMetadataGlobal(M, MetadataInitializers[i], G->getName()); + MDNode *MD = MDNode::get(M.getContext(), ValueAsMetadata::get(G)); + Metadata->setMetadata(LLVMContext::MD_associated, MD); + MetadataGlobals[i] = Metadata; + + SetComdatForGlobalMetadata(G, Metadata, UniqueModuleId); } + + // Update llvm.compiler.used, adding the new metadata globals. This is + // needed so that during LTO these variables stay alive. + if (!MetadataGlobals.empty()) + appendToCompilerUsed(M, MetadataGlobals); + + // RegisteredFlag serves two purposes. First, we can pass it to dladdr() + // to look up the loaded image that contains it. Second, we can store in it + // whether registration has already occurred, to prevent duplicate + // registration. + // + // Common linkage ensures that there is only one global per shared library. + GlobalVariable *RegisteredFlag = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::CommonLinkage, + ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName); + RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility); + + // Create start and stop symbols. 
+ GlobalVariable *StartELFMetadata = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr, + "__start_" + getGlobalMetadataSection()); + StartELFMetadata->setVisibility(GlobalVariable::HiddenVisibility); + GlobalVariable *StopELFMetadata = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr, + "__stop_" + getGlobalMetadataSection()); + StopELFMetadata->setVisibility(GlobalVariable::HiddenVisibility); + + // Create a call to register the globals with the runtime. + IRB.CreateCall(AsanRegisterElfGlobals, + {IRB.CreatePointerCast(RegisteredFlag, IntptrTy), + IRB.CreatePointerCast(StartELFMetadata, IntptrTy), + IRB.CreatePointerCast(StopELFMetadata, IntptrTy)}); + + // We also need to unregister globals at the end, e.g., when a shared library + // gets closed. + IRBuilder<> IRB_Dtor = CreateAsanModuleDtor(M); + IRB_Dtor.CreateCall(AsanUnregisterElfGlobals, + {IRB.CreatePointerCast(RegisteredFlag, IntptrTy), + IRB.CreatePointerCast(StartELFMetadata, IntptrTy), + IRB.CreatePointerCast(StopELFMetadata, IntptrTy)}); } void AddressSanitizerModule::InstrumentGlobalsMachO( @@ -1756,7 +1861,10 @@ void AddressSanitizerModule::InstrumentGlobalsWithMetadataArray( // This function replaces all global variables with new variables that have // trailing redzones. It also creates a function that poisons // redzones and inserts this function into llvm.global_ctors. -bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { +// Sets *CtorComdat to true if the global registration code emitted into the +// asan constructor is comdat-compatible. +bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat) { + *CtorComdat = false; GlobalsMD.init(M); SmallVector<GlobalVariable *, 16> GlobalsToChange; @@ -1766,7 +1874,10 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { } size_t n = GlobalsToChange.size(); - if (n == 0) return false; + if (n == 0) { + *CtorComdat = true; + return false; + } auto &DL = M.getDataLayout(); @@ -1911,7 +2022,14 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { Initializers[i] = Initializer; } - if (UseGlobalsGC && TargetTriple.isOSBinFormatCOFF()) { + std::string ELFUniqueModuleId = + (UseGlobalsGC && TargetTriple.isOSBinFormatELF()) ? getUniqueModuleId(&M) + : ""; + + if (!ELFUniqueModuleId.empty()) { + InstrumentGlobalsELF(IRB, M, NewGlobals, Initializers, ELFUniqueModuleId); + *CtorComdat = true; + } else if (UseGlobalsGC && TargetTriple.isOSBinFormatCOFF()) { InstrumentGlobalsCOFF(IRB, M, NewGlobals, Initializers); } else if (UseGlobalsGC && ShouldUseMachOGlobalsSection()) { InstrumentGlobalsMachO(IRB, M, NewGlobals, Initializers); @@ -1938,17 +2056,36 @@ bool AddressSanitizerModule::runOnModule(Module &M) { if (CompileKernel) return false; - Function *AsanCtorFunction; + // Create a module constructor. A destructor is created lazily because not all + // platforms, and not all modules need it. std::tie(AsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions( M, kAsanModuleCtorName, kAsanInitName, /*InitArgTypes=*/{}, /*InitArgs=*/{}, kAsanVersionCheckName); - appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority); + bool CtorComdat = true; bool Changed = false; // TODO(glider): temporarily disabled globals instrumentation for KASan. 
if (ClGlobals) { IRBuilder<> IRB(AsanCtorFunction->getEntryBlock().getTerminator()); - Changed |= InstrumentGlobals(IRB, M); + Changed |= InstrumentGlobals(IRB, M, &CtorComdat); + } + + // Put the constructor and destructor in comdat if both + // (1) global instrumentation is not TU-specific and + // (2) the target is ELF. + if (ClWithComdat && TargetTriple.isOSBinFormatELF() && CtorComdat) { + AsanCtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleCtorName)); + appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority, + AsanCtorFunction); + if (AsanDtorFunction) { + AsanDtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleDtorName)); + appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority, + AsanDtorFunction); + } + } else { + appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority); + if (AsanDtorFunction) + appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority); } return Changed; @@ -2586,7 +2723,7 @@ void FunctionStackPoisoner::processStaticAllocas() { Value *NewAllocaPtr = IRB.CreateIntToPtr( IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)), AI->getType()); - replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, /*Deref=*/false); + replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, DIExpression::NoDeref); AI->replaceAllUsesWith(NewAllocaPtr); } diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index d7eb857cff7e1..493d014586c6a 100644 --- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -771,7 +771,7 @@ public: if (perform(MI)) { Changed = true; ++NumOfPGOMemOPOpt; - DEBUG(dbgs() << "MemOP calls: " << MI->getCalledFunction()->getName() + DEBUG(dbgs() << "MemOP call: " << MI->getCalledFunction()->getName() << " is Transformed.\n"); } } @@ -863,13 +863,23 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { ActualCount = *BBEdgeCount; } + ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals); + DEBUG(dbgs() << "Read one memory intrinsic profile with count " << ActualCount + << "\n"); + DEBUG( + for (auto &VD + : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; }); + if (ActualCount < MemOPCountThreshold) return false; + // Skip if the total value profiled count is 0, in which case we can't + // scale up the counts properly (and there is no profitable transformation).
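The scaling referred to here: size-class counts come from whole-function value profiling, while ActualCount is this call site's block count, so each count is rescaled by ActualCount / SavedTotalCount before the thresholds apply. A hypothetical helper showing just the arithmetic (not the pass's actual code):

  static uint64_t scaleCount(uint64_t Count, uint64_t Num, uint64_t Denom) {
    // Denom == 0 is the degenerate case the comment above describes.
    return Denom == 0 ? 0 : uint64_t(double(Count) * Num / Denom);
  }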
+ if (TotalCount == 0) + return false; - ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals); TotalCount = ActualCount; if (MemOPScaleCount) - DEBUG(dbgs() << "Scale counts: numberator = " << ActualCount + DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount << " denominator = " << SavedTotalCount << "\n"); // Keeping track of the count of the default case: @@ -915,14 +925,10 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { MaxCount = RemainCount; uint64_t SumForOpt = TotalCount - RemainCount; - DEBUG(dbgs() << "Read one memory intrinsic profile: " << SumForOpt << " vs " - << TotalCount << "\n"); - DEBUG( - for (auto &VD - : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; }); DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version - << " Versions\n"); + << " Versions (covering " << SumForOpt << " out of " + << TotalCount << ")\n"); // mem_op(..., size) // ==> diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 190f05db4b0c1..3e480a6df446d 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2643,7 +2643,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { "ByVal argument is not a pointer!"); Size = DL.getTypeAllocSize(A->getType()->getPointerElementType()); if (ArgOffset + Size > kParamTLSSize) break; - unsigned ParamAlignment = CS.getParamAlignment(i + 1); + unsigned ParamAlignment = CS.getParamAlignment(i); unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment); Store = IRB.CreateMemCpy(ArgShadowBase, getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB), @@ -3502,7 +3502,7 @@ struct VarArgPowerPC64Helper : public VarArgHelper { assert(A->getType()->isPointerTy()); Type *RealTy = A->getType()->getPointerElementType(); uint64_t ArgSize = DL.getTypeAllocSize(RealTy); - uint64_t ArgAlign = CS.getParamAlignment(ArgNo + 1); + uint64_t ArgAlign = CS.getParamAlignment(ArgNo); if (ArgAlign < 8) ArgAlign = 8; VAArgOffset = alignTo(VAArgOffset, ArgAlign); diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h index f02b75f0b4560..cd9b3d96a14f7 100644 --- a/lib/Transforms/ObjCARC/ObjCARC.h +++ b/lib/Transforms/ObjCARC/ObjCARC.h @@ -69,6 +69,19 @@ static inline void EraseInstruction(Instruction *CI) { RecursivelyDeleteTriviallyDeadInstructions(OldArg); } +/// If Inst is a RetainRV and its operand is a call or invoke, return the +/// operand. Otherwise return null.
+static inline const Instruction *getreturnRVOperand(const Instruction &Inst, + ARCInstKind Class) { + if (Class != ARCInstKind::RetainRV) + return nullptr; + + const auto *Opnd = Inst.getOperand(0)->stripPointerCasts(); + if (const auto *C = dyn_cast<CallInst>(Opnd)) + return C; + return dyn_cast<InvokeInst>(Opnd); +} + } // end namespace objcarc } // end namespace llvm diff --git a/lib/Transforms/ObjCARC/PtrState.cpp b/lib/Transforms/ObjCARC/PtrState.cpp index c1bbc4e96b16d..d13e941044f14 100644 --- a/lib/Transforms/ObjCARC/PtrState.cpp +++ b/lib/Transforms/ObjCARC/PtrState.cpp @@ -244,6 +244,18 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, ARCInstKind Class) { + auto SetSeqAndInsertReverseInsertPt = [&](Sequence NewSeq){ + assert(!HasReverseInsertPts()); + SetSeq(NewSeq); + // If this is an invoke instruction, we're scanning it as part of + // one of its successor blocks, since we can't insert code after it + // in its own block, and we don't want to split critical edges. + if (isa<InvokeInst>(Inst)) + InsertReverseInsertPt(&*BB->getFirstInsertionPt()); + else + InsertReverseInsertPt(&*++Inst->getIterator()); + }; + // Check for possible direct uses. switch (GetSeq()) { case S_Release: @@ -251,26 +263,18 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst, if (CanUse(Inst, Ptr, PA, Class)) { DEBUG(dbgs() << " CanUse: Seq: " << GetSeq() << "; " << *Ptr << "\n"); - assert(!HasReverseInsertPts()); - // If this is an invoke instruction, we're scanning it as part of - // one of its successor blocks, since we can't insert code after it - // in its own block, and we don't want to split critical edges. - if (isa<InvokeInst>(Inst)) - InsertReverseInsertPt(&*BB->getFirstInsertionPt()); - else - InsertReverseInsertPt(&*++Inst->getIterator()); - SetSeq(S_Use); + SetSeqAndInsertReverseInsertPt(S_Use); } else if (Seq == S_Release && IsUser(Class)) { DEBUG(dbgs() << " PreciseReleaseUse: Seq: " << GetSeq() << "; " << *Ptr << "\n"); // Non-movable releases depend on any possible objc pointer use. - SetSeq(S_Stop); - assert(!HasReverseInsertPts()); - // As above; handle invoke specially. 
- if (isa<InvokeInst>(Inst)) - InsertReverseInsertPt(&*BB->getFirstInsertionPt()); - else - InsertReverseInsertPt(&*++Inst->getIterator()); + SetSeqAndInsertReverseInsertPt(S_Stop); + } else if (const auto *Call = getreturnRVOperand(*Inst, Class)) { + if (CanUse(Call, Ptr, PA, GetBasicARCInstKind(Call))) { + DEBUG(dbgs() << " ReleaseUse: Seq: " << GetSeq() << "; " + << *Ptr << "\n"); + SetSeqAndInsertReverseInsertPt(S_Stop); + } } break; case S_Stop: diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index b323ab3bd443c..523390758769a 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -55,6 +55,7 @@ add_llvm_library(LLVMScalarOpts Scalar.cpp Scalarizer.cpp SeparateConstOffsetFromGEP.cpp + SimpleLoopUnswitch.cpp SimplifyCFGPass.cpp Sink.cpp SpeculativeExecution.cpp diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index b5a4cc2f39533..dc864f48bf1f8 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -151,7 +151,7 @@ static bool processPHI(PHINode *P, LazyValueInfo *LVI, Changed = true; } - if (Value *V = SimplifyInstruction(P, SQ.getWithInstruction(P))) { + if (Value *V = SimplifyInstruction(P, SQ)) { P->replaceAllUsesWith(V); P->eraseFromParent(); Changed = true; @@ -565,25 +565,14 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) { return false; LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI(); - auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; - auto *TLIWP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); - auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr; - auto *ACWP = getAnalysisIfAvailable<AssumptionCacheTracker>(); - auto *AC = ACWP ? &ACWP->getAssumptionCache(F) : nullptr; - const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC); - return runImpl(F, LVI, SQ); + return runImpl(F, LVI, getBestSimplifyQuery(*this, F)); } PreservedAnalyses CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) { LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F); - auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F); - auto *TLI = AM.getCachedResult<TargetLibraryAnalysis>(F); - auto *AC = AM.getCachedResult<AssumptionAnalysis>(F); - const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC); - bool Changed = runImpl(F, LVI, SQ); + bool Changed = runImpl(F, LVI, getBestSimplifyQuery(AM, F)); if (!Changed) return PreservedAnalyses::all(); diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 04479b6e49ac8..d8f8a58a5fdfa 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -253,6 +253,7 @@ public: const TargetTransformInfo &TTI; DominatorTree &DT; AssumptionCache &AC; + const SimplifyQuery SQ; MemorySSA *MSSA; std::unique_ptr<MemorySSAUpdater> MSSAUpdater; typedef RecyclingAllocator< @@ -315,9 +316,10 @@ public: unsigned CurrentGeneration; /// \brief Set up the EarlyCSE runner for a particular function. 
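The SQ member added here bundles the DataLayout and the three optional analyses once at construction, the same pattern this patch applies across the tree; call sites then pass one argument instead of four. Minimal sketch:

  // was: SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)
  const SimplifyQuery SQ(DL, &TLI, &DT, &AC);
  if (Value *V = SimplifyInstruction(Inst, SQ))
    Inst->replaceAllUsesWith(V);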
- EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI, - DominatorTree &DT, AssumptionCache &AC, MemorySSA *MSSA) - : TLI(TLI), TTI(TTI), DT(DT), AC(AC), MSSA(MSSA), + EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI, + const TargetTransformInfo &TTI, DominatorTree &DT, + AssumptionCache &AC, MemorySSA *MSSA) + : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA), MSSAUpdater(make_unique<MemorySSAUpdater>(MSSA)), CurrentGeneration(0) { } @@ -616,8 +618,6 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { /// stores which can occur in bitfield code among other things. Instruction *LastStore = nullptr; - const DataLayout &DL = BB->getModule()->getDataLayout(); - // See if any instructions in the block can be eliminated. If so, do it. If // not, add them to AvailableValues. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { @@ -635,10 +635,16 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // Skip assume intrinsics, they don't really have side effects (although // they're marked as such to ensure preservation of control dependencies), - // and this pass will not disturb any of the assumption's control - // dependencies. + // and this pass will not bother with its removal. However, we should mark + // its condition as true for all dominated blocks. if (match(Inst, m_Intrinsic<Intrinsic::assume>())) { - DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n'); + auto *CondI = + dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0)); + if (CondI && SimpleValue::canHandle(CondI)) { + DEBUG(dbgs() << "EarlyCSE considering assumption: " << *Inst << '\n'); + AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext())); + } else + DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n'); continue; } @@ -658,10 +664,25 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { if (match(Inst, m_Intrinsic<Intrinsic::experimental_guard>())) { if (auto *CondI = dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) { - // The condition we're on guarding here is true for all dominated - // locations. - if (SimpleValue::canHandle(CondI)) + if (SimpleValue::canHandle(CondI)) { + // Do we already know the actual value of this condition? + if (auto *KnownCond = AvailableValues.lookup(CondI)) { + // Is the condition known to be true? + if (isa<ConstantInt>(KnownCond) && + cast<ConstantInt>(KnownCond)->isOneValue()) { + DEBUG(dbgs() << "EarlyCSE removing guard: " << *Inst << '\n'); + removeMSSA(Inst); + Inst->eraseFromParent(); + Changed = true; + continue; + } else + // Use the known value if it wasn't true. + cast<CallInst>(Inst)->setArgOperand(0, KnownCond); + } + // The condition we're on guarding here is true for all dominated + // locations. AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext())); + } } // Guard intrinsics read all memory, but don't write any memory. @@ -673,7 +694,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // If the instruction can be simplified (e.g. X+0 = X) then replace it with // its simpler value. - if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) { + if (Value *V = SimplifyInstruction(Inst, SQ)) { DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n'); bool Killed = false; if (!Inst->use_empty()) { @@ -964,7 +985,7 @@ PreservedAnalyses EarlyCSEPass::run(Function &F, auto *MSSA = UseMemorySSA ? 
&AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr; - EarlyCSE CSE(TLI, TTI, DT, AC, MSSA); + EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA); if (!CSE.run()) return PreservedAnalyses::all(); @@ -1008,7 +1029,7 @@ public: auto *MSSA = UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr; - EarlyCSE CSE(TLI, TTI, DT, AC, MSSA); + EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA); return CSE.run(); } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index be696df548d52..c04646eed49a8 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1687,7 +1687,7 @@ bool GVN::processInstruction(Instruction *I) { // example if it determines that %y is equal to %x then the instruction // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. const DataLayout &DL = I->getModule()->getDataLayout(); - if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) { + if (Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC})) { bool Changed = false; if (!I->use_empty()) { I->replaceAllUsesWith(V); diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index dcb2a4a0c6e6b..3953198fe6052 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -97,7 +97,7 @@ class IndVarSimplify { TargetLibraryInfo *TLI; const TargetTransformInfo *TTI; - SmallVector<WeakVH, 16> DeadInsts; + SmallVector<WeakTrackingVH, 16> DeadInsts; bool Changed = false; bool isValidRewrite(Value *FromVal, Value *ToVal); @@ -415,8 +415,8 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { Compare->getName()); // In the following deletions, PN may become dead and may be deleted. - // Use a WeakVH to observe whether this happens. - WeakVH WeakPH = PN; + // Use a WeakTrackingVH to observe whether this happens. + WeakTrackingVH WeakPH = PN; // Delete the old floating point exit comparison. The branch starts using the // new comparison. @@ -451,7 +451,7 @@ void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) { // BasicBlock *Header = L->getHeader(); - SmallVector<WeakVH, 8> PHIs; + SmallVector<WeakTrackingVH, 8> PHIs; for (BasicBlock::iterator I = Header->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) PHIs.push_back(PN); @@ -901,7 +901,7 @@ class WidenIV { PHINode *WidePhi; Instruction *WideInc; const SCEV *WideIncExpr; - SmallVectorImpl<WeakVH> &DeadInsts; + SmallVectorImpl<WeakTrackingVH> &DeadInsts; SmallPtrSet<Instruction *,16> Widened; SmallVector<NarrowIVDefUse, 8> NarrowIVUsers; @@ -941,20 +941,13 @@ class WidenIV { } public: - WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, - ScalarEvolution *SEv, DominatorTree *DTree, - SmallVectorImpl<WeakVH> &DI, bool HasGuards) : - OrigPhi(WI.NarrowIV), - WideType(WI.WidestNativeType), - LI(LInfo), - L(LI->getLoopFor(OrigPhi->getParent())), - SE(SEv), - DT(DTree), - HasGuards(HasGuards), - WidePhi(nullptr), - WideInc(nullptr), - WideIncExpr(nullptr), - DeadInsts(DI) { + WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, + DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI, + bool HasGuards) + : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo), + L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), + HasGuards(HasGuards), WidePhi(nullptr), WideInc(nullptr), + WideIncExpr(nullptr), DeadInsts(DI) { assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); ExtendKindMap[OrigPhi] = WI.IsSigned ? 
SignExtended : ZeroExtended; } diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp index 9e2563879da2b..5e116ef2fe75e 100644 --- a/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -138,7 +138,7 @@ private: // Tries to infer the specific address space of each address expression in // Postorder. - void inferAddressSpaces(const std::vector<Value *> &Postorder, + void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder, ValueToAddrSpaceMapTy *InferredAddrSpace) const; bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const; @@ -147,7 +147,7 @@ private: // address spaces if InferredAddrSpace says so. Postorder is the postorder of // all flat expressions in the use-def graph of function F. bool - rewriteWithNewAddressSpaces(const std::vector<Value *> &Postorder, + rewriteWithNewAddressSpaces(ArrayRef<WeakTrackingVH> Postorder, const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const; @@ -162,7 +162,7 @@ private: std::vector<std::pair<Value *, bool>> &PostorderStack, DenseSet<Value *> &Visited) const; - std::vector<Value *> collectFlatAddressExpressions(Function &F) const; + std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const; Value *cloneValueWithNewAddressSpace( Value *V, unsigned NewAddrSpace, @@ -274,16 +274,36 @@ void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack( Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack, DenseSet<Value *> &Visited) const { assert(V->getType()->isPointerTy()); + + // Generic addressing expressions may be hidden in nested constant + // expressions. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + // TODO: Look in non-address parts, like icmp operands. + if (isAddressExpression(*CE) && Visited.insert(CE).second) + PostorderStack.push_back(std::make_pair(CE, false)); + + return; + } + if (isAddressExpression(*V) && V->getType()->getPointerAddressSpace() == FlatAddrSpace) { - if (Visited.insert(V).second) + if (Visited.insert(V).second) { PostorderStack.push_back(std::make_pair(V, false)); + + Operator *Op = cast<Operator>(V); + for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) { + if (isAddressExpression(*CE) && Visited.insert(CE).second) + PostorderStack.emplace_back(CE, false); + } + } + } } } // Returns all flat address expressions in function F. The elements are // ordered in postorder. -std::vector<Value *> +std::vector<WeakTrackingVH> InferAddressSpaces::collectFlatAddressExpressions(Function &F) const { // This function implements a non-recursive postorder traversal of a partial // use-def graph of function F. @@ -326,21 +346,25 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const { PushPtrOperand(Cmp->getOperand(0)); PushPtrOperand(Cmp->getOperand(1)); } + } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) { + if (!ASC->getType()->isVectorTy()) + PushPtrOperand(ASC->getPointerOperand()); } } - std::vector<Value *> Postorder; // The resultant postorder. + std::vector<WeakTrackingVH> Postorder; // The resultant postorder. while (!PostorderStack.empty()) { + Value *TopVal = PostorderStack.back().first; // If the operands of the expression on the top are already explored, // adds that expression to the resultant postorder.
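Holding the postorder as WeakTrackingVH matters because the rewrite stage may RAUW a ConstantExpr that is still queued behind it. An illustrative case, assuming an AMDGPU-style layout where 0 is the flat space and 3 is LDS (IR in comments):

  // @lds lives in addrspace(3); a flat access through nested constant
  // expressions puts both levels into the postorder:
  //   getelementptr ([4 x float], [4 x float]* addrspacecast
  //     ([4 x float] addrspace(3)* @lds to [4 x float]*), i64 0, i64 0)
  // Rewriting the inner addrspacecast replaces the enclosing ConstantExpr
  // too; a plain Value* entry would dangle, while the tracking handle
  // follows the replacement.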
if (PostorderStack.back().second) { - Postorder.push_back(PostorderStack.back().first); + Postorder.push_back(TopVal); PostorderStack.pop_back(); continue; } // Otherwise, adds its operands to the stack and explores them. PostorderStack.back().second = true; - for (Value *PtrOperand : getPointerOperands(*PostorderStack.back().first)) { + for (Value *PtrOperand : getPointerOperands(*TopVal)) { appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack, Visited); } @@ -559,7 +583,7 @@ bool InferAddressSpaces::runOnFunction(Function &F) { return false; // Collects all flat address expressions in postorder. - std::vector<Value *> Postorder = collectFlatAddressExpressions(F); + std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(F); // Runs a data-flow analysis to refine the address spaces of every expression // in Postorder. @@ -571,8 +595,10 @@ bool InferAddressSpaces::runOnFunction(Function &F) { return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F); } +// Constants need to be tracked through RAUW to handle cases with nested +// constant expressions, so wrap values in WeakTrackingVH. void InferAddressSpaces::inferAddressSpaces( - const std::vector<Value *> &Postorder, + ArrayRef<WeakTrackingVH> Postorder, ValueToAddrSpaceMapTy *InferredAddrSpace) const { SetVector<Value *> Worklist(Postorder.begin(), Postorder.end()); // Initially, all expressions are in the uninitialized address space. @@ -784,8 +810,8 @@ static Value::use_iterator skipToNextUser(Value::use_iterator I, } bool InferAddressSpaces::rewriteWithNewAddressSpaces( - const std::vector<Value *> &Postorder, - const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const { + ArrayRef<WeakTrackingVH> Postorder, + const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const { // For each address expression to be modified, creates a clone of it with its // pointer operands converted to the new address space. Since the pointer // operands are converted, the clone is naturally in the new address space by @@ -812,8 +838,12 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces( NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(UndefUse->get())); } + SmallVector<Instruction *, 16> DeadInstructions; + // Replaces the uses of the old address expressions with the new ones. - for (Value *V : Postorder) { + for (const WeakTrackingVH &WVH : Postorder) { + assert(WVH && "value was unexpectedly deleted"); + Value *V = WVH; Value *NewV = ValueWithNewAddrSpace.lookup(V); if (NewV == nullptr) continue; @@ -821,6 +851,17 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces( DEBUG(dbgs() << "Replacing the uses of " << *V << "\n with\n " << *NewV << '\n'); + if (Constant *C = dyn_cast<Constant>(V)) { + Constant *Replace = ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV), + C->getType()); + if (C != Replace) { + DEBUG(dbgs() << "Inserting replacement const cast: " + << Replace << ": " << *Replace << '\n'); + C->replaceAllUsesWith(Replace); + V = Replace; + } + } + Value::use_iterator I, E, Next; for (I = V->use_begin(), E = V->use_end(); I != E; ) { Use &U = *I; @@ -881,6 +922,15 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces( } } + if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(CurUser)) { + unsigned NewAS = NewV->getType()->getPointerAddressSpace(); + if (ASC->getDestAddressSpace() == NewAS) { + ASC->replaceAllUsesWith(NewV); + DeadInstructions.push_back(ASC); + continue; + } + } + // Otherwise, replaces the use with flat(NewV). 
if (Instruction *I = dyn_cast<Instruction>(V)) { BasicBlock::iterator InsertPos = std::next(I->getIterator()); @@ -894,10 +944,15 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces( } } - if (V->use_empty()) - RecursivelyDeleteTriviallyDeadInstructions(V); + if (V->use_empty()) { + if (Instruction *I = dyn_cast<Instruction>(V)) + DeadInstructions.push_back(I); + } } + for (Instruction *I : DeadInstructions) + RecursivelyDeleteTriviallyDeadInstructions(I); + return true; } diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index a0da81605a809..7dacaba1193e8 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -557,7 +557,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( Value *LHS = PN->getIncomingValue(i); Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB); - Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, DL); + Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, {DL}); if (!Res) { if (!isa<Constant>(RHS)) continue; @@ -1250,37 +1250,53 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, BasicBlock *OnlyDest = nullptr; BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL; + Constant *OnlyVal = nullptr; + Constant *MultipleVal = (Constant *)(intptr_t)~0ULL; + unsigned PredWithKnownDest = 0; for (const auto &PredValue : PredValues) { BasicBlock *Pred = PredValue.second; if (!SeenPreds.insert(Pred).second) continue; // Duplicate predecessor entry. - // If the predecessor ends with an indirect goto, we can't change its - // destination. - if (isa<IndirectBrInst>(Pred->getTerminator())) - continue; - Constant *Val = PredValue.first; BasicBlock *DestBB; if (isa<UndefValue>(Val)) DestBB = nullptr; - else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) + else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { + assert(isa<ConstantInt>(Val) && "Expecting a constant integer"); DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero()); - else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { + assert(isa<ConstantInt>(Val) && "Expecting a constant integer"); DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor(); } else { assert(isa<IndirectBrInst>(BB->getTerminator()) && "Unexpected terminator"); + assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress"); DestBB = cast<BlockAddress>(Val)->getBasicBlock(); } // If we have exactly one destination, remember it for efficiency below. - if (PredToDestList.empty()) + if (PredToDestList.empty()) { OnlyDest = DestBB; - else if (OnlyDest != DestBB) - OnlyDest = MultipleDestSentinel; + OnlyVal = Val; + } else { + if (OnlyDest != DestBB) + OnlyDest = MultipleDestSentinel; + // It is possible that we have the same destination, but a different + // value, e.g. the default case of a SwitchInst. + if (Val != OnlyVal) + OnlyVal = MultipleVal; + } + + // We know where this predecessor is going. + ++PredWithKnownDest; + + // If the predecessor ends with an indirect goto, we can't change its + // destination. + if (isa<IndirectBrInst>(Pred->getTerminator())) + continue; PredToDestList.push_back(std::make_pair(Pred, DestBB)); } @@ -1293,7 +1309,7 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, // not thread.
By doing so, we do not need to duplicate the current block and // also do not miss potential opportunities in case we don't/can't duplicate. if (OnlyDest && OnlyDest != MultipleDestSentinel) { - if (PredToDestList.size() == + if (PredWithKnownDest == (size_t)std::distance(pred_begin(BB), pred_end(BB))) { bool SeenFirstBranchToOnlyDest = false; for (BasicBlock *SuccBB : successors(BB)) { @@ -1310,11 +1326,18 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, // If the condition is now dead due to the removal of the old terminator, // erase it. - auto *CondInst = dyn_cast<Instruction>(Cond); - if (CondInst && CondInst->use_empty()) - CondInst->eraseFromParent(); - // FIXME: in case this instruction is defined in the current BB and it - // resolves to a single value from all predecessors, we can do RAUW. + if (auto *CondInst = dyn_cast<Instruction>(Cond)) { + if (CondInst->use_empty() && !CondInst->mayHaveSideEffects()) + CondInst->eraseFromParent(); + else if (OnlyVal && OnlyVal != MultipleVal && + CondInst->getParent() == BB) { + // If we just learned Cond is the same value for all uses of the + // condition, replace it with a constant value. + CondInst->replaceAllUsesWith(OnlyVal); + if (!CondInst->mayHaveSideEffects()) + CondInst->eraseFromParent(); + } + } return true; } } @@ -1883,8 +1906,9 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred( // If this instruction can be simplified after the operands are updated, // just use the simplified value instead. This frequently happens due to // phi translation. - if (Value *IV = - SimplifyInstruction(New, BB->getModule()->getDataLayout())) { + if (Value *IV = SimplifyInstruction( + New, + {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) { ValueMapping[&*BI] = IV; if (!New->mayHaveSideEffects()) { delete New; diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 5042fc18d7c49..410fbb03068f4 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -499,7 +499,7 @@ bool LoopIdiomRecognize::runOnLoopBlock( Instruction *Inst = &*I++; // Look for memset instructions, which may be optimized to a larger memset. if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) { - WeakVH InstPtr(&*I); + WeakTrackingVH InstPtr(&*I); if (!processLoopMemSet(MSI, BECount)) continue; MadeChange = true; @@ -856,7 +856,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( /// If the stored value is a strided load in the same loop with the same stride /// this may be transformable into a memcpy. This kicks in for stuff like -/// for (i) A[i] = B[i]; +/// for (i) A[i] = B[i]; bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount) { assert(SI->isSimple() && "Expected only non-volatile stores."); diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 28e71ca05436c..af095560cc025 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -77,7 +77,7 @@ static bool SimplifyLoopInst(Loop *L, DominatorTree *DT, LoopInfo *LI, // Don't bother simplifying unused instructions. if (!I->use_empty()) { - Value *V = SimplifyInstruction(I, DL, TLI, DT, AC); + Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC}); if (V && LI->replacementPreservesLCSSAForm(I, V)) { // Mark all uses for resimplification next time round the loop.
for (User *U : I->users()) diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 8ce96cf1b7a67..2ba9265566a8c 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -341,7 +341,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // With the operands remapped, see if the instruction constant folds or is // otherwise simplifyable. This commonly occurs because the entry from PHI // nodes allows icmps and other instructions to fold. - Value *V = SimplifyInstruction(C, SQ.getWithInstruction(C)); + Value *V = SimplifyInstruction(C, SQ); if (V && LI->replacementPreservesLCSSAForm(C, V)) { // If so, then delete the temporary instruction and stick the folded value // in the map. @@ -670,8 +670,9 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM, LPMUpdater &) { int Threshold = EnableHeaderDuplication ? DefaultRotationThreshold : 0; const DataLayout &DL = L.getHeader()->getModule()->getDataLayout(); - const SimplifyQuery SQ(DL, &AR.TLI, &AR.DT, &AR.AC); + const SimplifyQuery SQ = getBestSimplifyQuery(AR, DL); - LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, SQ); + LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, + SQ); bool Changed = LR.processLoop(&L); if (!Changed) @@ -714,10 +715,7 @@ public: auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); auto *SE = SEWP ? &SEWP->getSE() : nullptr; - auto *TLIWP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); - auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr; - const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - const SimplifyQuery SQ(DL, TLI, DT, AC); + const SimplifyQuery SQ = getBestSimplifyQuery(*this, F); LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE, SQ); return LR.processLoop(L); } diff --git a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp index a5a81c33a8ebd..35c05e84fd68d 100644 --- a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -40,7 +40,7 @@ static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI) { bool Changed = false; // Copy blocks into a temporary array to avoid iterator invalidation issues // as we remove them. - SmallVector<WeakVH, 16> Blocks(L.blocks()); + SmallVector<WeakTrackingVH, 16> Blocks(L.blocks()); for (auto &Block : Blocks) { // Attempt to merge blocks in the trivial case. Don't modify blocks which diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index af137f6faa634..ccedb98d7fa15 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -900,7 +900,7 @@ static bool isHighCostExpansion(const SCEV *S, /// If any of the instructions in the specified set are trivially dead, delete /// them and see if this makes any of their operands subsequently dead.
static bool -DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { +DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakTrackingVH> &DeadInsts) { bool Changed = false; while (!DeadInsts.empty()) { @@ -1845,7 +1845,7 @@ class LSRInstance { void FinalizeChain(IVChain &Chain); void CollectChains(); void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts); + SmallVectorImpl<WeakTrackingVH> &DeadInsts); void CollectInterestingTypesAndFactors(); void CollectFixupsAndInitialFormulae(); @@ -1920,19 +1920,15 @@ class LSRInstance { const LSRUse &LU, SCEVExpander &Rewriter) const; - Value *Expand(const LSRUse &LU, const LSRFixup &LF, - const Formula &F, - BasicBlock::iterator IP, - SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const; + Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F, + BasicBlock::iterator IP, SCEVExpander &Rewriter, + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF, - const Formula &F, - SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const; - void Rewrite(const LSRUse &LU, const LSRFixup &LF, - const Formula &F, + const Formula &F, SCEVExpander &Rewriter, + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; + void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F, SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const; + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution); public: @@ -3014,7 +3010,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, /// Generate an add or subtract for each IVInc in a chain to materialize the IV /// user's operand from the previous IV user's operand. void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) { + SmallVectorImpl<WeakTrackingVH> &DeadInsts) { // Find the new IVOperand for the head of the chain. It may have been replaced // by LSR. const IVInc &Head = Chain.Incs[0]; @@ -4759,12 +4755,10 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP, /// Emit instructions for the leading candidate expression for this LSRUse (this /// is called "expanding"). -Value *LSRInstance::Expand(const LSRUse &LU, - const LSRFixup &LF, - const Formula &F, - BasicBlock::iterator IP, +Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF, + const Formula &F, BasicBlock::iterator IP, SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const { + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { if (LU.RigidFormula) return LF.OperandValToReplace; @@ -4939,12 +4933,9 @@ Value *LSRInstance::Expand(const LSRUse &LU, /// Helper for Rewrite. PHI nodes are special because the use of their operands /// effectively happens in their predecessor blocks, so the expression may need /// to be expanded in multiple places. 
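A compact picture of why PHIs need multi-site expansion (IR in comments; %e stands for the expression being rewritten):

  //   exit1:                      exit2:
  //     br label %merge             br label %merge
  //   merge:
  //     %p = phi i64 [ %e, %exit1 ], [ %e, %exit2 ]
  // The use of %e logically occurs at the end of exit1 and exit2, so the
  // expansion is materialized in each predecessor; the Inserted map in the
  // function below caches one expansion per block to avoid duplicates.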
-void LSRInstance::RewriteForPHI(PHINode *PN, - const LSRUse &LU, - const LSRFixup &LF, - const Formula &F, - SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const { +void LSRInstance::RewriteForPHI( + PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F, + SCEVExpander &Rewriter, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { DenseMap<BasicBlock *, Value *> Inserted; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == LF.OperandValToReplace) { @@ -5016,11 +5007,9 @@ void LSRInstance::RewriteForPHI(PHINode *PN, /// Emit instructions for the leading candidate expression for this LSRUse (this /// is called "expanding"), and update the UserInst to reference the newly /// expanded value. -void LSRInstance::Rewrite(const LSRUse &LU, - const LSRFixup &LF, - const Formula &F, - SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const { +void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF, + const Formula &F, SCEVExpander &Rewriter, + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { // First, find an insertion point that dominates UserInst. For PHI nodes, // find the nearest block which dominates all the relevant uses. if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) { @@ -5058,7 +5047,7 @@ void LSRInstance::ImplementSolution( const SmallVectorImpl<const Formula *> &Solution) { // Keep track of instructions we may have made dead, so that // we can remove them after we are done working. - SmallVector<WeakVH, 16> DeadInsts; + SmallVector<WeakTrackingVH, 16> DeadInsts; SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr"); @@ -5308,7 +5297,7 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, // Remove any extra phis created by processing inner loops. Changed |= DeleteDeadPHIs(L->getHeader()); if (EnablePhiElim && L->isLoopSimplifyForm()) { - SmallVector<WeakVH, 16> DeadInsts; + SmallVector<WeakTrackingVH, 16> DeadInsts; const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); SCEVExpander Rewriter(SE, DL, "lsr"); #ifndef NDEBUG diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 8fa806a7e8bcc..6ef1464e9338e 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -1231,11 +1231,12 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, LoopProcessWorklist.push_back(NewLoop); redoLoop = true; - // Keep a WeakVH holding onto LIC. If the first call to RewriteLoopBody + // Keep a WeakTrackingVH holding onto LIC. If the first call to + // RewriteLoopBody // deletes the instruction (for example by simplifying a PHI that feeds into // the condition that we're unswitching on), we don't rewrite the second // iteration. - WeakVH LICHandle(LIC); + WeakTrackingVH LICHandle(LIC); // Now we rewrite the original code to know that the condition is true and the // new code to know that the condition is false. @@ -1262,7 +1263,7 @@ static void RemoveFromWorklist(Instruction *I, static void ReplaceUsesOfWith(Instruction *I, Value *V, std::vector<Instruction*> &Worklist, Loop *L, LPPassManager *LPM) { - DEBUG(dbgs() << "Replace with '" << *V << "': " << *I); + DEBUG(dbgs() << "Replace with '" << *V << "': " << *I << "\n"); // Add uses to the worklist, which may be dead now. 
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) @@ -1275,7 +1276,8 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V, LPM->deleteSimpleAnalysisValue(I, L); RemoveFromWorklist(I, Worklist); I->replaceAllUsesWith(V); - I->eraseFromParent(); + if (!I->mayHaveSideEffects()) + I->eraseFromParent(); ++NumSimplify; } @@ -1431,7 +1433,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { // Simple DCE. if (isInstructionTriviallyDead(I)) { - DEBUG(dbgs() << "Remove dead instruction '" << *I); + DEBUG(dbgs() << "Remove dead instruction '" << *I << "\n"); // Add uses to the worklist, which may be dead now. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index a3f3f25c1e0f6..21a632073da7a 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -1323,7 +1323,7 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) { // Get the alignment of the byval. If the call doesn't specify the alignment, // then it is some target specific value that we can't know. - unsigned ByValAlign = CS.getParamAlignment(ArgNo+1); + unsigned ByValAlign = CS.getParamAlignment(ArgNo); if (ByValAlign == 0) return false; // If it is greater than the memcpy, then we check to see if we can force the diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp index c5bf2f28d1852..d0bfe36038973 100644 --- a/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/lib/Transforms/Scalar/NaryReassociate.cpp @@ -211,7 +211,8 @@ bool NaryReassociatePass::doOneIteration(Function &F) { Changed = true; SE->forgetValue(&*I); I->replaceAllUsesWith(NewI); - // If SeenExprs constains I's WeakVH, that entry will be replaced with + // If SeenExprs contains I's WeakTrackingVH, that entry will be + // replaced with // nullptr. RecursivelyDeleteTriviallyDeadInstructions(&*I, TLI); I = NewI->getIterator(); @@ -219,7 +220,7 @@ bool NaryReassociatePass::doOneIteration(Function &F) { // Add the rewritten instruction to SeenExprs; the original instruction // is deleted. const SCEV *NewSCEV = SE->getSCEV(&*I); - SeenExprs[NewSCEV].push_back(WeakVH(&*I)); + SeenExprs[NewSCEV].push_back(WeakTrackingVH(&*I)); // Ideally, NewSCEV should equal OldSCEV because tryReassociate(I) // is equivalent to I. However, ScalarEvolution::getSCEV may // weaken nsw causing NewSCEV not to equal OldSCEV. For example, suppose @@ -239,7 +240,7 @@ bool NaryReassociatePass::doOneIteration(Function &F) { // // This improvement is exercised in @reassociate_gep_nsw in nary-gep.ll. if (NewSCEV != OldSCEV) - SeenExprs[OldSCEV].push_back(WeakVH(&*I)); + SeenExprs[OldSCEV].push_back(WeakTrackingVH(&*I)); } } } @@ -494,7 +495,8 @@ NaryReassociatePass::findClosestMatchingDominator(const SCEV *CandidateExpr, // future instruction either. Therefore, we pop it out of the stack. This // optimization makes the algorithm O(n). while (!Candidates.empty()) { - // Candidates stores WeakVHs, so a candidate can be nullptr if it's removed + // Candidates stores WeakTrackingVHs, so a candidate can be nullptr if it's + // removed // during rewriting.
if (Value *Candidate = Candidates.back()) { Instruction *CandidateInstruction = cast<Instruction>(Candidate); diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index a014ddd9ba0a4..162d91beae764 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -395,7 +395,6 @@ namespace { class NewGVN { Function &F; DominatorTree *DT; - AssumptionCache *AC; const TargetLibraryInfo *TLI; AliasAnalysis *AA; MemorySSA *MSSA; @@ -405,6 +404,7 @@ class NewGVN { BumpPtrAllocator ExpressionAllocator; ArrayRecycler<Value *> ArgRecycler; TarjanSCC SCCFinder; + const SimplifyQuery SQ; // Number of function arguments, used by ranking unsigned int NumFuncArgs; @@ -504,8 +504,9 @@ public: NewGVN(Function &F, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA, const DataLayout &DL) - : F(F), DT(DT), AC(AC), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL), - PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)) {} + : F(F), DT(DT), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL), + PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)), SQ(DL, TLI, DT, AC) { + } bool runGVN(); private: @@ -782,8 +783,7 @@ const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T, E->op_push_back(lookupOperandLeader(Arg1)); E->op_push_back(lookupOperandLeader(Arg2)); - Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), DL, TLI, - DT, AC); + Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, nullptr, V)) return SimplifiedE; return E; @@ -864,8 +864,8 @@ const Expression *NewGVN::createExpression(Instruction *I) { "Wrong types on cmp instruction"); assert((E->getOperand(0)->getType() == I->getOperand(0)->getType() && E->getOperand(1)->getType() == I->getOperand(1)->getType())); - Value *V = SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), - DL, TLI, DT, AC); + Value *V = + SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } else if (isa<SelectInst>(I)) { @@ -874,23 +874,23 @@ const Expression *NewGVN::createExpression(Instruction *I) { assert(E->getOperand(1)->getType() == I->getOperand(1)->getType() && E->getOperand(2)->getType() == I->getOperand(2)->getType()); Value *V = SimplifySelectInst(E->getOperand(0), E->getOperand(1), - E->getOperand(2), DL, TLI, DT, AC); + E->getOperand(2), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } } else if (I->isBinaryOp()) { - Value *V = SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), - DL, TLI, DT, AC); + Value *V = + SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } else if (auto *BI = dyn_cast<BitCastInst>(I)) { - Value *V = SimplifyInstruction(BI, DL, TLI, DT, AC); + Value *V = + SimplifyCastInst(BI->getOpcode(), BI->getOperand(0), BI->getType(), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } else if (isa<GetElementPtrInst>(I)) { - Value *V = SimplifyGEPInst(E->getType(), - ArrayRef<Value *>(E->op_begin(), E->op_end()), - DL, TLI, DT, AC); + Value *V = SimplifyGEPInst( + E->getType(), ArrayRef<Value *>(E->op_begin(), E->op_end()), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } else if 
(AllConstant) { @@ -1628,15 +1628,15 @@ const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) { if (PBranch->TrueEdge) { // If we know the previous predicate is true and we are in the true // edge then we may be implied true or false. - if (CmpInst::isImpliedTrueByMatchingCmp(OurPredicate, - BranchPredicate)) { + if (CmpInst::isImpliedTrueByMatchingCmp(BranchPredicate, + OurPredicate)) { addPredicateUsers(PI, I); return createConstantExpression( ConstantInt::getTrue(CI->getType())); } - if (CmpInst::isImpliedFalseByMatchingCmp(OurPredicate, - BranchPredicate)) { + if (CmpInst::isImpliedFalseByMatchingCmp(BranchPredicate, + OurPredicate)) { addPredicateUsers(PI, I); return createConstantExpression( ConstantInt::getFalse(CI->getType())); diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 3dcab60907896..ef29d4141600a 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -982,7 +982,7 @@ static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i, /// Emit a tree of add instructions, summing Ops together /// and returning the result. Insert the tree before I. static Value *EmitAddTreeOfValues(Instruction *I, - SmallVectorImpl<WeakVH> &Ops){ + SmallVectorImpl<WeakTrackingVH> &Ops) { if (Ops.size() == 1) return Ops.back(); Value *V1 = Ops.back(); @@ -1559,7 +1559,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I, ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal) : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal); - SmallVector<WeakVH, 4> NewMulOps; + SmallVector<WeakTrackingVH, 4> NewMulOps; for (unsigned i = 0; i != Ops.size(); ++i) { // Only try to remove factors from expressions we're allowed to. BinaryOperator *BOp = diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index f344eb151464a..c11247c06b856 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1128,39 +1128,23 @@ normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, // Create new attribute set containing only attributes which can be transferred // from original call to the safepoint. -static AttributeList legalizeCallAttributes(AttributeList AS) { - AttributeList Ret; - - for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) { - unsigned Index = AS.getSlotIndex(Slot); - - if (Index == AttributeList::ReturnIndex || - Index == AttributeList::FunctionIndex) { - - for (Attribute Attr : make_range(AS.begin(Slot), AS.end(Slot))) { - - // Do not allow certain attributes - just skip them - // Safepoint can not be read only or read none. - if (Attr.hasAttribute(Attribute::ReadNone) || - Attr.hasAttribute(Attribute::ReadOnly)) - continue; - - // These attributes control the generation of the gc.statepoint call / - // invoke itself; and once the gc.statepoint is in place, they're of no - // use. - if (isStatepointDirectiveAttr(Attr)) - continue; - - Ret = Ret.addAttributes( - AS.getContext(), Index, - AttributeList::get(AS.getContext(), Index, AttrBuilder(Attr))); - } - } - - // Just skip parameter attributes for now - } - - return Ret; +static AttributeList legalizeCallAttributes(AttributeList AL) { + if (AL.isEmpty()) + return AL; + + // Remove the readonly, readnone, and statepoint function attributes. 
+ AttrBuilder FnAttrs = AL.getFnAttributes(); + FnAttrs.removeAttribute(Attribute::ReadNone); + FnAttrs.removeAttribute(Attribute::ReadOnly); + for (Attribute A : AL.getFnAttributes()) { + if (isStatepointDirectiveAttr(A)) + FnAttrs.remove(A); + } + + // Just skip parameter and return attributes for now + LLVMContext &Ctx = AL.getContext(); + return AttributeList::get(Ctx, AttributeList::FunctionIndex, + AttributeSet::get(Ctx, FnAttrs)); } /// Helper function to place all gc relocates necessary for the given @@ -1402,13 +1386,10 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */ Call->setCallingConv(ToReplace->getCallingConv()); // Currently we will fail on parameter attributes and on certain - // function attributes. - AttributeList NewAttrs = legalizeCallAttributes(ToReplace->getAttributes()); - // In case if we can handle this set of attributes - set up function attrs - // directly on statepoint and return attrs later for gc_result intrinsic. - Call->setAttributes(AttributeList::get(Call->getContext(), - AttributeList::FunctionIndex, - NewAttrs.getFnAttributes())); + // function attributes. If we can handle this set of attributes, set up the + // function attrs directly on the statepoint and return attrs later for the + // gc_result intrinsic. + Call->setAttributes(legalizeCallAttributes(ToReplace->getAttributes())); Token = Call; @@ -1431,13 +1412,10 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */ Invoke->setCallingConv(ToReplace->getCallingConv()); // Currently we will fail on parameter attributes and on certain - // function attributes. - AttributeList NewAttrs = legalizeCallAttributes(ToReplace->getAttributes()); - // In case if we can handle this set of attributes - set up function attrs - // directly on statepoint and return attrs later for gc_result intrinsic. - Invoke->setAttributes(AttributeList::get(Invoke->getContext(), - AttributeList::FunctionIndex, - NewAttrs.getFnAttributes())); + // function attributes. If we can handle this set of attributes, set up the + // function attrs directly on the statepoint and return attrs later for the + // gc_result intrinsic. + Invoke->setAttributes(legalizeCallAttributes(ToReplace->getAttributes())); Token = Invoke; diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index d01e91a7f2356..1d9beffaf06bf 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -25,6 +25,7 @@ #include "llvm/Transforms/Scalar/SROA.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" @@ -2186,8 +2187,8 @@ class llvm::sroa::AllocaSliceRewriter Instruction *OldPtr; // Track post-rewrite users which are PHI nodes and Selects. - SmallPtrSetImpl<PHINode *> &PHIUsers; - SmallPtrSetImpl<SelectInst *> &SelectUsers; + SmallSetVector<PHINode *, 8> &PHIUsers; + SmallSetVector<SelectInst *, 8> &SelectUsers; // Utility IR builder, whose name prefix is setup for each visited use, and // the insertion point is set to point to the user.
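The SROA hunk above swaps SmallPtrSet for SmallSetVector when tracking PHI and Select users. A plausible reading, not spelled out in the patch itself: SmallPtrSet iterates in pointer order, which can vary between runs, while SmallSetVector still deduplicates but iterates in insertion order, so any decision derived from the walk stays deterministic. A minimal sketch of the difference, using a hypothetical collectPHIUsers helper:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical helper, for illustration only: gather PHI users with a
// stable, first-seen iteration order.
static void collectPHIUsers(ArrayRef<PHINode *> Found) {
  SmallSetVector<PHINode *, 8> PHIUsers;
  for (PHINode *PN : Found)
    PHIUsers.insert(PN); // duplicates are dropped; insertion order is kept
  for (PHINode *PN : PHIUsers) {
    // Visitation here follows discovery order on every run; iterating a
    // SmallPtrSet instead would follow pointer values, which may differ
    // from run to run.
    (void)PN;
  }
}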
@@ -2199,8 +2200,8 @@ public: uint64_t NewAllocaBeginOffset, uint64_t NewAllocaEndOffset, bool IsIntegerPromotable, VectorType *PromotableVecTy, - SmallPtrSetImpl<PHINode *> &PHIUsers, - SmallPtrSetImpl<SelectInst *> &SelectUsers) + SmallSetVector<PHINode *, 8> &PHIUsers, + SmallSetVector<SelectInst *, 8> &SelectUsers) : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI), NewAllocaBeginOffset(NewAllocaBeginOffset), NewAllocaEndOffset(NewAllocaEndOffset), @@ -3880,8 +3881,8 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, // fact scheduled for promotion. unsigned PPWOldSize = PostPromotionWorklist.size(); unsigned NumUses = 0; - SmallPtrSet<PHINode *, 8> PHIUsers; - SmallPtrSet<SelectInst *, 8> SelectUsers; + SmallSetVector<PHINode *, 8> PHIUsers; + SmallSetVector<SelectInst *, 8> SelectUsers; AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(), P.endOffset(), IsIntegerPromotable, VecTy, @@ -3902,19 +3903,16 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, // Now that we've processed all the slices in the new partition, check if any // PHIs or Selects would block promotion. - for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(), - E = PHIUsers.end(); - I != E; ++I) - if (!isSafePHIToSpeculate(**I)) { + for (PHINode *PHI : PHIUsers) + if (!isSafePHIToSpeculate(*PHI)) { Promotable = false; PHIUsers.clear(); SelectUsers.clear(); break; } - for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(), - E = SelectUsers.end(); - I != E; ++I) - if (!isSafeSelectToSpeculate(**I)) { + + for (SelectInst *Sel : SelectUsers) + if (!isSafeSelectToSpeculate(*Sel)) { Promotable = false; PHIUsers.clear(); SelectUsers.clear(); diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 00e3c95f6f06d..52201d8f3e51a 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" @@ -83,6 +84,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeCFGSimplifyPassPass(Registry); initializeLateCFGSimplifyPassPass(Registry); initializeStructurizeCFGPass(Registry); + initializeSimpleLoopUnswitchLegacyPassPass(Registry); initializeSinkingLegacyPassPass(Registry); initializeTailCallElimPass(Registry); initializeSeparateConstOffsetFromGEPPass(Registry); diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 4d594532c3651..cde659b9d189f 100644 --- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -1138,7 +1138,7 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) { // Add I to DominatingExprs if it's an add/sub that can't sign overflow. 
if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) || match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) { - if (isKnownNotFullPoison(I)) { + if (programUndefinedIfFullPoison(I)) { const SCEV *Key = SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS)); DominatingExprs[Key].push_back(I); diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp new file mode 100644 index 0000000000000..fb1b47c48276c --- /dev/null +++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -0,0 +1,626 @@ +//===-- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Transforms/Utils/LoopUtils.h" + +#define DEBUG_TYPE "simple-loop-unswitch" + +using namespace llvm; + +STATISTIC(NumBranches, "Number of branches unswitched"); +STATISTIC(NumSwitches, "Number of switches unswitched"); +STATISTIC(NumTrivial, "Number of unswitches that are trivial"); + +static void replaceLoopUsesWithConstant(Loop &L, Value &LIC, + Constant &Replacement) { + assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?"); + + // Replace uses of LIC in the loop with the given constant. + for (auto UI = LIC.use_begin(), UE = LIC.use_end(); UI != UE;) { + // Grab the use and walk past it so we can clobber it in the use list. + Use *U = &*UI++; + Instruction *UserI = dyn_cast<Instruction>(U->getUser()); + if (!UserI || !L.contains(UserI)) + continue; + + // Replace this use within the loop body. + *U = &Replacement; + } +} + +/// Update the dominator tree after removing one exiting predecessor of a loop +/// exit block. +static void updateLoopExitIDom(BasicBlock *LoopExitBB, Loop &L, + DominatorTree &DT) { + assert(pred_begin(LoopExitBB) != pred_end(LoopExitBB) && + "Cannot have empty predecessors of the loop exit block if we split " + "off a block to unswitch!"); + + BasicBlock *IDom = *pred_begin(LoopExitBB); + // Walk all of the other predecessors finding the nearest common dominator + // until all predecessors are covered or we reach the loop header. The loop + // header necessarily dominates all loop exit blocks in loop simplified form + // so we can early-exit the moment we hit that block. + for (auto PI = std::next(pred_begin(LoopExitBB)), PE = pred_end(LoopExitBB); + PI != PE && IDom != L.getHeader(); ++PI) + IDom = DT.findNearestCommonDominator(IDom, *PI); + + DT.changeImmediateDominator(LoopExitBB, IDom); +} + +/// Update the dominator tree after unswitching a particular former exit block. 
+/// +/// This handles the full update of the dominator tree after hoisting a block +/// that previously was an exit block (or split off of an exit block) up to be +/// reached from the new immediate dominator of the preheader. +/// +/// The common case is simple -- we just move the unswitched block to have an +/// immediate dominator of the old preheader. But in complex cases, there may +/// be other blocks reachable from the unswitched block that are immediately +/// dominated by some node between the unswitched one and the old preheader. +/// All of these also need to be hoisted in the dominator tree. We also want to +/// minimize queries to the dominator tree because each step of this +/// invalidates any DFS numbers that would make queries fast. +static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH, + DominatorTree &DT) { + DomTreeNode *OldPHNode = DT[OldPH]; + DomTreeNode *UnswitchedNode = DT[UnswitchedBB]; + // If the dominator tree has already been updated for this unswitched node, + // we're done. This makes it easier to use this routine if there are multiple + // paths to the same unswitched destination. + if (UnswitchedNode->getIDom() == OldPHNode) + return; + + // First collect the domtree nodes that we are hoisting over. These are the + // set of nodes which may have children that need to be hoisted as well. + SmallPtrSet<DomTreeNode *, 4> DomChain; + for (auto *IDom = UnswitchedNode->getIDom(); IDom != OldPHNode; + IDom = IDom->getIDom()) + DomChain.insert(IDom); + + // The unswitched block ends up immediately dominated by the old preheader -- + // regardless of whether it is the loop exit block or split off of the loop + // exit block. + DT.changeImmediateDominator(UnswitchedNode, OldPHNode); + + // Blocks reachable from the unswitched block may need to change their IDom + // as well. + SmallSetVector<BasicBlock *, 4> Worklist; + for (auto *SuccBB : successors(UnswitchedBB)) + Worklist.insert(SuccBB); + + // Walk the worklist. We grow the list in the loop and so must recompute size. + for (int i = 0; i < (int)Worklist.size(); ++i) { + auto *BB = Worklist[i]; + + DomTreeNode *Node = DT[BB]; + assert(!DomChain.count(Node) && + "Cannot be dominated by a block you can reach!"); + // If this block doesn't have an immediate dominator somewhere in the chain + // we hoisted over, then its position in the domtree hasn't changed. Either + // it is above the region hoisted and still valid, or it is below the + // hoisted block and so was trivially updated. This also applies to + // everything reachable from this block so we're completely done with + // it. + if (!DomChain.count(Node->getIDom())) + continue; + + // We need to change the IDom for this node but also walk its successors + // which could have similar dominance position. + DT.changeImmediateDominator(Node, OldPHNode); + for (auto *SuccBB : successors(BB)) + Worklist.insert(SuccBB); + } +} + +/// Unswitch a trivial branch if the condition is loop invariant. +/// +/// This routine should only be called when loop code leading to the branch has +/// been validated as trivial (no side effects). This routine checks if the +/// condition is invariant and one of the successors is a loop exit. This +/// allows us to unswitch without duplicating the loop, making it trivial. +/// +/// If this routine fails to unswitch the branch it returns false. +/// +/// If the branch can be unswitched, this routine splits the preheader and +/// hoists the branch above that split.
Preserves loop simplified form +/// (splitting the exit block as necessary). It simplifies the branch within +/// the loop to an unconditional branch but doesn't remove it entirely. Further +/// cleanup can be done with some simplify-cfg like pass. +static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, + LoopInfo &LI) { + assert(BI.isConditional() && "Can only unswitch a conditional branch!"); + DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n"); + + Value *LoopCond = BI.getCondition(); + + // Need a trivial loop condition to unswitch. + if (!L.isLoopInvariant(LoopCond)) + return false; + + // FIXME: We should compute this once at the start and update it! + SmallVector<BasicBlock *, 16> ExitBlocks; + L.getExitBlocks(ExitBlocks); + SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); + + // Check to see if a successor of the branch is guaranteed to + // exit through a unique exit block without having any + // side-effects. If so, determine the value of Cond that causes + // it to do this. + ConstantInt *CondVal = ConstantInt::getTrue(BI.getContext()); + ConstantInt *Replacement = ConstantInt::getFalse(BI.getContext()); + int LoopExitSuccIdx = 0; + auto *LoopExitBB = BI.getSuccessor(0); + if (!ExitBlockSet.count(LoopExitBB)) { + std::swap(CondVal, Replacement); + LoopExitSuccIdx = 1; + LoopExitBB = BI.getSuccessor(1); + if (!ExitBlockSet.count(LoopExitBB)) + return false; + } + auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx); + assert(L.contains(ContinueBB) && + "Cannot have both successors exit and still be in the loop!"); + + // If the loop exit block contains phi nodes, this isn't trivial. + // FIXME: We should examine the PHI to determine whether or not we can handle + // it trivially. + if (isa<PHINode>(LoopExitBB->begin())) + return false; + + DEBUG(dbgs() << " unswitching trivial branch when: " << CondVal + << " == " << LoopCond << "\n"); + + // Split the preheader, so that we know that there is a safe place to insert + // the conditional branch. We will change the preheader to have a conditional + // branch on LoopCond. + BasicBlock *OldPH = L.getLoopPreheader(); + BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI); + + // Now that we have a place to insert the conditional branch, create a place + // to branch to: this is the exit block out of the loop that we are + // unswitching. We need to split this if there are other loop predecessors. + // Because the loop is in simplified form, *any* other predecessor is enough. + BasicBlock *UnswitchedBB; + if (BasicBlock *PredBB = LoopExitBB->getUniquePredecessor()) { + (void)PredBB; + assert(PredBB == BI.getParent() && "A branch's parent isn't a predecessor!"); + UnswitchedBB = LoopExitBB; + } else { + UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI); + } + + BasicBlock *ParentBB = BI.getParent(); + + // Now splice the branch to gate reaching the new preheader and re-point its + // successors. + OldPH->getInstList().splice(std::prev(OldPH->end()), + BI.getParent()->getInstList(), BI); + OldPH->getTerminator()->eraseFromParent(); + BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB); + BI.setSuccessor(1 - LoopExitSuccIdx, NewPH); + + // Create a new unconditional branch that will continue the loop as a new + // terminator. + BranchInst::Create(ContinueBB, ParentBB); + + // Now we need to update the dominator tree.
+ updateDTAfterUnswitch(UnswitchedBB, OldPH, DT); + // But if we split something off of the loop exit block then we also removed + // one of the predecessors for the loop exit block and may need to update its + // idom. + if (UnswitchedBB != LoopExitBB) + updateLoopExitIDom(LoopExitBB, L, DT); + + // Since this is an i1 condition we can also trivially replace uses of it + // within the loop with a constant. + replaceLoopUsesWithConstant(L, *LoopCond, *Replacement); + + ++NumTrivial; + ++NumBranches; + return true; +} + +/// Unswitch a trivial switch if the condition is loop invariant. +/// +/// This routine should only be called when loop code leading to the switch has +/// been validated as trivial (no side effects). This routine checks if the +/// condition is invariant and that at least one of the successors is a loop +/// exit. This allows us to unswitch without duplicating the loop, making it +/// trivial. +/// +/// If this routine fails to unswitch the switch it returns false. +/// +/// If the switch can be unswitched, this routine splits the preheader and +/// copies the switch above that split. If the default case is one of the +/// exiting cases, it copies the non-exiting cases and points them at the new +/// preheader. If the default case is not exiting, it copies the exiting cases +/// and points the default at the preheader. It preserves loop simplified form +/// (splitting the exit blocks as necessary). It simplifies the switch within +/// the loop by removing now-dead cases. If the default case is one of those +/// unswitched, it replaces its destination with a new basic block containing +/// only unreachable. Such basic blocks, while technically loop exits, are not +/// considered for unswitching so this is a stable transform and the same +/// switch will not be revisited. If after unswitching there is only a single +/// in-loop successor, the switch is further simplified to an unconditional +/// branch. Still more cleanup can be done with some simplify-cfg like pass. +static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, + LoopInfo &LI) { + DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n"); + Value *LoopCond = SI.getCondition(); + + // If this isn't switching on an invariant condition, we can't unswitch it. + if (!L.isLoopInvariant(LoopCond)) + return false; + + // FIXME: We should compute this once at the start and update it! + SmallVector<BasicBlock *, 16> ExitBlocks; + L.getExitBlocks(ExitBlocks); + SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); + + SmallVector<int, 4> ExitCaseIndices; + for (auto Case : SI.cases()) { + auto *SuccBB = Case.getCaseSuccessor(); + if (ExitBlockSet.count(SuccBB) && !isa<PHINode>(SuccBB->begin())) + ExitCaseIndices.push_back(Case.getCaseIndex()); + } + BasicBlock *DefaultExitBB = nullptr; + if (ExitBlockSet.count(SI.getDefaultDest()) && + !isa<PHINode>(SI.getDefaultDest()->begin()) && + !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator())) + DefaultExitBB = SI.getDefaultDest(); + else if (ExitCaseIndices.empty()) + return false; + + DEBUG(dbgs() << " unswitching trivial cases...\n"); + + SmallVector<std::pair<ConstantInt *, BasicBlock *>, 4> ExitCases; + ExitCases.reserve(ExitCaseIndices.size()); + // We walk the case indices backwards so that we remove the last case first + // and don't disrupt the earlier indices. + for (unsigned Index : reverse(ExitCaseIndices)) { + auto CaseI = SI.case_begin() + Index; + // Save the value of this case. 
+ ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()}); + // Delete the unswitched cases. + SI.removeCase(CaseI); + } + + // Check if after this all of the remaining cases point at the same + // successor. + BasicBlock *CommonSuccBB = nullptr; + if (SI.getNumCases() > 0 && + std::all_of(std::next(SI.case_begin()), SI.case_end(), + [&SI](const SwitchInst::CaseHandle &Case) { + return Case.getCaseSuccessor() == + SI.case_begin()->getCaseSuccessor(); + })) + CommonSuccBB = SI.case_begin()->getCaseSuccessor(); + + if (DefaultExitBB) { + // We can't remove the default edge so replace it with an edge to either + // the single common remaining successor (if we have one) or an unreachable + // block. + if (CommonSuccBB) { + SI.setDefaultDest(CommonSuccBB); + } else { + BasicBlock *ParentBB = SI.getParent(); + BasicBlock *UnreachableBB = BasicBlock::Create( + ParentBB->getContext(), + Twine(ParentBB->getName()) + ".unreachable_default", + ParentBB->getParent()); + new UnreachableInst(ParentBB->getContext(), UnreachableBB); + SI.setDefaultDest(UnreachableBB); + DT.addNewBlock(UnreachableBB, ParentBB); + } + } else { + // If we're not unswitching the default, it must match the cases' common + // successor for a common successor to exist; if there are no cases, the + // default is itself the common successor. + if (SI.getNumCases() == 0) + CommonSuccBB = SI.getDefaultDest(); + else if (SI.getDefaultDest() != CommonSuccBB) + CommonSuccBB = nullptr; + } + + // Split the preheader, so that we know that there is a safe place to insert + // the switch. + BasicBlock *OldPH = L.getLoopPreheader(); + BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI); + OldPH->getTerminator()->eraseFromParent(); + + // Now add the unswitched switch. + auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH); + + // Split any exit blocks with remaining in-loop predecessors. We walk in + // reverse so that we split in the same order as the cases appeared. This is + // purely for convenience of reading the resulting IR, but it doesn't cost + // anything really. + SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap; + // Handle the default exit if necessary. + // FIXME: It'd be great if we could merge this with the loop below but LLVM's + // ranges aren't quite powerful enough yet. + if (DefaultExitBB && !pred_empty(DefaultExitBB)) { + auto *SplitBB = + SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI); + updateLoopExitIDom(DefaultExitBB, L, DT); + DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB; + } + // Note that we must use a reference in the for loop so that we update the + // container. + for (auto &CasePair : reverse(ExitCases)) { + // Grab a reference to the exit block in the pair so that we can update it. + BasicBlock *&ExitBB = CasePair.second; + + // If this case is the last edge into the exit block, we can simply reuse it + // as it will no longer be a loop exit. No mapping necessary. + if (pred_empty(ExitBB)) + continue; + + // Otherwise we need to split the exit block so that we retain an exit + // block from the loop and a target for the unswitched condition. + BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB]; + if (!SplitExitBB) { + // If this is the first time we see this, do the split and remember it. + SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI); + updateLoopExitIDom(ExitBB, L, DT); + } + ExitBB = SplitExitBB; + } + + // Now add the unswitched cases. We do this in reverse order as we built them + // in reverse order.
+ for (auto CasePair : reverse(ExitCases)) { + ConstantInt *CaseVal = CasePair.first; + BasicBlock *UnswitchedBB = CasePair.second; + + NewSI->addCase(CaseVal, UnswitchedBB); + updateDTAfterUnswitch(UnswitchedBB, OldPH, DT); + } + + // If the default was unswitched, re-point it and add explicit cases for + // entering the loop. + if (DefaultExitBB) { + NewSI->setDefaultDest(DefaultExitBB); + updateDTAfterUnswitch(DefaultExitBB, OldPH, DT); + + // We removed all the exit cases, so we just copy the cases to the + // unswitched switch. + for (auto Case : SI.cases()) + NewSI->addCase(Case.getCaseValue(), NewPH); + } + + // If we ended up with a common successor for every path through the switch + // after unswitching, rewrite it to an unconditional branch to make it easy + // to recognize. Otherwise we potentially have to recognize the default case + // pointing at unreachable and other complexity. + if (CommonSuccBB) { + BasicBlock *BB = SI.getParent(); + SI.eraseFromParent(); + BranchInst::Create(CommonSuccBB, BB); + } + + DT.verifyDomTree(); + ++NumTrivial; + ++NumSwitches; + return true; +} + +/// This routine scans the loop to find a branch or switch which occurs before +/// any side effects occur. These can potentially be unswitched without +/// duplicating the loop. If a branch or switch is successfully unswitched the +/// scanning continues to see if subsequent branches or switches have become +/// trivial. Once all trivial candidates have been unswitched, this routine +/// returns. +/// +/// The return value indicates whether anything was unswitched (and therefore +/// changed). +static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, + LoopInfo &LI) { + bool Changed = false; + + // If the loop header has only one reachable successor we should keep looking + // for trivial condition candidates in the successor as well. An alternative + // is to constant fold conditions and merge successors into the loop header + // (then we only need to check the header's terminator). The reason for not + // doing this in the LoopUnswitch pass is that it could potentially break + // LoopPassManager's invariants. Folding dead branches could either eliminate + // the current loop or make other loops unreachable. LCSSA form might also + // not be preserved after deleting branches. The following code keeps + // traversing the loop header's successors until it finds a trivial condition + // candidate (a condition that is not a constant). Since unswitching generates + // branches with constant conditions, this scenario could be very common in + // practice. + BasicBlock *CurrentBB = L.getHeader(); + SmallPtrSet<BasicBlock *, 8> Visited; + Visited.insert(CurrentBB); + do { + // Check if there are any side-effecting instructions (e.g. stores, calls, + // volatile loads) in the part of the loop that the code *would* execute + // without unswitching. + if (llvm::any_of(*CurrentBB, + [](Instruction &I) { return I.mayHaveSideEffects(); })) + return Changed; + + TerminatorInst *CurrentTerm = CurrentBB->getTerminator(); + + if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) { + // Don't bother trying to unswitch past a switch with a constant + // condition. This should be removed prior to running this pass by + // simplify-cfg. + if (isa<Constant>(SI->getCondition())) + return Changed; + + if (!unswitchTrivialSwitch(L, *SI, DT, LI)) + // Couldn't unswitch this one so we're done. + return Changed; + + // Mark that we managed to unswitch something.
+ Changed = true; + + // If unswitching turned the terminator into an unconditional branch then + // we can continue. The unswitching logic specifically works to fold any + // cases it can into an unconditional branch to make it easier to + // recognize here. + auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator()); + if (!BI || BI->isConditional()) + return Changed; + + CurrentBB = BI->getSuccessor(0); + continue; + } + + auto *BI = dyn_cast<BranchInst>(CurrentTerm); + if (!BI) + // We do not understand other terminator instructions. + return Changed; + + // Don't bother trying to unswitch past an unconditional branch or a branch + // with a constant value. These should be removed by simplify-cfg prior to + // running this pass. + if (!BI->isConditional() || isa<Constant>(BI->getCondition())) + return Changed; + + // Found a trivial condition candidate: non-foldable conditional branch. If + // we fail to unswitch this, we can't do anything else that is trivial. + if (!unswitchTrivialBranch(L, *BI, DT, LI)) + return Changed; + + // Mark that we managed to unswitch something. + Changed = true; + + // We unswitched the branch. This should always leave us with an + // unconditional branch that we can follow now. + BI = cast<BranchInst>(CurrentBB->getTerminator()); + assert(!BI->isConditional() && + "Cannot form a conditional branch by unswitching!"); + CurrentBB = BI->getSuccessor(0); + + // When continuing, if we exit the loop or reach a previously visited block, + // then we cannot reach any trivial condition candidates (unfoldable + // branch instructions or switch instructions) and no unswitch can happen. + } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second); + + return Changed; +} + +/// Unswitch control flow predicated on loop invariant conditions. +/// +/// This first hoists all branches or switches which are trivial (i.e., do not +/// require duplicating any part of the loop) out of the loop body. It then +/// looks at other loop invariant control flows and tries to unswitch those as +/// well by cloning the loop if the result is small enough. +static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, + AssumptionCache &AC) { + assert(L.isLCSSAForm(DT) && + "Loops must be in LCSSA form before unswitching."); + bool Changed = false; + + // Must be in loop simplified form: we need a preheader and dedicated exits. + if (!L.isLoopSimplifyForm()) + return false; + + // Try trivial unswitching first before looping over other basic blocks in + // the loop. + Changed |= unswitchAllTrivialConditions(L, DT, LI); + + // FIXME: Add support for non-trivial unswitching by cloning the loop. + + return Changed; +} + +PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + Function &F = *L.getHeader()->getParent(); + (void)F; + + DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L << "\n"); + + if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC)) + return PreservedAnalyses::all(); + +#ifndef NDEBUG + // Historically this pass has had issues with the dominator tree so verify it + // in asserts builds.
+ AR.DT.verifyDomTree(); +#endif + return getLoopPassPreservedAnalyses(); +} + +namespace { +class SimpleLoopUnswitchLegacyPass : public LoopPass { +public: + static char ID; // Pass ID, replacement for typeid + explicit SimpleLoopUnswitchLegacyPass() : LoopPass(ID) { + initializeSimpleLoopUnswitchLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnLoop(Loop *L, LPPassManager &LPM) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AssumptionCacheTracker>(); + getLoopAnalysisUsage(AU); + } +}; +} // namespace + +bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { + if (skipLoop(L)) + return false; + + Function &F = *L->getHeader()->getParent(); + + DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L << "\n"); + + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + + bool Changed = unswitchLoop(*L, DT, LI, AC); + +#ifndef NDEBUG + // Historically this pass has had issues with the dominator tree so verify it + // in asserts builds. + DT.verifyDomTree(); +#endif + return Changed; +} + +char SimpleLoopUnswitchLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", + "Simple unswitch loops", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(LoopPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", + "Simple unswitch loops", false, false) + +Pass *llvm::createSimpleLoopUnswitchLegacyPass() { + return new SimpleLoopUnswitchLegacyPass(); +} diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 22af21d55c019..3d5cbfc93f2e6 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -78,8 +78,8 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { // Recursively deleting a PHI may cause multiple PHIs to be deleted - // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete. - SmallVector<WeakVH, 8> PHIs; + // or RAUW'd undef, so use an array of WeakTrackingVH for the PHIs to delete. + SmallVector<WeakTrackingVH, 8> PHIs; for (BasicBlock::iterator I = BB->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) PHIs.push_back(PN); diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 385c12302e040..d5124ac89016f 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -245,7 +245,7 @@ namespace { void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, BasicBlock::const_iterator StartingInst, std::vector<const BasicBlock*> &ToClone){ - WeakVH &BBEntry = VMap[BB]; + WeakTrackingVH &BBEntry = VMap[BB]; // Have we already cloned this block? if (BBEntry) return; @@ -547,7 +547,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // Make a second pass over the PHINodes now that all of them have been // remapped into the new function, simplifying the PHINode and performing any // recursive simplifications exposed. This will transparently update the - // WeakVH in the VMap. Notably, we rely on that so that if we coalesce + // WeakTrackingVH in the VMap. 
Notably, we rely on that so that if we coalesce // two PHINodes, the iteration over the old PHIs remains valid, and the // mapping will just map us to the new node (which may not even be a PHI // node). diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 5d6fbc3325fff..6d56e08af99f9 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -1640,7 +1640,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // modify the struct. if (CS.isByValArgument(ArgNo)) { ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, - CalledFunc->getParamAlignment(ArgNo+1)); + CalledFunc->getParamAlignment(ArgNo)); if (ActualArg != *AI) ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI)); } @@ -2302,7 +2302,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, AssumptionCache *AC = IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr; auto &DL = Caller->getParent()->getDataLayout(); - if (Value *V = SimplifyInstruction(PHI, DL, nullptr, nullptr, AC)) { + if (Value *V = SimplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); } diff --git a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp index fe93d6927c638..42aca757c2afd 100644 --- a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp +++ b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp @@ -33,6 +33,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -48,16 +49,6 @@ using namespace llvm; STATISTIC(NumWrappedOneCond, "Number of One-Condition Wrappers Inserted"); STATISTIC(NumWrappedTwoCond, "Number of Two-Condition Wrappers Inserted"); -static cl::opt<bool> LibCallsShrinkWrapDoDomainError( - "libcalls-shrinkwrap-domain-error", cl::init(true), cl::Hidden, - cl::desc("Perform shrink-wrap on lib calls with domain errors")); -static cl::opt<bool> LibCallsShrinkWrapDoRangeError( - "libcalls-shrinkwrap-range-error", cl::init(true), cl::Hidden, - cl::desc("Perform shrink-wrap on lib calls with range errors")); -static cl::opt<bool> LibCallsShrinkWrapDoPoleError( - "libcalls-shrinkwrap-pole-error", cl::init(true), cl::Hidden, - cl::desc("Perform shrink-wrap on lib calls with pole errors")); - namespace { class LibCallsShrinkWrapLegacyPass : public FunctionPass { public: @@ -82,10 +73,11 @@ INITIALIZE_PASS_END(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap", namespace { class LibCallsShrinkWrap : public InstVisitor<LibCallsShrinkWrap> { public: - LibCallsShrinkWrap(const TargetLibraryInfo &TLI) : TLI(TLI), Changed(false){}; - bool isChanged() const { return Changed; } + LibCallsShrinkWrap(const TargetLibraryInfo &TLI, DominatorTree *DT) + : TLI(TLI), DT(DT){}; void visitCallInst(CallInst &CI) { checkCandidate(CI); } - void perform() { + bool perform() { + bool Changed = false; for (auto &CI : WorkList) { DEBUG(dbgs() << "CDCE calls: " << CI->getCalledFunction()->getName() << "\n"); @@ -94,6 +86,7 @@ public: DEBUG(dbgs() << "Transformed\n"); } } + return Changed; } private: @@ -134,8 +127,8 @@ private: } const TargetLibraryInfo &TLI; + DominatorTree *DT; SmallVector<CallInst *, 16> WorkList; - bool Changed; }; } // end anonymous namespace @@ -241,8 +234,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI, case LibFunc_atanhf: // Same 
as atanh case LibFunc_atanhl: // Same as atanh { - if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError) - return false; ++NumWrappedTwoCond; Cond = createOrCond(CI, CmpInst::FCMP_OLE, -1.0f, CmpInst::FCMP_OGE, 1.0f); break; @@ -262,8 +253,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI, case LibFunc_logbf: // Same as log case LibFunc_logbl: // Same as log { - if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError) - return false; ++NumWrappedOneCond; Cond = createCond(CI, CmpInst::FCMP_OLE, 0.0f); break; @@ -274,8 +263,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI, case LibFunc_log1pf: // Same as log1p case LibFunc_log1pl: // Same as log1p { - if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError) - return false; ++NumWrappedOneCond; Cond = createCond(CI, CmpInst::FCMP_OLE, -1.0f); break; @@ -285,9 +272,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI, // RangeError: overflow or underflow case LibFunc_powf: case LibFunc_powl: { - if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError || - !LibCallsShrinkWrapDoRangeError) - return false; Cond = generateCondForPow(CI, Func); if (Cond == nullptr) return false; @@ -346,7 +330,7 @@ Value *LibCallsShrinkWrap::generateOneRangeCond(CallInst *CI, UpperBound = 11356.0f; break; default: - llvm_unreachable("Should be reach here"); + llvm_unreachable("Unhandled library call!"); } ++NumWrappedOneCond; @@ -410,7 +394,7 @@ Value *LibCallsShrinkWrap::generateTwoRangeCond(CallInst *CI, UpperBound = 11383.0f; break; default: - llvm_unreachable("Should be reach here"); + llvm_unreachable("Unhandled library call!"); } ++NumWrappedTwoCond; @@ -499,14 +483,17 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI, // Wrap conditions that can potentially generate errno to the library call. 
void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) { - assert(Cond != nullptr && "hrinkWrapCI is not expecting an empty call inst"); + assert(Cond != nullptr && "ShrinkWrapCI is not expecting an empty call inst"); MDNode *BranchWeights = MDBuilder(CI->getContext()).createBranchWeights(1, 2000); + TerminatorInst *NewInst = - SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights); + SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights, DT); BasicBlock *CallBB = NewInst->getParent(); CallBB->setName("cdce.call"); - CallBB->getSingleSuccessor()->setName("cdce.end"); + BasicBlock *SuccBB = CallBB->getSingleSuccessor(); + assert(SuccBB && "The split block should have a single successor"); + SuccBB->setName("cdce.end"); CI->removeFromParent(); CallBB->getInstList().insert(CallBB->getFirstInsertionPt(), CI); DEBUG(dbgs() << "== Basic Block After =="); @@ -522,32 +509,38 @@ bool LibCallsShrinkWrap::perform(CallInst *CI) { TLI.getLibFunc(*Callee, Func); assert(Func && "perform() is not expecting an empty function"); - if (LibCallsShrinkWrapDoDomainError && performCallDomainErrorOnly(CI, Func)) - return true; - - if (LibCallsShrinkWrapDoRangeError && performCallRangeErrorOnly(CI, Func)) + if (performCallDomainErrorOnly(CI, Func) || performCallRangeErrorOnly(CI, Func)) return true; - return performCallErrors(CI, Func); } void LibCallsShrinkWrapLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<DominatorTreeWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); } -static bool runImpl(Function &F, const TargetLibraryInfo &TLI) { +static bool runImpl(Function &F, const TargetLibraryInfo &TLI, + DominatorTree *DT) { if (F.hasFnAttribute(Attribute::OptimizeForSize)) return false; - LibCallsShrinkWrap CCDCE(TLI); + LibCallsShrinkWrap CCDCE(TLI, DT); CCDCE.visit(F); - CCDCE.perform(); - return CCDCE.isChanged(); + bool Changed = CCDCE.perform(); + +// Verify the dominator tree after we've updated it locally. +#ifndef NDEBUG + if (DT) + DT->verifyDomTree(); +#endif + return Changed; } bool LibCallsShrinkWrapLegacyPass::runOnFunction(Function &F) { auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); - return runImpl(F, TLI); + auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; + return runImpl(F, TLI, DT); } namespace llvm { @@ -561,11 +554,12 @@ FunctionPass *createLibCallsShrinkWrapPass() { PreservedAnalyses LibCallsShrinkWrapPass::run(Function &F, FunctionAnalysisManager &FAM) { auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); - bool Changed = runImpl(F, TLI); - if (!Changed) + auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); + if (!runImpl(F, TLI, DT)) return PreservedAnalyses::all(); auto PA = PreservedAnalyses(); PA.preserve<GlobalsAA>(); + PA.preserve<DominatorTreeAnalysis>(); return PA; } } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index d3002c5fb7502..ce6b703f3528d 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -562,7 +562,7 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) { // that can be removed.
BB->removePredecessor(Pred, true); - WeakVH PhiIt = &BB->front(); + WeakTrackingVH PhiIt = &BB->front(); while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) { PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt)); Value *OldPhiIt = PhiIt; @@ -1259,49 +1259,6 @@ void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) { DbgValues.push_back(DVI); } -static void appendOffset(SmallVectorImpl<uint64_t> &Ops, int64_t Offset) { - if (Offset > 0) { - Ops.push_back(dwarf::DW_OP_plus); - Ops.push_back(Offset); - } else if (Offset < 0) { - Ops.push_back(dwarf::DW_OP_minus); - Ops.push_back(-Offset); - } -} - -enum { WithStackValue = true }; - -/// Prepend \p DIExpr with a deref and offset operation and optionally turn it -/// into a stack value. -static DIExpression *prependDIExpr(DIBuilder &Builder, DIExpression *DIExpr, - bool Deref, int64_t Offset = 0, - bool StackValue = false) { - if (!Deref && !Offset && !StackValue) - return DIExpr; - - SmallVector<uint64_t, 8> Ops; - appendOffset(Ops, Offset); - if (Deref) - Ops.push_back(dwarf::DW_OP_deref); - if (DIExpr) - for (auto Op : DIExpr->expr_ops()) { - // A DW_OP_stack_value comes at the end, but before a DW_OP_LLVM_fragment. - if (StackValue) { - if (Op.getOp() == dwarf::DW_OP_stack_value) - StackValue = false; - else if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) { - Ops.push_back(dwarf::DW_OP_stack_value); - StackValue = false; - } - } - Ops.push_back(Op.getOp()); - for (unsigned I = 0; I < Op.getNumArgs(); ++I) - Ops.push_back(Op.getArg(I)); - } - if (StackValue) - Ops.push_back(dwarf::DW_OP_stack_value); - return Builder.createExpression(Ops); -} bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, Instruction *InsertBefore, DIBuilder &Builder, @@ -1313,9 +1270,7 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, auto *DIVar = DDI->getVariable(); auto *DIExpr = DDI->getExpression(); assert(DIVar && "Missing variable"); - - DIExpr = prependDIExpr(Builder, DIExpr, Deref, Offset); - + DIExpr = DIExpression::prepend(DIExpr, Deref, Offset); // Insert llvm.dbg.declare immediately after the original alloca, and remove // old llvm.dbg.declare. Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore); @@ -1348,7 +1303,7 @@ static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress, if (Offset) { SmallVector<uint64_t, 4> Ops; Ops.push_back(dwarf::DW_OP_deref); - appendOffset(Ops, Offset); + DIExpression::appendOffset(Ops, Offset); Ops.append(DIExpr->elements_begin() + 1, DIExpr->elements_end()); DIExpr = Builder.createExpression(Ops); } @@ -1398,8 +1353,9 @@ void llvm::salvageDebugInfo(Instruction &I) { auto *DIExpr = DVI->getExpression(); DIBuilder DIB(M, /*AllowUnresolved*/ false); // GEP offsets are i32 and thus always fit into an int64_t. - DIExpr = prependDIExpr(DIB, DIExpr, NoDeref, Offset.getSExtValue(), - WithStackValue); + DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, + Offset.getSExtValue(), + DIExpression::WithStackValue); DVI->setOperand(0, MDWrap(I.getOperand(0))); DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr)); DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); @@ -1411,7 +1367,7 @@ void llvm::salvageDebugInfo(Instruction &I) { // Rewrite the load into DW_OP_deref. 
auto *DIExpr = DVI->getExpression(); DIBuilder DIB(M, /*AllowUnresolved*/ false); - DIExpr = prependDIExpr(DIB, DIExpr, WithDeref); + DIExpr = DIExpression::prepend(DIExpr, DIExpression::WithDeref); DVI->setOperand(0, MDWrap(I.getOperand(0))); DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr)); DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); @@ -1520,7 +1476,7 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, II->setAttributes(CI->getAttributes()); // Make sure that anything using the call now uses the invoke! This also - // updates the CallGraph if present, because it uses a WeakVH. + // updates the CallGraph if present, because it uses a WeakTrackingVH. CI->replaceAllUsesWith(II); // Delete the original call diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index e7ba19665d591..72c06aef80370 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -210,7 +210,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT, for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I); ++I; - if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { + if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) { // This is a degenerate PHI already, don't modify it! PN->replaceAllUsesWith(V); PN->eraseFromParent(); @@ -628,7 +628,7 @@ ReprocessLoop: PHINode *PN; for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast<PHINode>(I++)); ) - if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { + if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) { if (SE) SE->forgetValue(PN); if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(PN, V)) { PN->replaceAllUsesWith(V); diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 43ab725b07691..4ab4d7949d233 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -757,7 +757,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, // Simplify any new induction variables in the partially unrolled loop. 
if (SE && !CompletelyUnroll && Count > 1) { - SmallVector<WeakVH, 16> DeadInsts; + SmallVector<WeakTrackingVH, 16> DeadInsts; simplifyLoopIVs(L, SE, DT, LI, DeadInsts); // Aggressively clean up dead instructions that simplifyLoopIVs already @@ -777,7 +777,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { Instruction *Inst = &*I++; - if (Value *V = SimplifyInstruction(Inst, DL)) + if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC})) if (LI->replacementPreservesLCSSAForm(Inst, V)) Inst->replaceAllUsesWith(V); if (isInstructionTriviallyDead(Inst)) diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp index dbe42c201dd4f..29d334f2968f1 100644 --- a/lib/Transforms/Utils/ModuleUtils.cpp +++ b/lib/Transforms/Utils/ModuleUtils.cpp @@ -237,3 +237,35 @@ void llvm::filterDeadComdatFunctions( ComdatEntriesCovered.end(); }); } + +std::string llvm::getUniqueModuleId(Module *M) { + MD5 Md5; + bool ExportsSymbols = false; + auto AddGlobal = [&](GlobalValue &GV) { + if (GV.isDeclaration() || GV.getName().startswith("llvm.") || + !GV.hasExternalLinkage()) + return; + ExportsSymbols = true; + Md5.update(GV.getName()); + Md5.update(ArrayRef<uint8_t>{0}); + }; + + for (auto &F : *M) + AddGlobal(F); + for (auto &GV : M->globals()) + AddGlobal(GV); + for (auto &GA : M->aliases()) + AddGlobal(GA); + for (auto &IF : M->ifuncs()) + AddGlobal(IF); + + if (!ExportsSymbols) + return ""; + + MD5::MD5Result R; + Md5.final(R); + + SmallString<32> Str; + MD5::stringifyResult(R, Str); + return ("$" + Str).str(); } diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index a33b85c4ee69a..cdba982e6641f 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -225,10 +225,10 @@ struct PromoteMem2Reg { std::vector<AllocaInst *> Allocas; DominatorTree &DT; DIBuilder DIB; - /// A cache of @llvm.assume intrinsics used by SimplifyInstruction. AssumptionCache *AC; + const SimplifyQuery SQ; /// Reverse mapping of Allocas. DenseMap<AllocaInst *, unsigned> AllocaLookup; @@ -270,7 +270,8 @@ public: AssumptionCache *AC) : Allocas(Allocas.begin(), Allocas.end()), DT(DT), DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false), - AC(AC) {} + AC(AC), SQ(DT.getRoot()->getParent()->getParent()->getDataLayout(), + nullptr, &DT, AC) {} void run(); @@ -673,8 +674,6 @@ void PromoteMem2Reg::run() { A->eraseFromParent(); } - const DataLayout &DL = F.getParent()->getDataLayout(); - // Remove alloca's dbg.declare intrinsics from the function. for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i) if (DbgDeclareInst *DDI = AllocaDbgDeclares[i]) @@ -699,7 +698,7 @@ void PromoteMem2Reg::run() { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. - if (Value *V = SimplifyInstruction(PN, DL, nullptr, &DT, AC)) { + if (Value *V = SimplifyInstruction(PN, SQ)) { PN->replaceAllUsesWith(V); PN->eraseFromParent(); NewPhiNodes.erase(I++); diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index f86e97b6cc72f..7a3e8b9ae915d 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2231,7 +2231,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL, } // Check for trivial simplification.
- if (Value *V = SimplifyInstruction(N, DL)) { + if (Value *V = SimplifyInstruction(N, {DL, nullptr, nullptr, AC})) { if (!BBI->use_empty()) TranslateMap[&*BBI] = V; if (!N->mayHaveSideEffects()) { @@ -2307,7 +2307,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { PHINode *PN = cast<PHINode>(II++); - if (Value *V = SimplifyInstruction(PN, DL)) { + if (Value *V = SimplifyInstruction(PN, {DL, PN})) { PN->replaceAllUsesWith(V); PN->eraseFromParent(); continue; @@ -3545,7 +3545,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( assert(VVal && "Should have a unique destination value"); ICI->setOperand(0, VVal); - if (Value *V = SimplifyInstruction(ICI, DL)) { + if (Value *V = SimplifyInstruction(ICI, {DL, ICI})) { ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); } diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index a4cc6a031ad4c..02a5d3dbeadfb 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -51,13 +51,13 @@ namespace { ScalarEvolution *SE; DominatorTree *DT; - SmallVectorImpl<WeakVH> &DeadInsts; + SmallVectorImpl<WeakTrackingVH> &DeadInsts; bool Changed; public: SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI,SmallVectorImpl<WeakVH> &Dead) + LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead) : L(Loop), LI(LI), SE(SE), DT(DT), DeadInsts(Dead), Changed(false) { assert(LI && "IV simplification requires LoopInfo"); } @@ -701,7 +701,7 @@ void IVVisitor::anchor() { } /// Simplify instructions that use this induction variable /// by using ScalarEvolution to analyze the IV's recurrence. bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead, + LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead, IVVisitor *V) { SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Dead); SIV.simplifyUsers(CurrIV, V); @@ -711,7 +711,7 @@ bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, /// Simplify users of induction variables within this /// loop. This does not actually change or add IVs. bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead) { + LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead) { bool Changed = false; for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, Dead); diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index 27373427d4f7d..2509b5f22046b 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -54,8 +54,7 @@ static bool runImpl(Function &F, const SimplifyQuery &SQ, // Don't waste time simplifying unused instructions. if (!I->use_empty()) { - if (Value *V = - SimplifyInstruction(I, SQ.getWithInstruction(I), ORE)) { + if (Value *V = SimplifyInstruction(I, SQ, ORE)) { // Mark all uses for resimplification next time round the loop. 
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index 27373427d4f7d..2509b5f22046b 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -54,8 +54,7 @@ static bool runImpl(Function &F, const SimplifyQuery &SQ,
 
         // Don't waste time simplifying unused instructions.
         if (!I->use_empty()) {
-          if (Value *V =
-                  SimplifyInstruction(I, SQ.getWithInstruction(I), ORE)) {
+          if (Value *V = SimplifyInstruction(I, SQ, ORE)) {
             // Mark all uses for resimplification next time round the loop.
             for (User *U : I->users())
               Next->insert(cast<Instruction>(U));
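The WeakVH-to-WeakTrackingVH renames below rely on the handle's defining behavior: it follows a value through replaceAllUsesWith and is nulled when the value is deleted, which is exactly what hasValueBeenRAUWed checks by comparing the original Value pointers against the handles. A toy registry-based model of that behavior, with all names invented for illustration:

#include <algorithm>
#include <cassert>
#include <vector>

struct Value;

struct Handle {
  Value *V = nullptr;
  Handle() = default;
  explicit Handle(Value *V);
  ~Handle();
  Handle(const Handle &) = delete;            // non-copyable, for brevity
  Handle &operator=(const Handle &) = delete;
};

struct Value {
  std::vector<Handle *> Handles;              // handles watching this value
  void replaceAllUsesWith(Value *New) {
    for (Handle *H : Handles) {               // retarget every live handle
      H->V = New;
      New->Handles.push_back(H);
    }
    Handles.clear();
  }
  ~Value() {
    for (Handle *H : Handles)
      H->V = nullptr;                         // deletion nulls the handle
  }
};

Handle::Handle(Value *V) : V(V) {
  if (V)
    V->Handles.push_back(this);
}
Handle::~Handle() {
  if (!V)
    return;
  auto &Hs = V->Handles;
  Hs.erase(std::remove(Hs.begin(), Hs.end(), this), Hs.end());
}

int main() {
  Value A, B;
  Handle H(&A);
  A.replaceAllUsesWith(&B);
  assert(H.V == &B);          // the handle followed the replacement
}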
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5549444047088..f112c555205c3 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3899,11 +3899,13 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
 }
 
 /// \brief Check that the Values in the slice in VL array are still existent in
-/// the WeakVH array.
+/// the WeakTrackingVH array.
 /// Vectorization of part of the VL array may cause later values in the VL array
-/// to become invalid. We track when this has happened in the WeakVH array.
-static bool hasValueBeenRAUWed(ArrayRef<Value *> VL, ArrayRef<WeakVH> VH,
-                               unsigned SliceBegin, unsigned SliceSize) {
+/// to become invalid. We track when this has happened in the WeakTrackingVH
+/// array.
+static bool hasValueBeenRAUWed(ArrayRef<Value *> VL,
+                               ArrayRef<WeakTrackingVH> VH, unsigned SliceBegin,
+                               unsigned SliceSize) {
   VL = VL.slice(SliceBegin, SliceSize);
   VH = VH.slice(SliceBegin, SliceSize);
   return !std::equal(VL.begin(), VL.end(), VH.begin());
@@ -3921,7 +3923,7 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
     return false;
 
   // Keep track of values that were deleted by vectorizing in the loop below.
-  SmallVector<WeakVH, 8> TrackValues(Chain.begin(), Chain.end());
+  SmallVector<WeakTrackingVH, 8> TrackValues(Chain.begin(), Chain.end());
 
   bool Changed = false;
   // Look for profitable vectorizable trees at all offsets, starting at zero.
@@ -4107,7 +4109,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
   bool Changed = false;
 
   // Keep track of values that were deleted by vectorizing in the loop below.
-  SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());
+  SmallVector<WeakTrackingVH, 8> TrackValues(VL.begin(), VL.end());
 
   unsigned NextInst = 0, MaxInst = VL.size();
   for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF;
@@ -4734,7 +4736,7 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
 
 namespace {
 /// Tracks instructions and their children.
-class WeakVHWithLevel final : public CallbackVH {
+class WeakTrackingVHWithLevel final : public CallbackVH {
   /// Operand index of the instruction currently being analyzed.
   unsigned Level = 0;
   /// Is this the instruction that should be vectorized, or are we now
@@ -4743,8 +4745,8 @@ class WeakVHWithLevel final : public CallbackVH {
   bool IsInitial = true;
 
 public:
-  explicit WeakVHWithLevel() = default;
-  WeakVHWithLevel(Value *V) : CallbackVH(V){};
+  explicit WeakTrackingVHWithLevel() = default;
+  WeakTrackingVHWithLevel(Value *V) : CallbackVH(V){};
   /// Restart children analysis each time it is replaced by the new instruction.
   void allUsesReplacedWith(Value *New) override {
     setValPtr(New);
@@ -4771,7 +4773,7 @@ public:
            cast<Instruction>(getValPtr())->getNumOperands() > Level);
     return cast<Instruction>(getValPtr())->getOperand(Level++);
   }
-  virtual ~WeakVHWithLevel() = default;
+  virtual ~WeakTrackingVHWithLevel() = default;
 };
 } // namespace
@@ -4793,7 +4795,7 @@ static bool canBeVectorized(
   if (Root->getParent() != BB)
     return false;
-  SmallVector<WeakVHWithLevel, 8> Stack(1, Root);
+  SmallVector<WeakTrackingVHWithLevel, 8> Stack(1, Root);
   SmallSet<Value *, 8> VisitedInstrs;
   bool Res = false;
   while (!Stack.empty()) {
@@ -5069,7 +5071,8 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
     SetVector<Value *> Candidates(GEPList.begin(), GEPList.end());
 
     // Some of the candidates may have already been vectorized after we
-    // initially collected them. If so, the WeakVHs will have nullified the
+    // initially collected them. If so, the WeakTrackingVHs will have
+    // nullified the
     // values, so remove them from the set of candidates.
     Candidates.remove(nullptr);
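WeakTrackingVHWithLevel above layers traversal state on CallbackVH, whose virtual hooks fire when the tracked value is RAUWed or deleted; overriding allUsesReplacedWith to reset Level is what lets the SLP traversal restart cleanly on the replacement instruction. A compact sketch of that callback-handle idea, using hypothetical class names rather than LLVM's CallbackVH:

#include <cassert>

struct Value { int NumOperands = 0; };

class CallbackHandle {
  Value *V;
protected:
  void setValPtr(Value *New) { V = New; }
public:
  explicit CallbackHandle(Value *V = nullptr) : V(V) {}
  Value *get() const { return V; }
  // In LLVM the RAUW machinery invokes this; here we call it by hand.
  virtual void allUsesReplacedWith(Value *New) { setValPtr(New); }
  virtual ~CallbackHandle() = default;
};

class HandleWithLevel final : public CallbackHandle {
  unsigned Level = 0;                     // next operand index to visit
public:
  using CallbackHandle::CallbackHandle;
  void allUsesReplacedWith(Value *New) override {
    setValPtr(New);
    Level = 0;                            // restart analysis on the new value
  }
  bool isFinal() const { return Level == unsigned(get()->NumOperands); }
  unsigned nextOperandIndex() { return Level++; }
};

int main() {
  Value Old{2}, New{3};
  HandleWithLevel H(&Old);
  H.nextOperandIndex();                    // traversal is partway through Old
  H.allUsesReplacedWith(&New);             // simulate replaceAllUsesWith
  assert(H.get() == &New && !H.isFinal()); // cursor reset to operand 0
}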