Diffstat (limited to 'lib/Transforms/Scalar')
22 files changed, 895 insertions, 217 deletions
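A pattern repeated throughout the hunks below is the replacement of loose (DL, TLI, DT, AC) parameter lists with a single SimplifyQuery that is constructed once and handed to SimplifyInstruction. As a minimal sketch of that usage against the API visible in this tree (simplifyBlock itself is a hypothetical helper, not code from this commit):

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

// Hypothetical helper: build the query once, then refine it per instruction
// with getWithInstruction(), mirroring the CorrelatedValuePropagation and
// EarlyCSE changes in this commit.
static bool simplifyBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
                          const DominatorTree *DT, AssumptionCache *AC) {
  const SimplifyQuery SQ(BB.getModule()->getDataLayout(), TLI, DT, AC);
  bool Changed = false;
  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
    Instruction *Inst = &*I++; // Advance first: Inst may be erased below.
    if (Value *V = SimplifyInstruction(Inst, SQ.getWithInstruction(Inst))) {
      Inst->replaceAllUsesWith(V);
      if (isInstructionTriviallyDead(Inst, TLI))
        Inst->eraseFromParent();
      Changed = true;
    }
  }
  return Changed;
}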
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index b323ab3bd443..523390758769 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -55,6 +55,7 @@ add_llvm_library(LLVMScalarOpts Scalar.cpp Scalarizer.cpp SeparateConstOffsetFromGEP.cpp + SimpleLoopUnswitch.cpp SimplifyCFGPass.cpp Sink.cpp SpeculativeExecution.cpp diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index b5a4cc2f3953..dc864f48bf1f 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -151,7 +151,7 @@ static bool processPHI(PHINode *P, LazyValueInfo *LVI, Changed = true; } - if (Value *V = SimplifyInstruction(P, SQ.getWithInstruction(P))) { + if (Value *V = SimplifyInstruction(P, SQ)) { P->replaceAllUsesWith(V); P->eraseFromParent(); Changed = true; @@ -565,25 +565,14 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) { return false; LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI(); - auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; - auto *TLIWP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); - auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr; - auto *ACWP = getAnalysisIfAvailable<AssumptionCacheTracker>(); - auto *AC = ACWP ? &ACWP->getAssumptionCache(F) : nullptr; - const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC); - return runImpl(F, LVI, SQ); + return runImpl(F, LVI, getBestSimplifyQuery(*this, F)); } PreservedAnalyses CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) { LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F); - auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F); - auto *TLI = AM.getCachedResult<TargetLibraryAnalysis>(F); - auto *AC = AM.getCachedResult<AssumptionAnalysis>(F); - const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC); - bool Changed = runImpl(F, LVI, SQ); + bool Changed = runImpl(F, LVI, getBestSimplifyQuery(AM, F)); if (!Changed) return PreservedAnalyses::all(); diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 04479b6e49ac..d8f8a58a5fdf 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -253,6 +253,7 @@ public: const TargetTransformInfo &TTI; DominatorTree &DT; AssumptionCache &AC; + const SimplifyQuery SQ; MemorySSA *MSSA; std::unique_ptr<MemorySSAUpdater> MSSAUpdater; typedef RecyclingAllocator< @@ -315,9 +316,10 @@ public: unsigned CurrentGeneration; /// \brief Set up the EarlyCSE runner for a particular function. - EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI, - DominatorTree &DT, AssumptionCache &AC, MemorySSA *MSSA) - : TLI(TLI), TTI(TTI), DT(DT), AC(AC), MSSA(MSSA), + EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI, + const TargetTransformInfo &TTI, DominatorTree &DT, + AssumptionCache &AC, MemorySSA *MSSA) + : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA), MSSAUpdater(make_unique<MemorySSAUpdater>(MSSA)), CurrentGeneration(0) { } @@ -616,8 +618,6 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { /// stores which can occur in bitfield code among other things. Instruction *LastStore = nullptr; - const DataLayout &DL = BB->getModule()->getDataLayout(); - // See if any instructions in the block can be eliminated. If so, do it. 
If // not, add them to AvailableValues. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { @@ -635,10 +635,16 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // Skip assume intrinsics, they don't really have side effects (although // they're marked as such to ensure preservation of control dependencies), - // and this pass will not disturb any of the assumption's control - // dependencies. + // and this pass will not bother with its removal. However, we should mark + // its condition as true for all dominated blocks. if (match(Inst, m_Intrinsic<Intrinsic::assume>())) { - DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n'); + auto *CondI = + dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0)); + if (CondI && SimpleValue::canHandle(CondI)) { + DEBUG(dbgs() << "EarlyCSE considering assumption: " << *Inst << '\n'); + AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext())); + } else + DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n'); continue; } @@ -658,10 +664,25 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { if (match(Inst, m_Intrinsic<Intrinsic::experimental_guard>())) { if (auto *CondI = dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) { - // The condition we're on guarding here is true for all dominated - // locations. - if (SimpleValue::canHandle(CondI)) + if (SimpleValue::canHandle(CondI)) { + // Do we already know the actual value of this condition? + if (auto *KnownCond = AvailableValues.lookup(CondI)) { + // Is the condition known to be true? + if (isa<ConstantInt>(KnownCond) && + cast<ConstantInt>(KnownCond)->isOneValue()) { + DEBUG(dbgs() << "EarlyCSE removing guard: " << *Inst << '\n'); + removeMSSA(Inst); + Inst->eraseFromParent(); + Changed = true; + continue; + } else + // Use the known value if it wasn't true. + cast<CallInst>(Inst)->setArgOperand(0, KnownCond); + } + // The condition we're on guarding here is true for all dominated + // locations. AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext())); + } } // Guard intrinsics read all memory, but don't write any memory. @@ -673,7 +694,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // If the instruction can be simplified (e.g. X+0 = X) then replace it with // its simpler value. - if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) { + if (Value *V = SimplifyInstruction(Inst, SQ)) { DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n'); bool Killed = false; if (!Inst->use_empty()) { @@ -964,7 +985,7 @@ PreservedAnalyses EarlyCSEPass::run(Function &F, auto *MSSA = UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr; - EarlyCSE CSE(TLI, TTI, DT, AC, MSSA); + EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA); if (!CSE.run()) return PreservedAnalyses::all(); @@ -1008,7 +1029,7 @@ public: auto *MSSA = UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr; - EarlyCSE CSE(TLI, TTI, DT, AC, MSSA); + EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA); return CSE.run(); } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index be696df548d5..c04646eed49a 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1687,7 +1687,7 @@ bool GVN::processInstruction(Instruction *I) { // example if it determines that %y is equal to %x then the instruction // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. 
const DataLayout &DL = I->getModule()->getDataLayout(); - if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) { + if (Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC})) { bool Changed = false; if (!I->use_empty()) { I->replaceAllUsesWith(V); diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index dcb2a4a0c6e6..3953198fe605 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -97,7 +97,7 @@ class IndVarSimplify { TargetLibraryInfo *TLI; const TargetTransformInfo *TTI; - SmallVector<WeakVH, 16> DeadInsts; + SmallVector<WeakTrackingVH, 16> DeadInsts; bool Changed = false; bool isValidRewrite(Value *FromVal, Value *ToVal); @@ -415,8 +415,8 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { Compare->getName()); // In the following deletions, PN may become dead and may be deleted. - // Use a WeakVH to observe whether this happens. - WeakVH WeakPH = PN; + // Use a WeakTrackingVH to observe whether this happens. + WeakTrackingVH WeakPH = PN; // Delete the old floating point exit comparison. The branch starts using the // new comparison. @@ -451,7 +451,7 @@ void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) { // BasicBlock *Header = L->getHeader(); - SmallVector<WeakVH, 8> PHIs; + SmallVector<WeakTrackingVH, 8> PHIs; for (BasicBlock::iterator I = Header->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) PHIs.push_back(PN); @@ -901,7 +901,7 @@ class WidenIV { PHINode *WidePhi; Instruction *WideInc; const SCEV *WideIncExpr; - SmallVectorImpl<WeakVH> &DeadInsts; + SmallVectorImpl<WeakTrackingVH> &DeadInsts; SmallPtrSet<Instruction *,16> Widened; SmallVector<NarrowIVDefUse, 8> NarrowIVUsers; @@ -941,20 +941,13 @@ class WidenIV { } public: - WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, - ScalarEvolution *SEv, DominatorTree *DTree, - SmallVectorImpl<WeakVH> &DI, bool HasGuards) : - OrigPhi(WI.NarrowIV), - WideType(WI.WidestNativeType), - LI(LInfo), - L(LI->getLoopFor(OrigPhi->getParent())), - SE(SEv), - DT(DTree), - HasGuards(HasGuards), - WidePhi(nullptr), - WideInc(nullptr), - WideIncExpr(nullptr), - DeadInsts(DI) { + WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, + DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI, + bool HasGuards) + : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo), + L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), + HasGuards(HasGuards), WidePhi(nullptr), WideInc(nullptr), + WideIncExpr(nullptr), DeadInsts(DI) { assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended; } diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp index 9e2563879da2..5e116ef2fe75 100644 --- a/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -138,7 +138,7 @@ private: // Tries to infer the specific address space of each address expression in // Postorder. - void inferAddressSpaces(const std::vector<Value *> &Postorder, + void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder, ValueToAddrSpaceMapTy *InferredAddrSpace) const; bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const; @@ -147,7 +147,7 @@ private: // address spaces if InferredAddrSpace says so. Postorder is the postorder of // all flat expressions in the use-def graph of function F. 
bool - rewriteWithNewAddressSpaces(const std::vector<Value *> &Postorder, + rewriteWithNewAddressSpaces(ArrayRef<WeakTrackingVH> Postorder, const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const; @@ -162,7 +162,7 @@ private: std::vector<std::pair<Value *, bool>> &PostorderStack, DenseSet<Value *> &Visited) const; - std::vector<Value *> collectFlatAddressExpressions(Function &F) const; + std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const; Value *cloneValueWithNewAddressSpace( Value *V, unsigned NewAddrSpace, @@ -274,16 +274,36 @@ void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack( Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack, DenseSet<Value *> &Visited) const { assert(V->getType()->isPointerTy()); + + // Generic addressing expressions may be hidden in nested constant + // expressions. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + // TODO: Look in non-address parts, like icmp operands. + if (isAddressExpression(*CE) && Visited.insert(CE).second) + PostorderStack.push_back(std::make_pair(CE, false)); + + return; + } + if (isAddressExpression(*V) && V->getType()->getPointerAddressSpace() == FlatAddrSpace) { - if (Visited.insert(V).second) + if (Visited.insert(V).second) { PostorderStack.push_back(std::make_pair(V, false)); + + Operator *Op = cast<Operator>(V); + for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) { + if (isAddressExpression(*CE) && Visited.insert(CE).second) + PostorderStack.emplace_back(CE, false); + } + } + } } } // Returns all flat address expressions in function F. The elements are + // ordered in postorder. -std::vector<Value *> +std::vector<WeakTrackingVH> InferAddressSpaces::collectFlatAddressExpressions(Function &F) const { // This function implements a non-recursive postorder traversal of a partial // use-def graph of function F. @@ -326,21 +346,25 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const { PushPtrOperand(Cmp->getOperand(0)); PushPtrOperand(Cmp->getOperand(1)); } + } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) { + if (!ASC->getType()->isVectorTy()) + PushPtrOperand(ASC->getPointerOperand()); } } - std::vector<Value *> Postorder; // The resultant postorder. + std::vector<WeakTrackingVH> Postorder; // The resultant postorder. while (!PostorderStack.empty()) { + Value *TopVal = PostorderStack.back().first; // If the operands of the expression on the top are already explored, // adds that expression to the resultant postorder. if (PostorderStack.back().second) { - Postorder.push_back(PostorderStack.back().first); + Postorder.push_back(TopVal); PostorderStack.pop_back(); continue; } // Otherwise, adds its operands to the stack and explores them. PostorderStack.back().second = true; - for (Value *PtrOperand : getPointerOperands(*PostorderStack.back().first)) { + for (Value *PtrOperand : getPointerOperands(*TopVal)) { appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack, Visited); } @@ -559,7 +583,7 @@ bool InferAddressSpaces::runOnFunction(Function &F) { return false; // Collects all flat address expressions in postorder. - std::vector<Value *> Postorder = collectFlatAddressExpressions(F); + std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(F); // Runs a data-flow analysis to refine the address spaces of every expression // in Postorder.
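The move from bare Value pointers to WeakTrackingVH in Postorder (and the parallel WeakVH renames across this commit) is what keeps the vector valid while the rewriting stage performs RAUW and deletes instructions. A standalone sketch of the handle semantics this relies on; the IR built here is hypothetical, not from the patch:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  auto *I32 = Type::getInt32Ty(Ctx);
  Function *F = Function::Create(FunctionType::get(I32, {I32}, false),
                                 Function::ExternalLinkage, "f", &M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> B(BB);
  Value *Arg = &*F->arg_begin();
  // %old = add i32 %arg, 0 -- an instruction we will replace and delete.
  Instruction *Old = cast<Instruction>(B.CreateAdd(Arg, B.getInt32(0), "old"));
  B.CreateRet(Old);

  WeakTrackingVH H(Old);        // Handle registered on Old.
  Old->replaceAllUsesWith(Arg); // Tracking handles follow the RAUW...
  Old->eraseFromParent();       // ...so H survives Old's deletion.
  // H now compares equal to Arg; had the value H tracks been deleted
  // instead, H would have become null rather than dangle. A bare pointer
  // in Postorder would be left dangling by this same sequence.
  return H == Arg ? 0 : 1;
}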
@@ -571,8 +595,10 @@ bool InferAddressSpaces::runOnFunction(Function &F) { return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F); } +// Constants need to be tracked through RAUW to handle cases with nested +// constant expressions, so wrap values in WeakTrackingVH. void InferAddressSpaces::inferAddressSpaces( - const std::vector<Value *> &Postorder, + ArrayRef<WeakTrackingVH> Postorder, ValueToAddrSpaceMapTy *InferredAddrSpace) const { SetVector<Value *> Worklist(Postorder.begin(), Postorder.end()); // Initially, all expressions are in the uninitialized address space. @@ -784,8 +810,8 @@ static Value::use_iterator skipToNextUser(Value::use_iterator I, } bool InferAddressSpaces::rewriteWithNewAddressSpaces( - const std::vector<Value *> &Postorder, - const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const { + ArrayRef<WeakTrackingVH> Postorder, + const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const { // For each address expression to be modified, creates a clone of it with its // pointer operands converted to the new address space. Since the pointer // operands are converted, the clone is naturally in the new address space by @@ -812,8 +838,12 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces( NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(UndefUse->get())); } + SmallVector<Instruction *, 16> DeadInstructions; + // Replaces the uses of the old address expressions with the new ones. - for (Value *V : Postorder) { + for (const WeakTrackingVH &WVH : Postorder) { + assert(WVH && "value was unexpectedly deleted"); + Value *V = WVH; Value *NewV = ValueWithNewAddrSpace.lookup(V); if (NewV == nullptr) continue; @@ -821,6 +851,17 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces( DEBUG(dbgs() << "Replacing the uses of " << *V << "\n with\n " << *NewV << '\n'); + if (Constant *C = dyn_cast<Constant>(V)) { + Constant *Replace = ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV), + C->getType()); + if (C != Replace) { + DEBUG(dbgs() << "Inserting replacement const cast: " + << Replace << ": " << *Replace << '\n'); + C->replaceAllUsesWith(Replace); + V = Replace; + } + } + Value::use_iterator I, E, Next; for (I = V->use_begin(), E = V->use_end(); I != E; ) { Use &U = *I; @@ -881,6 +922,15 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces( } } + if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(CurUser)) { + unsigned NewAS = NewV->getType()->getPointerAddressSpace(); + if (ASC->getDestAddressSpace() == NewAS) { + ASC->replaceAllUsesWith(NewV); + DeadInstructions.push_back(ASC); + continue; + } + } + // Otherwise, replaces the use with flat(NewV). 
if (Instruction *I = dyn_cast<Instruction>(V)) { BasicBlock::iterator InsertPos = std::next(I->getIterator()); @@ -894,10 +944,15 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces( } } - if (V->use_empty()) - RecursivelyDeleteTriviallyDeadInstructions(V); + if (V->use_empty()) { + if (Instruction *I = dyn_cast<Instruction>(V)) + DeadInstructions.push_back(I); + } } + for (Instruction *I : DeadInstructions) + RecursivelyDeleteTriviallyDeadInstructions(I); + return true; } diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index a0da81605a80..7dacaba1193e 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -557,7 +557,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( Value *LHS = PN->getIncomingValue(i); Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB); - Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, DL); + Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, {DL}); if (!Res) { if (!isa<Constant>(RHS)) continue; @@ -1250,37 +1250,53 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, BasicBlock *OnlyDest = nullptr; BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL; + Constant *OnlyVal = nullptr; + Constant *MultipleVal = (Constant *)(intptr_t)~0ULL; + unsigned PredWithKnownDest = 0; for (const auto &PredValue : PredValues) { BasicBlock *Pred = PredValue.second; if (!SeenPreds.insert(Pred).second) continue; // Duplicate predecessor entry. - // If the predecessor ends with an indirect goto, we can't change its - // destination. - if (isa<IndirectBrInst>(Pred->getTerminator())) - continue; - Constant *Val = PredValue.first; BasicBlock *DestBB; if (isa<UndefValue>(Val)) DestBB = nullptr; - else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) + else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { + assert(isa<ConstantInt>(Val) && "Expecting a constant integer"); DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero()); - else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { + assert(isa<ConstantInt>(Val) && "Expecting a constant integer"); DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor(); } else { assert(isa<IndirectBrInst>(BB->getTerminator()) && "Unexpected terminator"); + assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress"); DestBB = cast<BlockAddress>(Val)->getBasicBlock(); } // If we have exactly one destination, remember it for efficiency below. - if (PredToDestList.empty()) + if (PredToDestList.empty()) { OnlyDest = DestBB; - else if (OnlyDest != DestBB) - OnlyDest = MultipleDestSentinel; + OnlyVal = Val; + } else { + if (OnlyDest != DestBB) + OnlyDest = MultipleDestSentinel; + // It is possible we have the same destination but a different value, + // e.g. the default case in a switchinst. + if (Val != OnlyVal) + OnlyVal = MultipleVal; + } + + // We know where this predecessor is going. + ++PredWithKnownDest; + + // If the predecessor ends with an indirect goto, we can't change its + // destination. + if (isa<IndirectBrInst>(Pred->getTerminator())) + continue; PredToDestList.push_back(std::make_pair(Pred, DestBB)); } @@ -1293,7 +1309,7 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, // not thread.
By doing so, we do not need to duplicate the current block and // also do not miss potential opportunities in case we don't/can't duplicate. if (OnlyDest && OnlyDest != MultipleDestSentinel) { - if (PredToDestList.size() == + if (PredWithKnownDest == (size_t)std::distance(pred_begin(BB), pred_end(BB))) { bool SeenFirstBranchToOnlyDest = false; for (BasicBlock *SuccBB : successors(BB)) { @@ -1310,11 +1326,18 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, // If the condition is now dead due to the removal of the old terminator, // erase it. - auto *CondInst = dyn_cast<Instruction>(Cond); - if (CondInst && CondInst->use_empty()) - CondInst->eraseFromParent(); - // FIXME: in case this instruction is defined in the current BB and it - // resolves to a single value from all predecessors, we can do RAUW. + if (auto *CondInst = dyn_cast<Instruction>(Cond)) { + if (CondInst->use_empty() && !CondInst->mayHaveSideEffects()) + CondInst->eraseFromParent(); + else if (OnlyVal && OnlyVal != MultipleVal && + CondInst->getParent() == BB) { + // If we just learned Cond is the same value for all uses of the + // condition, replace it with a constant value. + CondInst->replaceAllUsesWith(OnlyVal); + if (!CondInst->mayHaveSideEffects()) + CondInst->eraseFromParent(); + } + } return true; } } @@ -1883,8 +1906,9 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred( // If this instruction can be simplified after the operands are updated, // just use the simplified value instead. This frequently happens due to // phi translation. - if (Value *IV = - SimplifyInstruction(New, BB->getModule()->getDataLayout())) { + if (Value *IV = SimplifyInstruction( + New, + {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) { ValueMapping[&*BI] = IV; if (!New->mayHaveSideEffects()) { delete New; diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 5042fc18d7c4..410fbb03068f 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -499,7 +499,7 @@ bool LoopIdiomRecognize::runOnLoopBlock( Instruction *Inst = &*I++; // Look for memset instructions, which may be optimized to a larger memset. if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) { - WeakVH InstPtr(&*I); + WeakTrackingVH InstPtr(&*I); if (!processLoopMemSet(MSI, BECount)) continue; MadeChange = true; @@ -856,7 +856,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( /// If the stored value is a strided load in the same loop with the same stride /// this may be transformable into a memcpy. This kicks in for stuff like -/// for (i) A[i] = B[i]; +/// for (i) A[i] = B[i]; bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount) { assert(SI->isSimple() && "Expected only non-volatile stores."); diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 28e71ca05436..af095560cc02 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -77,7 +77,7 @@ static bool SimplifyLoopInst(Loop *L, DominatorTree *DT, LoopInfo *LI, // Don't bother simplifying unused instructions. if (!I->use_empty()) { - Value *V = SimplifyInstruction(I, DL, TLI, DT, AC); + Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC}); if (V && LI->replacementPreservesLCSSAForm(I, V)) { // Mark all uses for resimplification next time round the loop.
for (User *U : I->users()) diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 8ce96cf1b7a6..2ba9265566a8 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -341,7 +341,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // With the operands remapped, see if the instruction constant folds or is // otherwise simplifyable. This commonly occurs because the entry from PHI // nodes allows icmps and other instructions to fold. - Value *V = SimplifyInstruction(C, SQ.getWithInstruction(C)); + Value *V = SimplifyInstruction(C, SQ); if (V && LI->replacementPreservesLCSSAForm(C, V)) { // If so, then delete the temporary instruction and stick the folded value // in the map. @@ -670,8 +670,9 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM, LPMUpdater &) { int Threshold = EnableHeaderDuplication ? DefaultRotationThreshold : 0; const DataLayout &DL = L.getHeader()->getModule()->getDataLayout(); - const SimplifyQuery SQ(DL, &AR.TLI, &AR.DT, &AR.AC); - LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, SQ); + const SimplifyQuery SQ = getBestSimplifyQuery(AR, DL); + LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, + SQ); bool Changed = LR.processLoop(&L); if (!Changed) @@ -714,10 +715,7 @@ public: auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); auto *SE = SEWP ? &SEWP->getSE() : nullptr; - auto *TLIWP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); - auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr; - const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - const SimplifyQuery SQ(DL, TLI, DT, AC); + const SimplifyQuery SQ = getBestSimplifyQuery(*this, F); LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE, SQ); return LR.processLoop(L); } diff --git a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp index a5a81c33a8eb..35c05e84fd68 100644 --- a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -40,7 +40,7 @@ static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI) { bool Changed = false; // Copy blocks into a temporary array to avoid iterator invalidation issues // as we remove them. - SmallVector<WeakVH, 16> Blocks(L.blocks()); + SmallVector<WeakTrackingVH, 16> Blocks(L.blocks()); for (auto &Block : Blocks) { // Attempt to merge blocks in the trivial case. Don't modify blocks which diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index af137f6faa63..ccedb98d7fa1 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -900,7 +900,7 @@ static bool isHighCostExpansion(const SCEV *S, /// If any of the instructions in the specified set are trivially dead, delete /// them and see if this makes any of their operands subsequently dead.
static bool -DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { +DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakTrackingVH> &DeadInsts) { bool Changed = false; while (!DeadInsts.empty()) { @@ -1845,7 +1845,7 @@ class LSRInstance { void FinalizeChain(IVChain &Chain); void CollectChains(); void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts); + SmallVectorImpl<WeakTrackingVH> &DeadInsts); void CollectInterestingTypesAndFactors(); void CollectFixupsAndInitialFormulae(); @@ -1920,19 +1920,15 @@ class LSRInstance { const LSRUse &LU, SCEVExpander &Rewriter) const; - Value *Expand(const LSRUse &LU, const LSRFixup &LF, - const Formula &F, - BasicBlock::iterator IP, - SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const; + Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F, + BasicBlock::iterator IP, SCEVExpander &Rewriter, + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF, - const Formula &F, - SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const; - void Rewrite(const LSRUse &LU, const LSRFixup &LF, - const Formula &F, + const Formula &F, SCEVExpander &Rewriter, + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; + void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F, SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const; + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution); public: @@ -3014,7 +3010,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, /// Generate an add or subtract for each IVInc in a chain to materialize the IV /// user's operand from the previous IV user's operand. void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) { + SmallVectorImpl<WeakTrackingVH> &DeadInsts) { // Find the new IVOperand for the head of the chain. It may have been replaced // by LSR. const IVInc &Head = Chain.Incs[0]; @@ -4759,12 +4755,10 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP, /// Emit instructions for the leading candidate expression for this LSRUse (this /// is called "expanding"). -Value *LSRInstance::Expand(const LSRUse &LU, - const LSRFixup &LF, - const Formula &F, - BasicBlock::iterator IP, +Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF, + const Formula &F, BasicBlock::iterator IP, SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const { + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { if (LU.RigidFormula) return LF.OperandValToReplace; @@ -4939,12 +4933,9 @@ Value *LSRInstance::Expand(const LSRUse &LU, /// Helper for Rewrite. PHI nodes are special because the use of their operands /// effectively happens in their predecessor blocks, so the expression may need /// to be expanded in multiple places. 
-void LSRInstance::RewriteForPHI(PHINode *PN, - const LSRUse &LU, - const LSRFixup &LF, - const Formula &F, - SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const { +void LSRInstance::RewriteForPHI( + PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F, + SCEVExpander &Rewriter, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { DenseMap<BasicBlock *, Value *> Inserted; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == LF.OperandValToReplace) { @@ -5016,11 +5007,9 @@ void LSRInstance::RewriteForPHI(PHINode *PN, /// Emit instructions for the leading candidate expression for this LSRUse (this /// is called "expanding"), and update the UserInst to reference the newly /// expanded value. -void LSRInstance::Rewrite(const LSRUse &LU, - const LSRFixup &LF, - const Formula &F, - SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const { +void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF, + const Formula &F, SCEVExpander &Rewriter, + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { // First, find an insertion point that dominates UserInst. For PHI nodes, // find the nearest block which dominates all the relevant uses. if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) { @@ -5058,7 +5047,7 @@ void LSRInstance::ImplementSolution( const SmallVectorImpl<const Formula *> &Solution) { // Keep track of instructions we may have made dead, so that // we can remove them after we are done working. - SmallVector<WeakVH, 16> DeadInsts; + SmallVector<WeakTrackingVH, 16> DeadInsts; SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr"); @@ -5308,7 +5297,7 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, // Remove any extra phis created by processing inner loops. Changed |= DeleteDeadPHIs(L->getHeader()); if (EnablePhiElim && L->isLoopSimplifyForm()) { - SmallVector<WeakVH, 16> DeadInsts; + SmallVector<WeakTrackingVH, 16> DeadInsts; const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); SCEVExpander Rewriter(SE, DL, "lsr"); #ifndef NDEBUG diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 8fa806a7e8bc..6ef1464e9338 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -1231,11 +1231,12 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, LoopProcessWorklist.push_back(NewLoop); redoLoop = true; - // Keep a WeakVH holding onto LIC. If the first call to RewriteLoopBody + // Keep a WeakTrackingVH holding onto LIC. If the first call to + // RewriteLoopBody // deletes the instruction (for example by simplifying a PHI that feeds into // the condition that we're unswitching on), we don't rewrite the second // iteration. - WeakVH LICHandle(LIC); + WeakTrackingVH LICHandle(LIC); // Now we rewrite the original code to know that the condition is true and the // new code to know that the condition is false. @@ -1262,7 +1263,7 @@ static void RemoveFromWorklist(Instruction *I, static void ReplaceUsesOfWith(Instruction *I, Value *V, std::vector<Instruction*> &Worklist, Loop *L, LPPassManager *LPM) { - DEBUG(dbgs() << "Replace with '" << *V << "': " << *I); + DEBUG(dbgs() << "Replace with '" << *V << "': " << *I << "\n"); // Add uses to the worklist, which may be dead now. 
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) @@ -1275,7 +1276,8 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V, LPM->deleteSimpleAnalysisValue(I, L); RemoveFromWorklist(I, Worklist); I->replaceAllUsesWith(V); - I->eraseFromParent(); + if (!I->mayHaveSideEffects()) + I->eraseFromParent(); ++NumSimplify; } @@ -1431,7 +1433,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { // Simple DCE. if (isInstructionTriviallyDead(I)) { - DEBUG(dbgs() << "Remove dead instruction '" << *I); + DEBUG(dbgs() << "Remove dead instruction '" << *I << "\n"); // Add uses to the worklist, which may be dead now. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index a3f3f25c1e0f..21a632073da7 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -1323,7 +1323,7 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) { // Get the alignment of the byval. If the call doesn't specify the alignment, // then it is some target specific value that we can't know. - unsigned ByValAlign = CS.getParamAlignment(ArgNo+1); + unsigned ByValAlign = CS.getParamAlignment(ArgNo); if (ByValAlign == 0) return false; // If it is greater than the memcpy, then we check to see if we can force the diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp index c5bf2f28d185..d0bfe3603897 100644 --- a/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/lib/Transforms/Scalar/NaryReassociate.cpp @@ -211,7 +211,8 @@ bool NaryReassociatePass::doOneIteration(Function &F) { Changed = true; SE->forgetValue(&*I); I->replaceAllUsesWith(NewI); - // If SeenExprs constains I's WeakVH, that entry will be replaced with + // If SeenExprs contains I's WeakTrackingVH, that entry will be + // replaced with // nullptr. RecursivelyDeleteTriviallyDeadInstructions(&*I, TLI); I = NewI->getIterator(); @@ -219,7 +220,7 @@ // Add the rewritten instruction to SeenExprs; the original instruction // is deleted. const SCEV *NewSCEV = SE->getSCEV(&*I); - SeenExprs[NewSCEV].push_back(WeakVH(&*I)); + SeenExprs[NewSCEV].push_back(WeakTrackingVH(&*I)); // Ideally, NewSCEV should equal OldSCEV because tryReassociate(I) // is equivalent to I. However, ScalarEvolution::getSCEV may // weaken nsw causing NewSCEV not to equal OldSCEV. For example, suppose @@ -239,7 +240,7 @@ // // This improvement is exercised in @reassociate_gep_nsw in nary-gep.ll. if (NewSCEV != OldSCEV) - SeenExprs[OldSCEV].push_back(WeakVH(&*I)); + SeenExprs[OldSCEV].push_back(WeakTrackingVH(&*I)); } } } @@ -494,7 +495,8 @@ NaryReassociatePass::findClosestMatchingDominator(const SCEV *CandidateExpr, // future instruction either. Therefore, we pop it out of the stack. This // optimization makes the algorithm O(n). while (!Candidates.empty()) { - // Candidates stores WeakVHs, so a candidate can be nullptr if it's removed + // Candidates stores WeakTrackingVHs, so a candidate can be nullptr if it's + // removed // during rewriting.
if (Value *Candidate = Candidates.back()) { Instruction *CandidateInstruction = cast<Instruction>(Candidate); diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index a014ddd9ba0a..162d91beae76 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -395,7 +395,6 @@ namespace { class NewGVN { Function &F; DominatorTree *DT; - AssumptionCache *AC; const TargetLibraryInfo *TLI; AliasAnalysis *AA; MemorySSA *MSSA; @@ -405,6 +404,7 @@ class NewGVN { BumpPtrAllocator ExpressionAllocator; ArrayRecycler<Value *> ArgRecycler; TarjanSCC SCCFinder; + const SimplifyQuery SQ; // Number of function arguments, used by ranking unsigned int NumFuncArgs; @@ -504,8 +504,9 @@ public: NewGVN(Function &F, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA, const DataLayout &DL) - : F(F), DT(DT), AC(AC), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL), - PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)) {} + : F(F), DT(DT), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL), + PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)), SQ(DL, TLI, DT, AC) { + } bool runGVN(); private: @@ -782,8 +783,7 @@ const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T, E->op_push_back(lookupOperandLeader(Arg1)); E->op_push_back(lookupOperandLeader(Arg2)); - Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), DL, TLI, - DT, AC); + Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, nullptr, V)) return SimplifiedE; return E; @@ -864,8 +864,8 @@ const Expression *NewGVN::createExpression(Instruction *I) { "Wrong types on cmp instruction"); assert((E->getOperand(0)->getType() == I->getOperand(0)->getType() && E->getOperand(1)->getType() == I->getOperand(1)->getType())); - Value *V = SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), - DL, TLI, DT, AC); + Value *V = + SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } else if (isa<SelectInst>(I)) { @@ -874,23 +874,23 @@ const Expression *NewGVN::createExpression(Instruction *I) { assert(E->getOperand(1)->getType() == I->getOperand(1)->getType() && E->getOperand(2)->getType() == I->getOperand(2)->getType()); Value *V = SimplifySelectInst(E->getOperand(0), E->getOperand(1), - E->getOperand(2), DL, TLI, DT, AC); + E->getOperand(2), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } } else if (I->isBinaryOp()) { - Value *V = SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), - DL, TLI, DT, AC); + Value *V = + SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } else if (auto *BI = dyn_cast<BitCastInst>(I)) { - Value *V = SimplifyInstruction(BI, DL, TLI, DT, AC); + Value *V = + SimplifyCastInst(BI->getOpcode(), BI->getOperand(0), BI->getType(), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } else if (isa<GetElementPtrInst>(I)) { - Value *V = SimplifyGEPInst(E->getType(), - ArrayRef<Value *>(E->op_begin(), E->op_end()), - DL, TLI, DT, AC); + Value *V = SimplifyGEPInst( + E->getType(), ArrayRef<Value *>(E->op_begin(), E->op_end()), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } else if 
(AllConstant) { @@ -1628,15 +1628,15 @@ const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) { if (PBranch->TrueEdge) { // If we know the previous predicate is true and we are in the true // edge then we may be implied true or false. - if (CmpInst::isImpliedTrueByMatchingCmp(OurPredicate, - BranchPredicate)) { + if (CmpInst::isImpliedTrueByMatchingCmp(BranchPredicate, + OurPredicate)) { addPredicateUsers(PI, I); return createConstantExpression( ConstantInt::getTrue(CI->getType())); } - if (CmpInst::isImpliedFalseByMatchingCmp(OurPredicate, - BranchPredicate)) { + if (CmpInst::isImpliedFalseByMatchingCmp(BranchPredicate, + OurPredicate)) { addPredicateUsers(PI, I); return createConstantExpression( ConstantInt::getFalse(CI->getType())); diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 3dcab6090789..ef29d4141600 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -982,7 +982,7 @@ static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i, /// Emit a tree of add instructions, summing Ops together /// and returning the result. Insert the tree before I. static Value *EmitAddTreeOfValues(Instruction *I, - SmallVectorImpl<WeakVH> &Ops){ + SmallVectorImpl<WeakTrackingVH> &Ops) { if (Ops.size() == 1) return Ops.back(); Value *V1 = Ops.back(); @@ -1559,7 +1559,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I, ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal) : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal); - SmallVector<WeakVH, 4> NewMulOps; + SmallVector<WeakTrackingVH, 4> NewMulOps; for (unsigned i = 0; i != Ops.size(); ++i) { // Only try to remove factors from expressions we're allowed to. BinaryOperator *BOp = diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index f344eb151464..c11247c06b85 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1128,39 +1128,23 @@ normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, // Create new attribute set containing only attributes which can be transferred // from original call to the safepoint. -static AttributeList legalizeCallAttributes(AttributeList AS) { - AttributeList Ret; - - for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) { - unsigned Index = AS.getSlotIndex(Slot); - - if (Index == AttributeList::ReturnIndex || - Index == AttributeList::FunctionIndex) { - - for (Attribute Attr : make_range(AS.begin(Slot), AS.end(Slot))) { - - // Do not allow certain attributes - just skip them - // Safepoint can not be read only or read none. - if (Attr.hasAttribute(Attribute::ReadNone) || - Attr.hasAttribute(Attribute::ReadOnly)) - continue; - - // These attributes control the generation of the gc.statepoint call / - // invoke itself; and once the gc.statepoint is in place, they're of no - // use. - if (isStatepointDirectiveAttr(Attr)) - continue; - - Ret = Ret.addAttributes( - AS.getContext(), Index, - AttributeList::get(AS.getContext(), Index, AttrBuilder(Attr))); - } - } - - // Just skip parameter attributes for now - } - - return Ret; +static AttributeList legalizeCallAttributes(AttributeList AL) { + if (AL.isEmpty()) + return AL; + + // Remove the readonly, readnone, and statepoint function attributes. 
+ AttrBuilder FnAttrs = AL.getFnAttributes(); + FnAttrs.removeAttribute(Attribute::ReadNone); + FnAttrs.removeAttribute(Attribute::ReadOnly); + for (Attribute A : AL.getFnAttributes()) { + if (isStatepointDirectiveAttr(A)) + FnAttrs.remove(A); + } + + // Just skip parameter and return attributes for now + LLVMContext &Ctx = AL.getContext(); + return AttributeList::get(Ctx, AttributeList::FunctionIndex, + AttributeSet::get(Ctx, FnAttrs)); } /// Helper function to place all gc relocates necessary for the given @@ -1402,13 +1386,10 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */ Call->setCallingConv(ToReplace->getCallingConv()); // Currently we will fail on parameter attributes and on certain - // function attributes. - AttributeList NewAttrs = legalizeCallAttributes(ToReplace->getAttributes()); - // In case if we can handle this set of attributes - set up function attrs - // directly on statepoint and return attrs later for gc_result intrinsic. - Call->setAttributes(AttributeList::get(Call->getContext(), - AttributeList::FunctionIndex, - NewAttrs.getFnAttributes())); + // function attributes. If we can handle this set of attributes, we set up + // function attrs directly on the statepoint and return attrs later for the + // gc_result intrinsic. + Call->setAttributes(legalizeCallAttributes(ToReplace->getAttributes())); Token = Call; @@ -1431,13 +1412,10 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */ Invoke->setCallingConv(ToReplace->getCallingConv()); // Currently we will fail on parameter attributes and on certain - // function attributes. - AttributeList NewAttrs = legalizeCallAttributes(ToReplace->getAttributes()); - // In case if we can handle this set of attributes - set up function attrs - // directly on statepoint and return attrs later for gc_result intrinsic. - Invoke->setAttributes(AttributeList::get(Invoke->getContext(), - AttributeList::FunctionIndex, - NewAttrs.getFnAttributes())); + // function attributes. If we can handle this set of attributes, we set up + // function attrs directly on the statepoint and return attrs later for the + // gc_result intrinsic. + Invoke->setAttributes(legalizeCallAttributes(ToReplace->getAttributes())); Token = Invoke; diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index d01e91a7f235..1d9beffaf06b 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -25,6 +25,7 @@ #include "llvm/Transforms/Scalar/SROA.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" @@ -2186,8 +2187,8 @@ class llvm::sroa::AllocaSliceRewriter Instruction *OldPtr; // Track post-rewrite users which are PHI nodes and Selects. - SmallPtrSetImpl<PHINode *> &PHIUsers; - SmallPtrSetImpl<SelectInst *> &SelectUsers; + SmallSetVector<PHINode *, 8> &PHIUsers; + SmallSetVector<SelectInst *, 8> &SelectUsers; // Utility IR builder, whose name prefix is setup for each visited use, and // the insertion point is set to point to the user.
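SROA's swap of SmallPtrSet for SmallSetVector on the PHI and select user lists is about iteration order: a SetVector still deduplicates like a set but walks elements in insertion order, whereas a pointer set iterates in an address-dependent order that can differ from run to run. A tiny sketch of that property, with hypothetical data:

#include "llvm/ADT/SetVector.h"
#include <cassert>
using namespace llvm;

int main() {
  int A = 0, B = 0, C = 0;
  SmallSetVector<int *, 8> Users;
  Users.insert(&B);
  Users.insert(&A);
  Users.insert(&B); // Duplicate insert is a no-op, as with a set.
  Users.insert(&C);
  // Iteration order is exactly insertion order (&B, &A, &C), independent
  // of where the objects sit in memory; a SmallPtrSet would walk them in
  // an address-dependent order, making rewrites nondeterministic.
  assert(Users[0] == &B && Users[1] == &A && Users[2] == &C);
  return 0;
}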
@@ -2199,8 +2200,8 @@ public: uint64_t NewAllocaBeginOffset, uint64_t NewAllocaEndOffset, bool IsIntegerPromotable, VectorType *PromotableVecTy, - SmallPtrSetImpl<PHINode *> &PHIUsers, - SmallPtrSetImpl<SelectInst *> &SelectUsers) + SmallSetVector<PHINode *, 8> &PHIUsers, + SmallSetVector<SelectInst *, 8> &SelectUsers) : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI), NewAllocaBeginOffset(NewAllocaBeginOffset), NewAllocaEndOffset(NewAllocaEndOffset), @@ -3880,8 +3881,8 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, // fact scheduled for promotion. unsigned PPWOldSize = PostPromotionWorklist.size(); unsigned NumUses = 0; - SmallPtrSet<PHINode *, 8> PHIUsers; - SmallPtrSet<SelectInst *, 8> SelectUsers; + SmallSetVector<PHINode *, 8> PHIUsers; + SmallSetVector<SelectInst *, 8> SelectUsers; AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(), P.endOffset(), IsIntegerPromotable, VecTy, @@ -3902,19 +3903,16 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, // Now that we've processed all the slices in the new partition, check if any // PHIs or Selects would block promotion. - for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(), - E = PHIUsers.end(); - I != E; ++I) - if (!isSafePHIToSpeculate(**I)) { + for (PHINode *PHI : PHIUsers) + if (!isSafePHIToSpeculate(*PHI)) { Promotable = false; PHIUsers.clear(); SelectUsers.clear(); break; } - for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(), - E = SelectUsers.end(); - I != E; ++I) - if (!isSafeSelectToSpeculate(**I)) { + + for (SelectInst *Sel : SelectUsers) + if (!isSafeSelectToSpeculate(*Sel)) { Promotable = false; PHIUsers.clear(); SelectUsers.clear(); diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 00e3c95f6f06..52201d8f3e51 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" @@ -83,6 +84,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeCFGSimplifyPassPass(Registry); initializeLateCFGSimplifyPassPass(Registry); initializeStructurizeCFGPass(Registry); + initializeSimpleLoopUnswitchLegacyPassPass(Registry); initializeSinkingLegacyPassPass(Registry); initializeTailCallElimPass(Registry); initializeSeparateConstOffsetFromGEPPass(Registry); diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 4d594532c365..cde659b9d189 100644 --- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -1138,7 +1138,7 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) { // Add I to DominatingExprs if it's an add/sub that can't sign overflow. 
if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) || match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) { - if (isKnownNotFullPoison(I)) { + if (programUndefinedIfFullPoison(I)) { const SCEV *Key = SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS)); DominatingExprs[Key].push_back(I); diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp new file mode 100644 index 000000000000..fb1b47c48276 --- /dev/null +++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -0,0 +1,626 @@ +//===-- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Transforms/Utils/LoopUtils.h" + +#define DEBUG_TYPE "simple-loop-unswitch" + +using namespace llvm; + +STATISTIC(NumBranches, "Number of branches unswitched"); +STATISTIC(NumSwitches, "Number of switches unswitched"); +STATISTIC(NumTrivial, "Number of unswitches that are trivial"); + +static void replaceLoopUsesWithConstant(Loop &L, Value &LIC, + Constant &Replacement) { + assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?"); + + // Replace uses of LIC in the loop with the given constant. + for (auto UI = LIC.use_begin(), UE = LIC.use_end(); UI != UE;) { + // Grab the use and walk past it so we can clobber it in the use list. + Use *U = &*UI++; + Instruction *UserI = dyn_cast<Instruction>(U->getUser()); + if (!UserI || !L.contains(UserI)) + continue; + + // Replace this use within the loop body. + *U = &Replacement; + } +} + +/// Update the dominator tree after removing one exiting predecessor of a loop +/// exit block. +static void updateLoopExitIDom(BasicBlock *LoopExitBB, Loop &L, + DominatorTree &DT) { + assert(pred_begin(LoopExitBB) != pred_end(LoopExitBB) && + "Cannot have empty predecessors of the loop exit block if we split " + "off a block to unswitch!"); + + BasicBlock *IDom = *pred_begin(LoopExitBB); + // Walk all of the other predecessors finding the nearest common dominator + // until all predecessors are covered or we reach the loop header. The loop + // header necessarily dominates all loop exit blocks in loop simplified form + // so we can early-exit the moment we hit that block. + for (auto PI = std::next(pred_begin(LoopExitBB)), PE = pred_end(LoopExitBB); + PI != PE && IDom != L.getHeader(); ++PI) + IDom = DT.findNearestCommonDominator(IDom, *PI); + + DT.changeImmediateDominator(LoopExitBB, IDom); +} + +/// Update the dominator tree after unswitching a particular former exit block. 
+/// +/// This handles the full update of the dominator tree after hoisting a block +/// that previously was an exit block (or split off of an exit block) up to be +/// reached from the new immediate dominator of the preheader. +/// +/// The common case is simple -- we just move the unswitched block to have an +/// immediate dominator of the old preheader. But in complex cases, there may +/// be other blocks reachable from the unswitched block that are immediately +/// dominated by some node between the unswitched one and the old preheader. +/// All of these also need to be hoisted in the dominator tree. We also want to +/// minimize queries to the dominator tree because each step of this +/// invalidates any DFS numbers that would make queries fast. +static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH, + DominatorTree &DT) { + DomTreeNode *OldPHNode = DT[OldPH]; + DomTreeNode *UnswitchedNode = DT[UnswitchedBB]; + // If the dominator tree has already been updated for this unswitched node, + // we're done. This makes it easier to use this routine if there are multiple + // paths to the same unswitched destination. + if (UnswitchedNode->getIDom() == OldPHNode) + return; + + // First collect the domtree nodes that we are hoisting over. These are the + // set of nodes which may have children that need to be hoisted as well. + SmallPtrSet<DomTreeNode *, 4> DomChain; + for (auto *IDom = UnswitchedNode->getIDom(); IDom != OldPHNode; + IDom = IDom->getIDom()) + DomChain.insert(IDom); + + // The unswitched block ends up immediately dominated by the old preheader -- + // regardless of whether it is the loop exit block or split off of the loop + // exit block. + DT.changeImmediateDominator(UnswitchedNode, OldPHNode); + + // Blocks reachable from the unswitched block may need to change their IDom + // as well. + SmallSetVector<BasicBlock *, 4> Worklist; + for (auto *SuccBB : successors(UnswitchedBB)) + Worklist.insert(SuccBB); + + // Walk the worklist. We grow the list in the loop and so must recompute size. + for (int i = 0; i < (int)Worklist.size(); ++i) { + auto *BB = Worklist[i]; + + DomTreeNode *Node = DT[BB]; + assert(!DomChain.count(Node) && + "Cannot be dominated by a block you can reach!"); + // If this block doesn't have an immediate dominator somewhere in the chain + // we hoisted over, then its position in the domtree hasn't changed. Either + // it is above the region hoisted and still valid, or it is below the + // hoisted block and so was trivially updated. This also applies to + // everything reachable from this block, so we're completely done with + // it. + if (!DomChain.count(Node->getIDom())) + continue; + + // We need to change the IDom for this node but also walk its successors + // which could have similar dominance position. + DT.changeImmediateDominator(Node, OldPHNode); + for (auto *SuccBB : successors(BB)) + Worklist.insert(SuccBB); + } +} + +/// Unswitch a trivial branch if the condition is loop invariant. +/// +/// This routine should only be called when loop code leading to the branch has +/// been validated as trivial (no side effects). This routine checks if the +/// condition is invariant and one of the successors is a loop exit. This +/// allows us to unswitch without duplicating the loop, making it trivial. +/// +/// If this routine fails to unswitch the branch it returns false. +/// +/// If the branch can be unswitched, this routine splits the preheader and +/// hoists the branch above that split.
Preserves loop simplified form +/// (splitting the exit block as necessary). It simplifies the branch within +/// the loop to an unconditional branch but doesn't remove it entirely. Further +/// cleanup can be done with some simplify-cfg like pass. +static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, + LoopInfo &LI) { + assert(BI.isConditional() && "Can only unswitch a conditional branch!"); + DEBUG(dbgs() << "  Trying to unswitch branch: " << BI << "\n"); + + Value *LoopCond = BI.getCondition(); + + // Need a trivial loop condition to unswitch. + if (!L.isLoopInvariant(LoopCond)) + return false; + + // FIXME: We should compute this once at the start and update it! + SmallVector<BasicBlock *, 16> ExitBlocks; + L.getExitBlocks(ExitBlocks); + SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); + + // Check to see if a successor of the branch is guaranteed to + // exit through a unique exit block without having any + // side-effects. If so, determine the value of Cond that causes + // it to do this. + ConstantInt *CondVal = ConstantInt::getTrue(BI.getContext()); + ConstantInt *Replacement = ConstantInt::getFalse(BI.getContext()); + int LoopExitSuccIdx = 0; + auto *LoopExitBB = BI.getSuccessor(0); + if (!ExitBlockSet.count(LoopExitBB)) { + std::swap(CondVal, Replacement); + LoopExitSuccIdx = 1; + LoopExitBB = BI.getSuccessor(1); + if (!ExitBlockSet.count(LoopExitBB)) + return false; + } + auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx); + assert(L.contains(ContinueBB) && + "Cannot have both successors exit and still be in the loop!"); + + // If the loop exit block contains phi nodes, this isn't trivial. + // FIXME: We should examine the PHI to determine whether or not we can handle + // it trivially. + if (isa<PHINode>(LoopExitBB->begin())) + return false; + + DEBUG(dbgs() << "    unswitching trivial branch when: " << CondVal + << " == " << LoopCond << "\n"); + + // Split the preheader, so that we know that there is a safe place to insert + // the conditional branch. We will change the preheader to have a conditional + // branch on LoopCond. + BasicBlock *OldPH = L.getLoopPreheader(); + BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI); + + // Now that we have a place to insert the conditional branch, create a place + // to branch to: this is the exit block out of the loop that we are + // unswitching. We need to split this if there are other loop predecessors. + // Because the loop is in simplified form, *any* other predecessor is enough. + BasicBlock *UnswitchedBB; + if (BasicBlock *PredBB = LoopExitBB->getUniquePredecessor()) { + (void)PredBB; + assert(PredBB == BI.getParent() && "A branch's parent isn't a predecessor!"); + UnswitchedBB = LoopExitBB; + } else { + UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI); + } + + BasicBlock *ParentBB = BI.getParent(); + + // Now splice the branch to gate reaching the new preheader and re-point its + // successors. + OldPH->getInstList().splice(std::prev(OldPH->end()), + BI.getParent()->getInstList(), BI); + OldPH->getTerminator()->eraseFromParent(); + BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB); + BI.setSuccessor(1 - LoopExitSuccIdx, NewPH); + + // Create a new unconditional branch that will continue the loop as a new + // terminator. + BranchInst::Create(ContinueBB, ParentBB); + + // Now we need to update the dominator tree.
+  updateDTAfterUnswitch(UnswitchedBB, OldPH, DT);
+  // But if we split something off of the loop exit block then we also removed
+  // one of the predecessors for the loop exit block and may need to update its
+  // idom.
+  if (UnswitchedBB != LoopExitBB)
+    updateLoopExitIDom(LoopExitBB, L, DT);
+
+  // Since this is an i1 condition we can also trivially replace uses of it
+  // within the loop with a constant.
+  replaceLoopUsesWithConstant(L, *LoopCond, *Replacement);
+
+  ++NumTrivial;
+  ++NumBranches;
+  return true;
+}
+
+/// Unswitch a trivial switch if the condition is loop invariant.
+///
+/// This routine should only be called when loop code leading to the switch has
+/// been validated as trivial (no side effects). This routine checks if the
+/// condition is invariant and that at least one of the successors is a loop
+/// exit. This allows us to unswitch without duplicating the loop, making it
+/// trivial.
+///
+/// If this routine fails to unswitch the switch it returns false.
+///
+/// If the switch can be unswitched, this routine splits the preheader and
+/// copies the switch above that split. If the default case is one of the
+/// exiting cases, it copies the non-exiting cases and points them at the new
+/// preheader. If the default case is not exiting, it copies the exiting cases
+/// and points the default at the new preheader. It preserves loop simplified
+/// form (splitting the exit blocks as necessary). It simplifies the switch
+/// within the loop by removing now-dead cases. If the default case is one of
+/// those unswitched, it replaces its destination with a new basic block
+/// containing only unreachable. Such basic blocks, while technically loop
+/// exits, are not considered for unswitching so this is a stable transform and
+/// the same switch will not be revisited. If after unswitching there is only
+/// a single in-loop successor, the switch is further simplified to an
+/// unconditional branch. Still more cleanup can be done with some
+/// simplify-cfg like pass.
+static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
+                                  LoopInfo &LI) {
+  DEBUG(dbgs() << "  Trying to unswitch switch: " << SI << "\n");
+  Value *LoopCond = SI.getCondition();
+
+  // If this isn't switching on an invariant condition, we can't unswitch it.
+  if (!L.isLoopInvariant(LoopCond))
+    return false;
+
+  // FIXME: We should compute this once at the start and update it!
+  SmallVector<BasicBlock *, 16> ExitBlocks;
+  L.getExitBlocks(ExitBlocks);
+  SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(),
+                                             ExitBlocks.end());
+
+  SmallVector<int, 4> ExitCaseIndices;
+  for (auto Case : SI.cases()) {
+    auto *SuccBB = Case.getCaseSuccessor();
+    if (ExitBlockSet.count(SuccBB) && !isa<PHINode>(SuccBB->begin()))
+      ExitCaseIndices.push_back(Case.getCaseIndex());
+  }
+  BasicBlock *DefaultExitBB = nullptr;
+  if (ExitBlockSet.count(SI.getDefaultDest()) &&
+      !isa<PHINode>(SI.getDefaultDest()->begin()) &&
+      !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator()))
+    DefaultExitBB = SI.getDefaultDest();
+  else if (ExitCaseIndices.empty())
+    return false;
+
+  DEBUG(dbgs() << "    unswitching trivial cases...\n");
+
+  SmallVector<std::pair<ConstantInt *, BasicBlock *>, 4> ExitCases;
+  ExitCases.reserve(ExitCaseIndices.size());
+  // We walk the case indices backwards so that we remove the last case first
+  // and don't disrupt the earlier indices.
+  for (unsigned Index : reverse(ExitCaseIndices)) {
+    auto CaseI = SI.case_begin() + Index;
+    // Save the value of this case.
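+    // Both the case value and its successor are captured; the successor entry
+    // may later be replaced by a split block before the case is re-added to
+    // the unswitched switch below.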
+    ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()});
+    // Delete the unswitched cases.
+    SI.removeCase(CaseI);
+  }
+
+  // Check if after this all of the remaining cases point at the same
+  // successor.
+  BasicBlock *CommonSuccBB = nullptr;
+  if (SI.getNumCases() > 0 &&
+      std::all_of(std::next(SI.case_begin()), SI.case_end(),
+                  [&SI](const SwitchInst::CaseHandle &Case) {
+                    return Case.getCaseSuccessor() ==
+                           SI.case_begin()->getCaseSuccessor();
+                  }))
+    CommonSuccBB = SI.case_begin()->getCaseSuccessor();
+
+  if (DefaultExitBB) {
+    // We can't remove the default edge so replace it with an edge to either
+    // the single common remaining successor (if we have one) or an unreachable
+    // block.
+    if (CommonSuccBB) {
+      SI.setDefaultDest(CommonSuccBB);
+    } else {
+      BasicBlock *ParentBB = SI.getParent();
+      BasicBlock *UnreachableBB = BasicBlock::Create(
+          ParentBB->getContext(),
+          Twine(ParentBB->getName()) + ".unreachable_default",
+          ParentBB->getParent());
+      new UnreachableInst(ParentBB->getContext(), UnreachableBB);
+      SI.setDefaultDest(UnreachableBB);
+      DT.addNewBlock(UnreachableBB, ParentBB);
+    }
+  } else {
+    // If we're not unswitching the default, then for the remaining cases to
+    // have a common successor the default must match it as well; with no
+    // cases left, the default itself is the common successor.
+    if (SI.getNumCases() == 0)
+      CommonSuccBB = SI.getDefaultDest();
+    else if (SI.getDefaultDest() != CommonSuccBB)
+      CommonSuccBB = nullptr;
+  }
+
+  // Split the preheader, so that we know that there is a safe place to insert
+  // the switch.
+  BasicBlock *OldPH = L.getLoopPreheader();
+  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI);
+  OldPH->getTerminator()->eraseFromParent();
+
+  // Now add the unswitched switch.
+  auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
+
+  // Split any exit blocks with remaining in-loop predecessors. We walk in
+  // reverse so that we split in the same order as the cases appeared. This is
+  // purely for convenience of reading the resulting IR, but it doesn't cost
+  // anything really.
+  SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;
+  // Handle the default exit if necessary.
+  // FIXME: It'd be great if we could merge this with the loop below but LLVM's
+  // ranges aren't quite powerful enough yet.
+  if (DefaultExitBB && !pred_empty(DefaultExitBB)) {
+    auto *SplitBB =
+        SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI);
+    updateLoopExitIDom(DefaultExitBB, L, DT);
+    DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
+  }
+  // Note that we must use a reference in the for loop so that we update the
+  // container.
+  for (auto &CasePair : reverse(ExitCases)) {
+    // Grab a reference to the exit block in the pair so that we can update it.
+    BasicBlock *&ExitBB = CasePair.second;
+
+    // If this case is the last edge into the exit block, we can simply reuse
+    // it as it will no longer be a loop exit. No mapping necessary.
+    if (pred_empty(ExitBB))
+      continue;
+
+    // Otherwise we need to split the exit block so that we retain an exit
+    // block from the loop and a target for the unswitched condition.
+    BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
+    if (!SplitExitBB) {
+      // If this is the first time we see this, do the split and remember it.
+      SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI);
+      updateLoopExitIDom(ExitBB, L, DT);
+    }
+    ExitBB = SplitExitBB;
+  }
+
+  // Now add the unswitched cases. We do this in reverse order as we built them
+  // in reverse order.
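+  // For example, if cases 1 and 2 both exit the loop, ExitCases was built as
+  // [(2, ...), (1, ...)], so iterating it in reverse re-adds case 1 and then
+  // case 2, matching their original textual order.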
+  for (auto CasePair : reverse(ExitCases)) {
+    ConstantInt *CaseVal = CasePair.first;
+    BasicBlock *UnswitchedBB = CasePair.second;
+
+    NewSI->addCase(CaseVal, UnswitchedBB);
+    updateDTAfterUnswitch(UnswitchedBB, OldPH, DT);
+  }
+
+  // If the default was unswitched, re-point it and add explicit cases for
+  // entering the loop.
+  if (DefaultExitBB) {
+    NewSI->setDefaultDest(DefaultExitBB);
+    updateDTAfterUnswitch(DefaultExitBB, OldPH, DT);
+
+    // We removed all the exit cases, so we just copy the cases to the
+    // unswitched switch.
+    for (auto Case : SI.cases())
+      NewSI->addCase(Case.getCaseValue(), NewPH);
+  }
+
+  // If we ended up with a common successor for every path through the switch
+  // after unswitching, rewrite it to an unconditional branch to make it easy
+  // to recognize. Otherwise we potentially have to recognize the default case
+  // pointing at unreachable and other complexity.
+  if (CommonSuccBB) {
+    BasicBlock *BB = SI.getParent();
+    SI.eraseFromParent();
+    BranchInst::Create(CommonSuccBB, BB);
+  }
+
+  DT.verifyDomTree();
+  ++NumTrivial;
+  ++NumSwitches;
+  return true;
+}
+
+/// This routine scans the loop to find a branch or switch which occurs before
+/// any side effects occur. These can potentially be unswitched without
+/// duplicating the loop. If a branch or switch is successfully unswitched,
+/// the scanning continues to see if subsequent branches or switches have
+/// become trivial. Once all trivial candidates have been unswitched, this
+/// routine returns.
+///
+/// The return value indicates whether anything was unswitched (and therefore
+/// changed).
+static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
+                                         LoopInfo &LI) {
+  bool Changed = false;
+
+  // If the loop header has only one reachable successor we should keep looking
+  // for trivial condition candidates in the successor as well. An alternative
+  // is to constant fold conditions and merge successors into the loop header
+  // (then we only need to check the header's terminator). The reason for not
+  // doing this in the LoopUnswitch pass is that it could potentially break
+  // LoopPassManager's invariants. Folding dead branches could either eliminate
+  // the current loop or make other loops unreachable. LCSSA form might also
+  // not be preserved after deleting branches. The following code keeps
+  // traversing the loop header's successors until it finds a trivial condition
+  // candidate (a condition that is not a constant). Since unswitching
+  // generates branches with constant conditions, this scenario could be very
+  // common in practice.
+  BasicBlock *CurrentBB = L.getHeader();
+  SmallPtrSet<BasicBlock *, 8> Visited;
+  Visited.insert(CurrentBB);
+  do {
+    // Check if there are any side-effecting instructions (e.g. stores, calls,
+    // volatile loads) in the part of the loop that the code *would* execute
+    // without unswitching.
+    if (llvm::any_of(*CurrentBB,
+                     [](Instruction &I) { return I.mayHaveSideEffects(); }))
+      return Changed;
+
+    TerminatorInst *CurrentTerm = CurrentBB->getTerminator();
+
+    if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
+      // Don't bother trying to unswitch past a switch with a constant
+      // condition. This should be removed prior to running this pass by
+      // simplify-cfg.
+      if (isa<Constant>(SI->getCondition()))
+        return Changed;
+
+      if (!unswitchTrivialSwitch(L, *SI, DT, LI))
+        // Couldn't unswitch this one so we're done.
+        return Changed;
+
+      // Mark that we managed to unswitch something.
+      Changed = true;
+
+      // If unswitching turned the terminator into an unconditional branch then
+      // we can continue. The unswitching logic specifically works to fold any
+      // cases it can into an unconditional branch to make it easier to
+      // recognize here.
+      auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
+      if (!BI || BI->isConditional())
+        return Changed;
+
+      CurrentBB = BI->getSuccessor(0);
+      continue;
+    }
+
+    auto *BI = dyn_cast<BranchInst>(CurrentTerm);
+    if (!BI)
+      // We do not understand other terminator instructions.
+      return Changed;
+
+    // Don't bother trying to unswitch past an unconditional branch or a branch
+    // with a constant value. These should be removed by simplify-cfg prior to
+    // running this pass.
+    if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
+      return Changed;
+
+    // Found a trivial condition candidate: a non-foldable conditional branch.
+    // If we fail to unswitch this, we can't do anything else that is trivial.
+    if (!unswitchTrivialBranch(L, *BI, DT, LI))
+      return Changed;
+
+    // Mark that we managed to unswitch something.
+    Changed = true;
+
+    // We unswitched the branch. This should always leave us with an
+    // unconditional branch that we can follow now.
+    BI = cast<BranchInst>(CurrentBB->getTerminator());
+    assert(!BI->isConditional() &&
+           "Cannot form a conditional branch by unswitching!");
+    CurrentBB = BI->getSuccessor(0);
+
+    // When continuing, if we exit the loop or reach a previously visited
+    // block, then we cannot reach any trivial condition candidates (unfoldable
+    // branch instructions or switch instructions) and no unswitch can happen.
+  } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
+
+  return Changed;
+}
+
+/// Unswitch control flow predicated on loop invariant conditions.
+///
+/// This first hoists all branches or switches which are trivial (i.e., do not
+/// require duplicating any part of the loop) out of the loop body. It then
+/// looks at other loop invariant control flows and tries to unswitch those as
+/// well by cloning the loop if the result is small enough.
+static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
+                         AssumptionCache &AC) {
+  assert(L.isLCSSAForm(DT) &&
+         "Loops must be in LCSSA form before unswitching.");
+  bool Changed = false;
+
+  // Must be in loop simplified form: we need a preheader and dedicated exits.
+  if (!L.isLoopSimplifyForm())
+    return false;
+
+  // Try trivial unswitching first, before looping over other basic blocks in
+  // the loop.
+  Changed |= unswitchAllTrivialConditions(L, DT, LI);
+
+  // FIXME: Add support for non-trivial unswitching by cloning the loop.
+
+  return Changed;
+}
+
+PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
+                                              LoopStandardAnalysisResults &AR,
+                                              LPMUpdater &U) {
+  Function &F = *L.getHeader()->getParent();
+  (void)F;
+
+  DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L << "\n");
+
+  if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC))
+    return PreservedAnalyses::all();
+
+#ifndef NDEBUG
+  // Historically this pass has had issues with the dominator tree so verify it
+  // in asserts builds.
+  AR.DT.verifyDomTree();
+#endif
+  return getLoopPassPreservedAnalyses();
+}
+
+namespace {
+class SimpleLoopUnswitchLegacyPass : public LoopPass {
+public:
+  static char ID; // Pass ID, replacement for typeid
+  explicit SimpleLoopUnswitchLegacyPass() : LoopPass(ID) {
+    initializeSimpleLoopUnswitchLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<AssumptionCacheTracker>();
+    getLoopAnalysisUsage(AU);
+  }
+};
+} // namespace
+
+bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
+  if (skipLoop(L))
+    return false;
+
+  Function &F = *L->getHeader()->getParent();
+
+  DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L << "\n");
+
+  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+
+  bool Changed = unswitchLoop(*L, DT, LI, AC);
+
+#ifndef NDEBUG
+  // Historically this pass has had issues with the dominator tree so verify it
+  // in asserts builds.
+  DT.verifyDomTree();
+#endif
+  return Changed;
+}
+
+char SimpleLoopUnswitchLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
+                      "Simple unswitch loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
+                    "Simple unswitch loops", false, false)
+
+Pass *llvm::createSimpleLoopUnswitchLegacyPass() {
+  return new SimpleLoopUnswitchLegacyPass();
+}
