| field | value | date |
|---|---|---|
| author | Dimitry Andric <dim@FreeBSD.org> | 2021-08-22 19:00:43 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2021-11-13 20:39:49 +0000 |
| commit | fe6060f10f634930ff71b7c50291ddc610da2475 (patch) | |
| tree | 1483580c790bd4d27b6500a7542b5ee00534d3cc /contrib/llvm-project/llvm/lib/Transforms/Utils | |
| parent | b61bce17f346d79cecfd8f195a64b10f77be43b1 (diff) | |
| parent | 344a3780b2e33f6ca763666c380202b18aab72a3 (diff) | |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Utils')
50 files changed, 6025 insertions, 2593 deletions
```diff
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
index ccdcf7cbce38..8cd16ca3906f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -134,11 +134,11 @@ static Value *getStrlenWithNull(IRBuilder<> &Builder, Value *Str) {
   auto PtrPhi = Builder.CreatePHI(Str->getType(), 2);
   PtrPhi->addIncoming(Str, Prev);
-  auto PtrNext = Builder.CreateGEP(PtrPhi, One);
+  auto PtrNext = Builder.CreateGEP(Builder.getInt8Ty(), PtrPhi, One);
   PtrPhi->addIncoming(PtrNext, While);
 
   // Condition for the while loop.
-  auto Data = Builder.CreateLoad(PtrPhi);
+  auto Data = Builder.CreateLoad(Builder.getInt8Ty(), PtrPhi);
   auto Cmp = Builder.CreateICmpEQ(Data, CharZero);
   Builder.CreateCondBr(Cmp, WhileDone, While);
```
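The two `AMDGPUEmitPrintf.cpp` hunks above are part of LLVM's migration toward opaque pointers: `IRBuilder::CreateGEP` and `IRBuilder::CreateLoad` now take the element/result type as an explicit argument instead of deriving it from the pointer operand's pointee type. A minimal sketch of the new calling convention (the surrounding helper and names are illustrative, not from this commit):

```cpp
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Hypothetical helper: one step of a byte-wise scan, written against the
// explicit-type IRBuilder API used in the hunks above.
void emitScanStep(IRBuilder<> &Builder, Value *Ptr) {
  Value *One = Builder.getInt64(1);
  // Old API (removed): Builder.CreateGEP(Ptr, One) read the element type
  // out of Ptr's pointee type. The new overload spells it out:
  Value *Next = Builder.CreateGEP(Builder.getInt8Ty(), Ptr, One);
  // Loads likewise name their result type explicitly:
  Value *Ch = Builder.CreateLoad(Builder.getInt8Ty(), Ptr);
  Value *IsNul = Builder.CreateICmpEQ(Ch, Builder.getInt8(0));
  (void)Next;
  (void)IsNul;
}
```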
```diff
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
index 0908b361a4d4..e789194eb3ab 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -70,9 +70,11 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
 #include <utility>
 
 using namespace llvm;
+using namespace sampleprofutil;
 
 #define DEBUG_TYPE "add-discriminators"
 
@@ -172,6 +174,10 @@ static bool addDiscriminators(Function &F) {
   if (NoDiscriminators || !F.getSubprogram())
     return false;
 
+  // Create FSDiscriminatorVariable if flow sensitive discriminators are used.
+  if (EnableFSDiscriminator)
+    createFSDiscriminatorVariable(F.getParent());
+
   bool Changed = false;
 
   using Location = std::pair<StringRef, unsigned>;
```
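The `AddDiscriminators.cpp` hunk hooks the pass up to flow-sensitive discriminators via `SampleProfileLoaderBaseUtil`. As background, a hedged sketch of where discriminators live (only the public `DILocation` API is used; the helper name is hypothetical):

```cpp
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Hypothetical helper: discriminators are stored on an instruction's
// DILocation and let sample profilers distinguish multiple basic blocks
// that share one source line.
unsigned discriminatorOf(const Instruction &I) {
  if (const DILocation *Loc = I.getDebugLoc())
    return Loc->getDiscriminator();
  return 0; // No debug location: no discriminator.
}
```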
```diff
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
index 3daff3b4430b..d689e04da36f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
@@ -6,8 +6,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "assume-builder"
-
 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/MapVector.h"
@@ -27,6 +25,7 @@
 
 using namespace llvm;
 
+namespace llvm {
 cl::opt<bool> ShouldPreserveAllAttributes(
     "assume-preserve-all", cl::init(false), cl::Hidden,
     cl::desc("enable preservation of all attrbitues. even those that are "
@@ -36,6 +35,9 @@ cl::opt<bool> EnableKnowledgeRetention(
     "enable-knowledge-retention", cl::init(false), cl::Hidden,
     cl::desc(
         "enable preservation of attributes throughout code transformation"));
+} // namespace llvm
+
+#define DEBUG_TYPE "assume-builder"
 
 STATISTIC(NumAssumeBuilt, "Number of assume built by the assume builder");
 STATISTIC(NumBundlesInAssumes, "Total number of Bundles in the assume built");
@@ -65,7 +67,7 @@ bool isUsefullToPreserve(Attribute::AttrKind Kind) {
 
 /// This function will try to transform the given knowledge into a more
 /// canonical one. the canonical knowledge maybe the given one.
-RetainedKnowledge canonicalizedKnowledge(RetainedKnowledge RK, Module *M) {
+RetainedKnowledge canonicalizedKnowledge(RetainedKnowledge RK, DataLayout DL) {
   switch (RK.AttrKind) {
   default:
     return RK;
@@ -76,8 +78,7 @@ RetainedKnowledge canonicalizedKnowledge(RetainedKnowledge RK, Module *M) {
     Value *V = RK.WasOn->stripInBoundsOffsets([&](const Value *Strip) {
       if (auto *GEP = dyn_cast<GEPOperator>(Strip))
         RK.ArgValue =
-            MinAlign(RK.ArgValue,
-                     GEP->getMaxPreservedAlignment(M->getDataLayout()).value());
+            MinAlign(RK.ArgValue, GEP->getMaxPreservedAlignment(DL).value());
     });
     RK.WasOn = V;
     return RK;
  }
@@ -85,8 +86,8 @@ RetainedKnowledge canonicalizedKnowledge(RetainedKnowledge RK, Module *M) {
   case Attribute::Dereferenceable:
   case Attribute::DereferenceableOrNull: {
     int64_t Offset = 0;
-    Value *V = GetPointerBaseWithConstantOffset(
-        RK.WasOn, Offset, M->getDataLayout(), /*AllowNonInBounds*/ false);
+    Value *V = GetPointerBaseWithConstantOffset(RK.WasOn, Offset, DL,
+                                                /*AllowNonInBounds*/ false);
     if (Offset < 0)
       return RK;
     RK.ArgValue = RK.ArgValue + Offset;
@@ -103,16 +104,16 @@ struct AssumeBuilderState {
   using MapKey = std::pair<Value *, Attribute::AttrKind>;
   SmallMapVector<MapKey, unsigned, 8> AssumedKnowledgeMap;
-  Instruction *InstBeingRemoved = nullptr;
+  Instruction *InstBeingModified = nullptr;
   AssumptionCache* AC = nullptr;
   DominatorTree* DT = nullptr;
 
   AssumeBuilderState(Module *M, Instruction *I = nullptr,
                      AssumptionCache *AC = nullptr, DominatorTree *DT = nullptr)
-      : M(M), InstBeingRemoved(I), AC(AC), DT(DT) {}
+      : M(M), InstBeingModified(I), AC(AC), DT(DT) {}
 
   bool tryToPreserveWithoutAddingAssume(RetainedKnowledge RK) {
-    if (!InstBeingRemoved || !RK.WasOn)
+    if (!InstBeingModified || !RK.WasOn)
       return false;
     bool HasBeenPreserved = false;
     Use* ToUpdate = nullptr;
@@ -120,13 +121,12 @@ struct AssumeBuilderState {
         RK.WasOn, {RK.AttrKind}, AC,
         [&](RetainedKnowledge RKOther, Instruction *Assume,
             const CallInst::BundleOpInfo *Bundle) {
-          if (!isValidAssumeForContext(Assume, InstBeingRemoved, DT))
+          if (!isValidAssumeForContext(Assume, InstBeingModified, DT))
             return false;
           if (RKOther.ArgValue >= RK.ArgValue) {
             HasBeenPreserved = true;
             return true;
-          } else if (isValidAssumeForContext(InstBeingRemoved, Assume,
-                                             DT)) {
+          } else if (isValidAssumeForContext(InstBeingModified, Assume, DT)) {
             HasBeenPreserved = true;
             IntrinsicInst *Intr = cast<IntrinsicInst>(Assume);
             ToUpdate = &Intr->op_begin()[Bundle->Begin + ABA_Argument];
@@ -152,7 +152,7 @@ struct AssumeBuilderState {
     }
     if (auto *Arg = dyn_cast<Argument>(RK.WasOn)) {
       if (Arg->hasAttribute(RK.AttrKind) &&
-          (!Attribute::doesAttrKindHaveArgument(RK.AttrKind) ||
+          (!Attribute::isIntAttrKind(RK.AttrKind) ||
           Arg->getAttribute(RK.AttrKind).getValueAsInt() >= RK.ArgValue))
         return false;
       return true;
@@ -162,14 +162,14 @@ struct AssumeBuilderState {
       if (RK.WasOn->use_empty())
         return false;
       Use *SingleUse = RK.WasOn->getSingleUndroppableUse();
-      if (SingleUse && SingleUse->getUser() == InstBeingRemoved)
+      if (SingleUse && SingleUse->getUser() == InstBeingModified)
         return false;
     }
     return true;
   }
 
   void addKnowledge(RetainedKnowledge RK) {
-    RK = canonicalizedKnowledge(RK, M);
+    RK = canonicalizedKnowledge(RK, M->getDataLayout());
     if (!isKnowledgeWorthPreserving(RK))
       return;
 
@@ -206,8 +206,12 @@ struct AssumeBuilderState {
     auto addAttrList = [&](AttributeList AttrList) {
       for (unsigned Idx = AttributeList::FirstArgIndex;
            Idx < AttrList.getNumAttrSets(); Idx++)
-        for (Attribute Attr : AttrList.getAttributes(Idx))
-          addAttribute(Attr, Call->getArgOperand(Idx - 1));
+        for (Attribute Attr : AttrList.getAttributes(Idx)) {
+          bool IsPoisonAttr = Attr.hasAttribute(Attribute::NonNull) ||
+                              Attr.hasAttribute(Attribute::Alignment);
+          if (!IsPoisonAttr || Call->isPassingUndefUB(Idx - 1))
+            addAttribute(Attr, Call->getArgOperand(Idx - 1));
+        }
       for (Attribute Attr : AttrList.getFnAttributes())
         addAttribute(Attr, nullptr);
     };
@@ -216,7 +220,7 @@ struct AssumeBuilderState {
       addAttrList(Fn->getAttributes());
   }
 
-  IntrinsicInst *build() {
+  AssumeInst *build() {
     if (AssumedKnowledgeMap.empty())
       return nullptr;
     if (!DebugCounter::shouldExecute(BuildAssumeCounter))
@@ -240,7 +244,7 @@ struct AssumeBuilderState {
       NumBundlesInAssumes++;
     }
     NumAssumeBuilt++;
-    return cast<IntrinsicInst>(CallInst::Create(
+    return cast<AssumeInst>(CallInst::Create(
         FnAssume, ArrayRef<Value *>({ConstantInt::getTrue(C)}), OpBundle));
   }
 
@@ -278,7 +282,7 @@ struct AssumeBuilderState {
 
 } // namespace
 
-IntrinsicInst *llvm::buildAssumeFromInst(Instruction *I) {
+AssumeInst *llvm::buildAssumeFromInst(Instruction *I) {
   if (!EnableKnowledgeRetention)
     return nullptr;
   AssumeBuilderState Builder(I->getModule());
@@ -292,13 +296,38 @@ void llvm::salvageKnowledge(Instruction *I, AssumptionCache *AC,
     return;
   AssumeBuilderState Builder(I->getModule(), I, AC, DT);
   Builder.addInstruction(I);
-  if (IntrinsicInst *Intr = Builder.build()) {
+  if (auto *Intr = Builder.build()) {
     Intr->insertBefore(I);
     if (AC)
       AC->registerAssumption(Intr);
   }
 }
 
+AssumeInst *
+llvm::buildAssumeFromKnowledge(ArrayRef<RetainedKnowledge> Knowledge,
+                               Instruction *CtxI, AssumptionCache *AC,
+                               DominatorTree *DT) {
+  AssumeBuilderState Builder(CtxI->getModule(), CtxI, AC, DT);
+  for (const RetainedKnowledge &RK : Knowledge)
+    Builder.addKnowledge(RK);
+  return Builder.build();
+}
+
+RetainedKnowledge llvm::simplifyRetainedKnowledge(AssumeInst *Assume,
+                                                  RetainedKnowledge RK,
+                                                  AssumptionCache *AC,
+                                                  DominatorTree *DT) {
+  AssumeBuilderState Builder(Assume->getModule(), Assume, AC, DT);
+  RK = canonicalizedKnowledge(RK, Assume->getModule()->getDataLayout());
+
+  if (!Builder.isKnowledgeWorthPreserving(RK))
+    return RetainedKnowledge::none();
+
+  if (Builder.tryToPreserveWithoutAddingAssume(RK))
+    return RetainedKnowledge::none();
+  return RK;
+}
+
 namespace {
 
 struct AssumeSimplify {
@@ -344,7 +373,8 @@ struct AssumeSimplify {
     for (IntrinsicInst *Assume : CleanupToDo) {
       auto *Arg = dyn_cast<ConstantInt>(Assume->getOperand(0));
       if (!Arg || Arg->isZero() ||
-          (!ForceCleanup && !isAssumeWithEmptyBundle(*Assume)))
+          (!ForceCleanup &&
+           !isAssumeWithEmptyBundle(cast<AssumeInst>(*Assume))))
         continue;
       MadeChange = true;
       if (ForceCleanup)
@@ -387,11 +417,12 @@ struct AssumeSimplify {
         CleanupToDo.insert(Assume);
         continue;
       }
-      RetainedKnowledge RK = getKnowledgeFromBundle(*Assume, BOI);
+      RetainedKnowledge RK =
+          getKnowledgeFromBundle(cast<AssumeInst>(*Assume), BOI);
       if (auto *Arg = dyn_cast_or_null<Argument>(RK.WasOn)) {
         bool HasSameKindAttr = Arg->hasAttribute(RK.AttrKind);
         if (HasSameKindAttr)
-          if (!Attribute::doesAttrKindHaveArgument(RK.AttrKind) ||
+          if (!Attribute::isIntAttrKind(RK.AttrKind) ||
              Arg->getAttribute(RK.AttrKind).getValueAsInt() >=
                   RK.ArgValue) {
             RemoveFromAssume();
@@ -446,7 +477,8 @@ struct AssumeSimplify {
     for (IntrinsicInst *I : make_range(Begin, End)) {
       CleanupToDo.insert(I);
       for (CallInst::BundleOpInfo &BOI : I->bundle_op_infos()) {
-        RetainedKnowledge RK = getKnowledgeFromBundle(*I, BOI);
+        RetainedKnowledge RK =
+            getKnowledgeFromBundle(cast<AssumeInst>(*I), BOI);
         if (!RK)
           continue;
         Builder.addKnowledge(RK);
@@ -466,7 +498,7 @@ struct AssumeSimplify {
         InsertPt = It->getNextNode();
         break;
       }
-    IntrinsicInst *MergedAssume = Builder.build();
+    auto *MergedAssume = Builder.build();
     if (!MergedAssume)
       return;
     MadeChange = true;
```
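The `AssumeBundleBuilder.cpp` changes re-type the builder around the `AssumeInst` subclass and add the `buildAssumeFromKnowledge`/`simplifyRetainedKnowledge` entry points. A sketch of how a transform might use the salvage API before erasing an instruction (hypothetical helper; the whole mechanism is gated behind the `-enable-knowledge-retention` flag shown above):

```cpp
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"

using namespace llvm;

// Hypothetical helper: preserve an instruction's nonnull/align/
// dereferenceable facts as an llvm.assume before deleting it.
void eraseButKeepKnowledge(Instruction *I, AssumptionCache *AC,
                           DominatorTree *DT) {
  salvageKnowledge(I, AC, DT); // No-op unless knowledge retention is enabled.
  I->eraseFromParent();
}
```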
```diff
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 6bcd42c4c6d8..ee933b638a23 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -33,6 +33,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PseudoProbe.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
@@ -124,11 +125,9 @@ bool llvm::EliminateUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
 
   // Collect all dead blocks.
   std::vector<BasicBlock*> DeadBlocks;
-  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
-    if (!Reachable.count(&*I)) {
-      BasicBlock *BB = &*I;
-      DeadBlocks.push_back(BB);
-    }
+  for (BasicBlock &BB : F)
+    if (!Reachable.count(&BB))
+      DeadBlocks.push_back(&BB);
 
   // Delete the dead blocks.
   DeleteDeadBlocks(DeadBlocks, DTU, KeepOneInputPHIs);
@@ -209,9 +208,8 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
 
   // Can't merge if there is PHI loop.
   for (PHINode &PN : BB->phis())
-    for (Value *IncValue : PN.incoming_values())
-      if (IncValue == &PN)
-        return false;
+    if (llvm::is_contained(PN.incoming_values(), &PN))
+      return false;
 
   LLVM_DEBUG(dbgs() << "Merging: " << BB->getName() << " into "
                     << PredBB->getName() << "\n");
@@ -230,21 +228,22 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
   // These dominator edges will be redirected from Pred.
   std::vector<DominatorTree::UpdateType> Updates;
   if (DTU) {
-    SmallSetVector<BasicBlock *, 2> UniqueSuccessors(succ_begin(BB),
-                                                     succ_end(BB));
-    Updates.reserve(1 + (2 * UniqueSuccessors.size()));
+    SmallPtrSet<BasicBlock *, 2> SuccsOfBB(succ_begin(BB), succ_end(BB));
+    SmallPtrSet<BasicBlock *, 2> SuccsOfPredBB(succ_begin(PredBB),
+                                               succ_begin(PredBB));
+    Updates.reserve(Updates.size() + 2 * SuccsOfBB.size() + 1);
     // Add insert edges first. Experimentally, for the particular case of two
     // blocks that can be merged, with a single successor and single predecessor
     // respectively, it is beneficial to have all insert updates first. Deleting
     // edges first may lead to unreachable blocks, followed by inserting edges
     // making the blocks reachable again. Such DT updates lead to high compile
     // times. We add inserts before deletes here to reduce compile time.
-    for (BasicBlock *UniqueSuccessor : UniqueSuccessors)
-      // This successor of BB may already have PredBB as a predecessor.
-      if (!llvm::is_contained(successors(PredBB), UniqueSuccessor))
-        Updates.push_back({DominatorTree::Insert, PredBB, UniqueSuccessor});
-    for (BasicBlock *UniqueSuccessor : UniqueSuccessors)
-      Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor});
+    for (BasicBlock *SuccOfBB : SuccsOfBB)
+      // This successor of BB may already be a PredBB's successor.
+      if (!SuccsOfPredBB.contains(SuccOfBB))
+        Updates.push_back({DominatorTree::Insert, PredBB, SuccOfBB});
+    for (BasicBlock *SuccOfBB : SuccsOfBB)
+      Updates.push_back({DominatorTree::Delete, BB, SuccOfBB});
     Updates.push_back({DominatorTree::Delete, PredBB, BB});
   }
@@ -299,17 +298,11 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
   if (MemDep)
     MemDep->invalidateCachedPredecessors();
 
-  // Finally, erase the old block and update dominator info.
-  if (DTU) {
-    assert(BB->getInstList().size() == 1 &&
-           isa<UnreachableInst>(BB->getTerminator()) &&
-           "The successor list of BB isn't empty before "
-           "applying corresponding DTU updates.");
+  if (DTU)
     DTU->applyUpdates(Updates);
-    DTU->deleteBB(BB);
-  } else {
-    BB->eraseFromParent(); // Nuke BB if DTU is nullptr.
-  }
+
+  // Finally, erase the old block and update dominator info.
+  DeleteDeadBlock(BB, DTU);
 
   return true;
 }
@@ -409,7 +402,8 @@ static bool removeRedundantDbgInstrsUsingBackwardScan(BasicBlock *BB) {
 /// - Keep track of non-overlapping fragments.
 static bool removeRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) {
   SmallVector<DbgValueInst *, 8> ToBeRemoved;
-  DenseMap<DebugVariable, std::pair<Value *, DIExpression *> > VariableMap;
+  DenseMap<DebugVariable, std::pair<SmallVector<Value *, 4>, DIExpression *>>
+      VariableMap;
   for (auto &I : *BB) {
     if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(&I)) {
       DebugVariable Key(DVI->getVariable(),
@@ -418,10 +412,10 @@ static bool removeRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) {
       auto VMI = VariableMap.find(Key);
       // Update the map if we found a new value/expression describing the
       // variable, or if the variable wasn't mapped already.
-      if (VMI == VariableMap.end() ||
-          VMI->second.first != DVI->getValue() ||
+      SmallVector<Value *, 4> Values(DVI->getValues());
+      if (VMI == VariableMap.end() || VMI->second.first != Values ||
          VMI->second.second != DVI->getExpression()) {
-        VariableMap[Key] = { DVI->getValue(), DVI->getExpression() };
+        VariableMap[Key] = {Values, DVI->getExpression()};
         continue;
       }
       // Found an identical mapping. Remember the instruction for later removal.
@@ -501,13 +495,20 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
                             const Twine &BBName) {
   unsigned SuccNum = GetSuccessorNumber(BB, Succ);
 
-  // If this is a critical edge, let SplitCriticalEdge do it.
   Instruction *LatchTerm = BB->getTerminator();
-  if (SplitCriticalEdge(
-          LatchTerm, SuccNum,
-          CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA(),
-          BBName))
-    return LatchTerm->getSuccessor(SuccNum);
+
+  CriticalEdgeSplittingOptions Options =
+      CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA();
+
+  if ((isCriticalEdge(LatchTerm, SuccNum, Options.MergeIdenticalEdges))) {
+    // If it is a critical edge, and the succesor is an exception block, handle
+    // the split edge logic in this specific function
+    if (Succ->isEHPad())
+      return ehAwareSplitEdge(BB, Succ, nullptr, nullptr, Options, BBName);
+
+    // If this is a critical edge, let SplitKnownCriticalEdge do it.
+    return SplitKnownCriticalEdge(LatchTerm, SuccNum, Options, BBName);
+  }
 
   // If the edge isn't critical, then BB has a single successor or Succ has a
   // single pred.  Split the block.
@@ -527,6 +528,218 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
   return SplitBlock(BB, BB->getTerminator(), DT, LI, MSSAU, BBName);
 }
 
+void llvm::setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) {
+  if (auto *II = dyn_cast<InvokeInst>(TI))
+    II->setUnwindDest(Succ);
+  else if (auto *CS = dyn_cast<CatchSwitchInst>(TI))
+    CS->setUnwindDest(Succ);
+  else if (auto *CR = dyn_cast<CleanupReturnInst>(TI))
+    CR->setUnwindDest(Succ);
+  else
+    llvm_unreachable("unexpected terminator instruction");
+}
+
+void llvm::updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred,
+                          BasicBlock *NewPred, PHINode *Until) {
+  int BBIdx = 0;
+  for (PHINode &PN : DestBB->phis()) {
+    // We manually update the LandingPadReplacement PHINode and it is the last
+    // PHI Node. So, if we find it, we are done.
+    if (Until == &PN)
+      break;
+
+    // Reuse the previous value of BBIdx if it lines up. In cases where we
+    // have multiple phi nodes with *lots* of predecessors, this is a speed
+    // win because we don't have to scan the PHI looking for TIBB. This
+    // happens because the BB list of PHI nodes are usually in the same
+    // order.
+    if (PN.getIncomingBlock(BBIdx) != OldPred)
+      BBIdx = PN.getBasicBlockIndex(OldPred);
+
+    assert(BBIdx != -1 && "Invalid PHI Index!");
+    PN.setIncomingBlock(BBIdx, NewPred);
+  }
+}
+
+BasicBlock *llvm::ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ,
+                                   LandingPadInst *OriginalPad,
+                                   PHINode *LandingPadReplacement,
+                                   const CriticalEdgeSplittingOptions &Options,
+                                   const Twine &BBName) {
+
+  auto *PadInst = Succ->getFirstNonPHI();
+  if (!LandingPadReplacement && !PadInst->isEHPad())
+    return SplitEdge(BB, Succ, Options.DT, Options.LI, Options.MSSAU, BBName);
+
+  auto *LI = Options.LI;
+  SmallVector<BasicBlock *, 4> LoopPreds;
+  // Check if extra modifications will be required to preserve loop-simplify
+  // form after splitting. If it would require splitting blocks with IndirectBr
+  // terminators, bail out if preserving loop-simplify form is requested.
+  if (Options.PreserveLoopSimplify && LI) {
+    if (Loop *BBLoop = LI->getLoopFor(BB)) {
+
+      // The only way that we can break LoopSimplify form by splitting a
+      // critical edge is when there exists some edge from BBLoop to Succ *and*
+      // the only edge into Succ from outside of BBLoop is that of NewBB after
+      // the split. If the first isn't true, then LoopSimplify still holds,
+      // NewBB is the new exit block and it has no non-loop predecessors. If the
+      // second isn't true, then Succ was not in LoopSimplify form prior to
+      // the split as it had a non-loop predecessor. In both of these cases,
+      // the predecessor must be directly in BBLoop, not in a subloop, or again
+      // LoopSimplify doesn't hold.
+      for (BasicBlock *P : predecessors(Succ)) {
+        if (P == BB)
+          continue; // The new block is known.
+        if (LI->getLoopFor(P) != BBLoop) {
+          // Loop is not in LoopSimplify form, no need to re simplify after
+          // splitting edge.
+          LoopPreds.clear();
+          break;
+        }
+        LoopPreds.push_back(P);
+      }
+      // Loop-simplify form can be preserved, if we can split all in-loop
+      // predecessors.
+      if (any_of(LoopPreds, [](BasicBlock *Pred) {
+            return isa<IndirectBrInst>(Pred->getTerminator());
+          })) {
+        return nullptr;
+      }
+    }
+  }
+
+  auto *NewBB =
+      BasicBlock::Create(BB->getContext(), BBName, BB->getParent(), Succ);
+  setUnwindEdgeTo(BB->getTerminator(), NewBB);
+  updatePhiNodes(Succ, BB, NewBB, LandingPadReplacement);
+
+  if (LandingPadReplacement) {
+    auto *NewLP = OriginalPad->clone();
+    auto *Terminator = BranchInst::Create(Succ, NewBB);
+    NewLP->insertBefore(Terminator);
+    LandingPadReplacement->addIncoming(NewLP, NewBB);
+  } else {
+    Value *ParentPad = nullptr;
+    if (auto *FuncletPad = dyn_cast<FuncletPadInst>(PadInst))
+      ParentPad = FuncletPad->getParentPad();
+    else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(PadInst))
+      ParentPad = CatchSwitch->getParentPad();
+    else if (auto *CleanupPad = dyn_cast<CleanupPadInst>(PadInst))
+      ParentPad = CleanupPad->getParentPad();
+    else if (auto *LandingPad = dyn_cast<LandingPadInst>(PadInst))
+      ParentPad = LandingPad->getParent();
+    else
+      llvm_unreachable("handling for other EHPads not implemented yet");
+
+    auto *NewCleanupPad = CleanupPadInst::Create(ParentPad, {}, BBName, NewBB);
+    CleanupReturnInst::Create(NewCleanupPad, Succ, NewBB);
+  }
+
+  auto *DT = Options.DT;
+  auto *MSSAU = Options.MSSAU;
+  if (!DT && !LI)
+    return NewBB;
+
+  if (DT) {
+    DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+    SmallVector<DominatorTree::UpdateType, 3> Updates;
+
+    Updates.push_back({DominatorTree::Insert, BB, NewBB});
+    Updates.push_back({DominatorTree::Insert, NewBB, Succ});
+    Updates.push_back({DominatorTree::Delete, BB, Succ});
+
+    DTU.applyUpdates(Updates);
+    DTU.flush();
+
+    if (MSSAU) {
+      MSSAU->applyUpdates(Updates, *DT);
+      if (VerifyMemorySSA)
+        MSSAU->getMemorySSA()->verifyMemorySSA();
+    }
+  }
+
+  if (LI) {
+    if (Loop *BBLoop = LI->getLoopFor(BB)) {
+      // If one or the other blocks were not in a loop, the new block is not
+      // either, and thus LI doesn't need to be updated.
+      if (Loop *SuccLoop = LI->getLoopFor(Succ)) {
+        if (BBLoop == SuccLoop) {
+          // Both in the same loop, the NewBB joins loop.
+          SuccLoop->addBasicBlockToLoop(NewBB, *LI);
+        } else if (BBLoop->contains(SuccLoop)) {
+          // Edge from an outer loop to an inner loop. Add to the outer loop.
+          BBLoop->addBasicBlockToLoop(NewBB, *LI);
+        } else if (SuccLoop->contains(BBLoop)) {
+          // Edge from an inner loop to an outer loop. Add to the outer loop.
+          SuccLoop->addBasicBlockToLoop(NewBB, *LI);
+        } else {
+          // Edge from two loops with no containment relation. Because these
+          // are natural loops, we know that the destination block must be the
+          // header of its loop (adding a branch into a loop elsewhere would
+          // create an irreducible loop).
+          assert(SuccLoop->getHeader() == Succ &&
+                 "Should not create irreducible loops!");
+          if (Loop *P = SuccLoop->getParentLoop())
+            P->addBasicBlockToLoop(NewBB, *LI);
+        }
+      }
+
+      // If BB is in a loop and Succ is outside of that loop, we may need to
+      // update LoopSimplify form and LCSSA form.
+      if (!BBLoop->contains(Succ)) {
+        assert(!BBLoop->contains(NewBB) &&
+               "Split point for loop exit is contained in loop!");
+
+        // Update LCSSA form in the newly created exit block.
+        if (Options.PreserveLCSSA) {
+          createPHIsForSplitLoopExit(BB, NewBB, Succ);
+        }
+
+        if (!LoopPreds.empty()) {
+          BasicBlock *NewExitBB = SplitBlockPredecessors(
+              Succ, LoopPreds, "split", DT, LI, MSSAU, Options.PreserveLCSSA);
+          if (Options.PreserveLCSSA)
+            createPHIsForSplitLoopExit(LoopPreds, NewExitBB, Succ);
+        }
+      }
+    }
+  }
+
+  return NewBB;
+}
+
+void llvm::createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
+                                      BasicBlock *SplitBB, BasicBlock *DestBB) {
+  // SplitBB shouldn't have anything non-trivial in it yet.
+  assert((SplitBB->getFirstNonPHI() == SplitBB->getTerminator() ||
+          SplitBB->isLandingPad()) &&
+         "SplitBB has non-PHI nodes!");
+
+  // For each PHI in the destination block.
+  for (PHINode &PN : DestBB->phis()) {
+    int Idx = PN.getBasicBlockIndex(SplitBB);
+    assert(Idx >= 0 && "Invalid Block Index");
+    Value *V = PN.getIncomingValue(Idx);
+
+    // If the input is a PHI which already satisfies LCSSA, don't create
+    // a new one.
+    if (const PHINode *VP = dyn_cast<PHINode>(V))
+      if (VP->getParent() == SplitBB)
+        continue;
+
+    // Otherwise a new PHI is needed. Create one and populate it.
+    PHINode *NewPN = PHINode::Create(
+        PN.getType(), Preds.size(), "split",
+        SplitBB->isLandingPad() ? &SplitBB->front() : SplitBB->getTerminator());
+    for (BasicBlock *BB : Preds)
+      NewPN->addIncoming(V, BB);
+
+    // Update the original PHI.
+    PN.setIncomingValue(Idx, NewPN);
+  }
+}
+
 unsigned
 llvm::SplitAllCriticalEdges(Function &F,
                             const CriticalEdgeSplittingOptions &Options) {
@@ -553,8 +766,10 @@ static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt,
                            BBName);
   }
   BasicBlock::iterator SplitIt = SplitPt->getIterator();
-  while (isa<PHINode>(SplitIt) || SplitIt->isEHPad())
+  while (isa<PHINode>(SplitIt) || SplitIt->isEHPad()) {
     ++SplitIt;
+    assert(SplitIt != SplitPt->getParent()->end());
+  }
   std::string Name = BBName.str();
   BasicBlock *New = Old->splitBasicBlock(
       SplitIt, Name.empty() ? Old->getName() + ".split" : Name);
@@ -568,8 +783,8 @@ static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt,
   if (DTU) {
     SmallVector<DominatorTree::UpdateType, 8> Updates;
     // Old dominates New. New node dominates all other nodes dominated by Old.
-    SmallSetVector<BasicBlock *, 8> UniqueSuccessorsOfOld(succ_begin(New),
-                                                          succ_end(New));
+    SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfOld(succ_begin(New),
+                                                       succ_end(New));
     Updates.push_back({DominatorTree::Insert, Old, New});
     Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfOld.size());
     for (BasicBlock *UniqueSuccessorOfOld : UniqueSuccessorsOfOld) {
@@ -634,8 +849,8 @@ BasicBlock *llvm::splitBlockBefore(BasicBlock *Old, Instruction *SplitPt,
     SmallVector<DominatorTree::UpdateType, 8> DTUpdates;
     // New dominates Old. The predecessor nodes of the Old node dominate
     // New node.
-    SmallSetVector<BasicBlock *, 8> UniquePredecessorsOfOld(pred_begin(New),
-                                                            pred_end(New));
+    SmallPtrSet<BasicBlock *, 8> UniquePredecessorsOfOld(pred_begin(New),
+                                                         pred_end(New));
     DTUpdates.push_back({DominatorTree::Insert, New, Old});
     DTUpdates.reserve(DTUpdates.size() + 2 * UniquePredecessorsOfOld.size());
     for (BasicBlock *UniquePredecessorOfOld : UniquePredecessorsOfOld) {
@@ -666,7 +881,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
   if (DTU) {
     // Recalculation of DomTree is needed when updating a forward DomTree and
     // the Entry BB is replaced.
-    if (NewBB == &NewBB->getParent()->getEntryBlock() && DTU->hasDomTree()) {
+    if (NewBB->isEntryBlock() && DTU->hasDomTree()) {
       // The entry block was removed and there is no external interface for
       // the dominator tree to be notified of this change. In this corner-case
       // we recalculate the entire tree.
@@ -674,7 +889,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
     } else {
       // Split block expects NewBB to have a non-empty set of predecessors.
       SmallVector<DominatorTree::UpdateType, 8> Updates;
-      SmallSetVector<BasicBlock *, 8> UniquePreds(Preds.begin(), Preds.end());
+      SmallPtrSet<BasicBlock *, 8> UniquePreds(Preds.begin(), Preds.end());
       Updates.push_back({DominatorTree::Insert, NewBB, OldBB});
       Updates.reserve(Updates.size() + 2 * UniquePreds.size());
       for (auto *UniquePred : UniquePreds) {
@@ -685,7 +900,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
     }
   } else if (DT) {
     if (OldBB == DT->getRootNode()->getBlock()) {
-      assert(NewBB == &NewBB->getParent()->getEntryBlock());
+      assert(NewBB->isEntryBlock());
       DT->setNewRoot(NewBB);
     } else {
       // Split block expects NewBB to have a non-empty set of predecessors.
@@ -1083,9 +1298,8 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
 
   // If the return instruction returns a value, and if the value was a
   // PHI node in "BB", propagate the right value into the return.
-  for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
-       i != e; ++i) {
-    Value *V = *i;
+  for (Use &Op : NewRet->operands()) {
+    Value *V = Op;
     Instruction *NewBC = nullptr;
     if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) {
       // Return value might be bitcasted. Clone and insert it before the
@@ -1093,7 +1307,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
       V = BCI->getOperand(0);
       NewBC = BCI->clone();
       Pred->getInstList().insert(NewRet->getIterator(), NewBC);
-      *i = NewBC;
+      Op = NewBC;
     }
 
     Instruction *NewEV = nullptr;
@@ -1105,7 +1319,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
         Pred->getInstList().insert(NewBC->getIterator(), NewEV);
       } else {
         Pred->getInstList().insert(NewRet->getIterator(), NewEV);
-        *i = NewEV;
+        Op = NewEV;
       }
     }
 
@@ -1116,7 +1330,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
       } else if (NewBC)
         NewBC->setOperand(0, PN->getIncomingValueForBlock(Pred));
       else
-        *i = PN->getIncomingValueForBlock(Pred);
+        Op = PN->getIncomingValueForBlock(Pred);
     }
   }
 
@@ -1141,8 +1355,8 @@ SplitBlockAndInsertIfThenImpl(Value *Cond, Instruction *SplitBefore,
   BasicBlock *Head = SplitBefore->getParent();
   BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
   if (DTU) {
-    SmallSetVector<BasicBlock *, 8> UniqueSuccessorsOfHead(succ_begin(Tail),
-                                                           succ_end(Tail));
+    SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfHead(succ_begin(Tail),
+                                                        succ_end(Tail));
     Updates.push_back({DominatorTree::Insert, Head, Tail});
     Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfHead.size());
     for (BasicBlock *UniqueSuccessorOfHead : UniqueSuccessorsOfHead) {
@@ -1242,8 +1456,8 @@ void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
   ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
 }
 
-Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
-                            BasicBlock *&IfFalse) {
+BranchInst *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
+                                 BasicBlock *&IfFalse) {
   PHINode *SomePHI = dyn_cast<PHINode>(BB->begin());
   BasicBlock *Pred1 = nullptr;
   BasicBlock *Pred2 = nullptr;
@@ -1309,7 +1523,7 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
       return nullptr;
     }
 
-    return Pred1Br->getCondition();
+    return Pred1Br;
   }
 
   // Ok, if we got here, both predecessors end with an unconditional branch to
@@ -1331,7 +1545,7 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
     IfTrue = Pred2;
     IfFalse = Pred1;
   }
-  return BI->getCondition();
+  return BI;
 }
 
 // After creating a control flow hub, the operands of PHINodes in an outgoing
```
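One API change worth calling out in `BasicBlockUtils.cpp`: `GetIfCondition` now returns the dominating `BranchInst` itself instead of just its condition `Value`. A sketch of the caller-side adjustment (hypothetical caller, not from this commit):

```cpp
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;

// Hypothetical caller: inspect a diamond CFG that merges at MergeBB.
bool analyzeDiamond(BasicBlock *MergeBB) {
  BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
  // Before this change the call returned the condition Value directly.
  BranchInst *DomBI = GetIfCondition(MergeBB, IfTrue, IfFalse);
  if (!DomBI)
    return false;
  Value *Cond = DomBI->getCondition(); // The old return value.
  return Cond != nullptr;
}
```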
```diff
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 939a1a3a868d..1bb80be8ef99 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -100,46 +100,19 @@ PreservedAnalyses BreakCriticalEdgesPass::run(Function &F,
 // Implementation of the external critical edge manipulation functions
 //===----------------------------------------------------------------------===//
 
-/// When a loop exit edge is split, LCSSA form may require new PHIs in the new
-/// exit block. This function inserts the new PHIs, as needed. Preds is a list
-/// of preds inside the loop, SplitBB is the new loop exit block, and DestBB is
-/// the old loop exit, now the successor of SplitBB.
-static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
-                                       BasicBlock *SplitBB,
-                                       BasicBlock *DestBB) {
-  // SplitBB shouldn't have anything non-trivial in it yet.
-  assert((SplitBB->getFirstNonPHI() == SplitBB->getTerminator() ||
-          SplitBB->isLandingPad()) && "SplitBB has non-PHI nodes!");
-
-  // For each PHI in the destination block.
-  for (PHINode &PN : DestBB->phis()) {
-    unsigned Idx = PN.getBasicBlockIndex(SplitBB);
-    Value *V = PN.getIncomingValue(Idx);
-
-    // If the input is a PHI which already satisfies LCSSA, don't create
-    // a new one.
-    if (const PHINode *VP = dyn_cast<PHINode>(V))
-      if (VP->getParent() == SplitBB)
-        continue;
-
-    // Otherwise a new PHI is needed. Create one and populate it.
-    PHINode *NewPN = PHINode::Create(
-        PN.getType(), Preds.size(), "split",
-        SplitBB->isLandingPad() ? &SplitBB->front() : SplitBB->getTerminator());
-    for (unsigned i = 0, e = Preds.size(); i != e; ++i)
-      NewPN->addIncoming(V, Preds[i]);
-
-    // Update the original PHI.
-    PN.setIncomingValue(Idx, NewPN);
-  }
-}
-
 BasicBlock *llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
                                     const CriticalEdgeSplittingOptions &Options,
                                     const Twine &BBName) {
   if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges))
     return nullptr;
 
+  return SplitKnownCriticalEdge(TI, SuccNum, Options, BBName);
+}
+
+BasicBlock *
+llvm::SplitKnownCriticalEdge(Instruction *TI, unsigned SuccNum,
+                             const CriticalEdgeSplittingOptions &Options,
+                             const Twine &BBName) {
   assert(!isa<IndirectBrInst>(TI) &&
          "Cannot split critical edge from IndirectBrInst");
```
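`SplitCriticalEdge` is now a thin wrapper: it performs the `isCriticalEdge` check and then delegates to the new `SplitKnownCriticalEdge`, so callers that have already proved the edge critical can skip the re-check. A small sketch mirroring that structure (hypothetical wrapper):

```cpp
#include "llvm/Analysis/CFG.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;

// Hypothetical wrapper showing the new two-step structure.
BasicBlock *splitIfCritical(Instruction *TI, unsigned SuccNum,
                            const CriticalEdgeSplittingOptions &Options) {
  if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges))
    return nullptr;
  // The caller-visible work now lives in SplitKnownCriticalEdge.
  return SplitKnownCriticalEdge(TI, SuccNum, Options);
}
```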
```diff
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index dba5403f272a..35e22f7a57e2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -44,7 +44,6 @@ STATISTIC(NumSExtArg, "Number of arguments inferred as signext");
 STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly");
 STATISTIC(NumNoAlias, "Number of function returns inferred as noalias");
 STATISTIC(NumNoUndef, "Number of function returns inferred as noundef returns");
-STATISTIC(NumNonNull, "Number of function returns inferred as nonnull returns");
 STATISTIC(NumReturnedArg, "Number of arguments inferred as returned");
 STATISTIC(NumWillReturn, "Number of functions inferred as willreturn");
 
@@ -166,6 +165,14 @@ static bool setArgsNoUndef(Function &F) {
   return Changed;
 }
 
+static bool setArgNoUndef(Function &F, unsigned ArgNo) {
+  if (F.hasParamAttribute(ArgNo, Attribute::NoUndef))
+    return false;
+  F.addParamAttr(ArgNo, Attribute::NoUndef);
+  ++NumNoUndef;
+  return true;
+}
+
 static bool setRetAndArgsNoUndef(Function &F) {
   return setRetNoUndef(F) | setArgsNoUndef(F);
 }
@@ -249,12 +256,20 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     Changed |= setDoesNotCapture(F, 1);
     Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
-  case LibFunc_strcpy:
-  case LibFunc_strncpy:
   case LibFunc_strcat:
   case LibFunc_strncat:
+    Changed |= setOnlyAccessesArgMemory(F);
+    Changed |= setDoesNotThrow(F);
     Changed |= setWillReturn(F);
     Changed |= setReturnedArg(F, 0);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setDoesNotAlias(F, 0);
+    Changed |= setDoesNotAlias(F, 1);
+    return Changed;
+  case LibFunc_strcpy:
+  case LibFunc_strncpy:
+    Changed |= setReturnedArg(F, 0);
     LLVM_FALLTHROUGH;
   case LibFunc_stpcpy:
   case LibFunc_stpncpy:
@@ -323,8 +338,10 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
     return Changed;
-  case LibFunc_strdup:
   case LibFunc_strndup:
+    Changed |= setArgNoUndef(F, 1);
+    LLVM_FALLTHROUGH;
+  case LibFunc_strdup:
     Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setRetDoesNotAlias(F);
@@ -383,7 +400,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
   case LibFunc_malloc:
   case LibFunc_vec_malloc:
     Changed |= setOnlyAccessesInaccessibleMemory(F);
-    Changed |= setRetNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setRetDoesNotAlias(F);
     Changed |= setWillReturn(F);
@@ -471,10 +488,12 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     Changed |= setRetDoesNotAlias(F);
     Changed |= setWillReturn(F);
     Changed |= setDoesNotCapture(F, 0);
+    Changed |= setArgNoUndef(F, 1);
     return Changed;
   case LibFunc_reallocf:
     Changed |= setRetNoUndef(F);
     Changed |= setWillReturn(F);
+    Changed |= setArgNoUndef(F, 1);
     return Changed;
   case LibFunc_read:
     // May throw; "read" is a valid pthread cancellation point.
@@ -517,7 +536,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     return Changed;
   case LibFunc_aligned_alloc:
     Changed |= setOnlyAccessesInaccessibleMemory(F);
-    Changed |= setRetNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setRetDoesNotAlias(F);
     Changed |= setWillReturn(F);
@@ -548,8 +567,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     return Changed;
   case LibFunc_calloc:
   case LibFunc_vec_calloc:
-    Changed |= setOnlyAccessesInaccessibleMemory(F);
-    Changed |= setRetNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setRetDoesNotAlias(F);
     Changed |= setWillReturn(F);
@@ -833,7 +851,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     return Changed;
   case LibFunc_valloc:
     Changed |= setOnlyAccessesInaccessibleMemory(F);
-    Changed |= setRetNoUndef(F);
+    Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotThrow(F);
     Changed |= setRetDoesNotAlias(F);
     Changed |= setWillReturn(F);
@@ -908,8 +926,10 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     Changed |= setRetAndArgsNoUndef(F);
     Changed |= setDoesNotCapture(F, 3);
     return Changed;
-  case LibFunc_dunder_strdup:
   case LibFunc_dunder_strndup:
+    Changed |= setArgNoUndef(F, 1);
+    LLVM_FALLTHROUGH;
+  case LibFunc_dunder_strdup:
     Changed |= setDoesNotThrow(F);
     Changed |= setRetDoesNotAlias(F);
     Changed |= setWillReturn(F);
```
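`BuildLibCalls.cpp` gains a per-argument `setArgNoUndef` helper and applies it to the size arguments of `strndup`, `realloc`, and friends. `inferLibFuncAttributes` itself is typically driven over a module's declarations; a sketch of such a driver (hypothetical loop, not from this commit):

```cpp
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"

using namespace llvm;

// Hypothetical driver: annotate every recognized libc declaration with the
// attributes inferred above (willreturn, noundef, nocapture, ...).
bool annotateKnownLibCalls(Module &M, const TargetLibraryInfo &TLI) {
  bool Changed = false;
  for (Function &F : M)
    if (F.isDeclaration())
      Changed |= inferLibFuncAttributes(F, TLI);
  return Changed;
}
```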
```diff
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index bf08bf274737..87868251036c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -490,11 +490,8 @@ CallBase &llvm::promoteCall(CallBase &CB, Function *Callee,
 
       // If byval is used, this must be a pointer type, and the byval type must
       // match the element type. Update it if present.
-      if (ArgAttrs.getByValType()) {
-        Type *NewTy = Callee->getParamByValType(ArgNo);
-        ArgAttrs.addByValAttr(
-            NewTy ? NewTy : cast<PointerType>(FormalTy)->getElementType());
-      }
+      if (ArgAttrs.getByValType())
+        ArgAttrs.addByValAttr(Callee->getParamByValType(ArgNo));
 
       NewArgAttrs.push_back(AttributeSet::get(Ctx, ArgAttrs));
       AttributeChanged = true;
```
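The `promoteCall` hunk removes a use of `PointerType::getElementType`, again in service of opaque pointers: the `byval` element type must now come from the callee's parameter attribute rather than from the pointer type. A minimal illustration of reading that type (hypothetical helper):

```cpp
#include "llvm/IR/Function.h"

using namespace llvm;

// Hypothetical helper: with opaque pointers the byval element type cannot
// be recovered from the argument's pointer type; it is carried by the
// parameter attribute instead.
Type *byValTypeOfParam(const Function &Callee, unsigned ArgNo) {
  return Callee.getParamByValType(ArgNo);
}
```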
- DebugInfoFinder DIFinder; + // When we remap instructions within the same module, we want to avoid + // duplicating inlined DISubprograms, so record all subprograms we find as we + // duplicate instructions and then freeze them in the MD map. We also record + // information about dbg.value and dbg.declare to avoid duplicating the + // types. + Optional<DebugInfoFinder> DIFinder; + + // Track the subprogram attachment that needs to be cloned to fine-tune the + // mapping within the same module. + DISubprogram *SPClonedWithinModule = nullptr; + if (Changes < CloneFunctionChangeType::DifferentModule) { + assert((NewFunc->getParent() == nullptr || + NewFunc->getParent() == OldFunc->getParent()) && + "Expected NewFunc to have the same parent, or no parent"); + + // Need to find subprograms, types, and compile units. + DIFinder.emplace(); + + SPClonedWithinModule = OldFunc->getSubprogram(); + if (SPClonedWithinModule) + DIFinder->processSubprogram(SPClonedWithinModule); + } else { + assert((NewFunc->getParent() == nullptr || + NewFunc->getParent() != OldFunc->getParent()) && + "Expected NewFunc to have different parents, or no parent"); + + if (Changes == CloneFunctionChangeType::DifferentModule) { + assert(NewFunc->getParent() && + "Need parent of new function to maintain debug info invariants"); + + // Need to find all the compile units. + DIFinder.emplace(); + } + } // Loop over all of the basic blocks in the function, cloning them as // appropriate. Note that we save BE this way in order to handle cloning of // recursive functions into themselves. - for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); - BI != BE; ++BI) { - const BasicBlock &BB = *BI; + for (const BasicBlock &BB : *OldFunc) { // Create a new basic block and copy instructions into it! BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo, - ModuleLevelChanges ? &DIFinder : nullptr); + DIFinder ? &*DIFinder : nullptr); // Add basic block mapping. VMap[&BB] = CBB; @@ -173,8 +183,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // implementation, which generates an invalid blockaddress when // cloning a function.) if (BB.hasAddressTaken()) { - Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc), - const_cast<BasicBlock*>(&BB)); + Constant *OldBBAddr = BlockAddress::get(const_cast<Function *>(OldFunc), + const_cast<BasicBlock *>(&BB)); VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB); } @@ -183,54 +193,83 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, Returns.push_back(RI); } - for (DISubprogram *ISP : DIFinder.subprograms()) - if (ISP != SP) - VMap.MD()[ISP].reset(ISP); - - for (DICompileUnit *CU : DIFinder.compile_units()) - VMap.MD()[CU].reset(CU); - - for (DIType *Type : DIFinder.types()) - VMap.MD()[Type].reset(Type); + if (Changes < CloneFunctionChangeType::DifferentModule && + DIFinder->subprogram_count() > 0) { + // Turn on module-level changes, since we need to clone (some of) the + // debug info metadata. + // + // FIXME: Metadata effectively owned by a function should be made + // local, and only that local metadata should be cloned. + ModuleLevelChanges = true; + + auto mapToSelfIfNew = [&VMap](MDNode *N) { + // Avoid clobbering an existing mapping. + (void)VMap.MD().try_emplace(N, N); + }; + + // Avoid cloning types, compile units, and (other) subprograms. 
+ for (DISubprogram *ISP : DIFinder->subprograms()) + if (ISP != SPClonedWithinModule) + mapToSelfIfNew(ISP); + + for (DICompileUnit *CU : DIFinder->compile_units()) + mapToSelfIfNew(CU); + + for (DIType *Type : DIFinder->types()) + mapToSelfIfNew(Type); + } else { + assert(!SPClonedWithinModule && + "Subprogram should be in DIFinder->subprogram_count()..."); + } + const auto RemapFlag = ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges; // Duplicate the metadata that is attached to the cloned function. // Subprograms/CUs/types that were already mapped to themselves won't be // duplicated. SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; OldFunc->getAllMetadata(MDs); for (auto MD : MDs) { - NewFunc->addMetadata( - MD.first, - *MapMetadata(MD.second, VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, - TypeMapper, Materializer)); + NewFunc->addMetadata(MD.first, *MapMetadata(MD.second, VMap, RemapFlag, + TypeMapper, Materializer)); } - // Loop over all of the instructions in the function, fixing up operand - // references as we go. This uses VMap to do all the hard work. - for (Function::iterator BB = - cast<BasicBlock>(VMap[&OldFunc->front()])->getIterator(), - BE = NewFunc->end(); + // Loop over all of the instructions in the new function, fixing up operand + // references as we go. This uses VMap to do all the hard work. + for (Function::iterator + BB = cast<BasicBlock>(VMap[&OldFunc->front()])->getIterator(), + BE = NewFunc->end(); BB != BE; ++BB) // Loop over all instructions, fixing each one as we find it... for (Instruction &II : *BB) - RemapInstruction(&II, VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, - TypeMapper, Materializer); - - // Register all DICompileUnits of the old parent module in the new parent module - auto* OldModule = OldFunc->getParent(); - auto* NewModule = NewFunc->getParent(); - if (OldModule && NewModule && OldModule != NewModule && DIFinder.compile_unit_count()) { - auto* NMD = NewModule->getOrInsertNamedMetadata("llvm.dbg.cu"); - // Avoid multiple insertions of the same DICompileUnit to NMD. - SmallPtrSet<const void*, 8> Visited; - for (auto* Operand : NMD->operands()) - Visited.insert(Operand); - for (auto* Unit : DIFinder.compile_units()) - // VMap.MD()[Unit] == Unit - if (Visited.insert(Unit).second) - NMD->addOperand(Unit); + RemapInstruction(&II, VMap, RemapFlag, TypeMapper, Materializer); + + // Only update !llvm.dbg.cu for DifferentModule (not CloneModule). In the + // same module, the compile unit will already be listed (or not). When + // cloning a module, CloneModule() will handle creating the named metadata. + if (Changes != CloneFunctionChangeType::DifferentModule) + return; + + // Update !llvm.dbg.cu with compile units added to the new module if this + // function is being cloned in isolation. + // + // FIXME: This is making global / module-level changes, which doesn't seem + // like the right encapsulation Consider dropping the requirement to update + // !llvm.dbg.cu (either obsoleting the node, or restricting it to + // non-discardable compile units) instead of discovering compile units by + // visiting the metadata attached to global values, which would allow this + // code to be deleted. Alternatively, perhaps give responsibility for this + // update to CloneFunctionInto's callers. + auto *NewModule = NewFunc->getParent(); + auto *NMD = NewModule->getOrInsertNamedMetadata("llvm.dbg.cu"); + // Avoid multiple insertions of the same DICompileUnit to NMD. 
+ SmallPtrSet<const void *, 8> Visited; + for (auto *Operand : NMD->operands()) + Visited.insert(Operand); + for (auto *Unit : DIFinder->compile_units()) { + MDNode *MappedUnit = + MapMetadata(Unit, VMap, RF_None, TypeMapper, Materializer); + if (Visited.insert(MappedUnit).second) + NMD->addOperand(MappedUnit); } } @@ -243,7 +282,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, /// Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo) { - std::vector<Type*> ArgTypes; + std::vector<Type *> ArgTypes; // The user might be deleting arguments to the function by specifying them in // the VMap. If so, we need to not add the arguments to the arg ty vector @@ -253,8 +292,9 @@ Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap, ArgTypes.push_back(I.getType()); // Create a new function type... - FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(), - ArgTypes, F->getFunctionType()->isVarArg()); + FunctionType *FTy = + FunctionType::get(F->getFunctionType()->getReturnType(), ArgTypes, + F->getFunctionType()->isVarArg()); // Create the new function... Function *NewF = Function::Create(FTy, F->getLinkage(), F->getAddressSpace(), @@ -262,61 +302,60 @@ Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap, // Loop over the arguments, copying the names of the mapped arguments over... Function::arg_iterator DestI = NewF->arg_begin(); - for (const Argument & I : F->args()) + for (const Argument &I : F->args()) if (VMap.count(&I) == 0) { // Is this argument preserved? DestI->setName(I.getName()); // Copy the name over... VMap[&I] = &*DestI++; // Add mapping to VMap } - SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. - CloneFunctionInto(NewF, F, VMap, F->getSubprogram() != nullptr, Returns, "", - CodeInfo); + SmallVector<ReturnInst *, 8> Returns; // Ignore returns cloned. + CloneFunctionInto(NewF, F, VMap, CloneFunctionChangeType::LocalChangesOnly, + Returns, "", CodeInfo); return NewF; } - - namespace { - /// This is a private class used to implement CloneAndPruneFunctionInto. - struct PruningFunctionCloner { - Function *NewFunc; - const Function *OldFunc; - ValueToValueMapTy &VMap; - bool ModuleLevelChanges; - const char *NameSuffix; - ClonedCodeInfo *CodeInfo; - - public: - PruningFunctionCloner(Function *newFunc, const Function *oldFunc, - ValueToValueMapTy &valueMap, bool moduleLevelChanges, - const char *nameSuffix, ClonedCodeInfo *codeInfo) - : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), - ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix), - CodeInfo(codeInfo) {} - - /// The specified block is found to be reachable, clone it and - /// anything that it can reach. - void CloneBlock(const BasicBlock *BB, - BasicBlock::const_iterator StartingInst, - std::vector<const BasicBlock*> &ToClone); - }; -} +/// This is a private class used to implement CloneAndPruneFunctionInto. 
+struct PruningFunctionCloner { + Function *NewFunc; + const Function *OldFunc; + ValueToValueMapTy &VMap; + bool ModuleLevelChanges; + const char *NameSuffix; + ClonedCodeInfo *CodeInfo; + +public: + PruningFunctionCloner(Function *newFunc, const Function *oldFunc, + ValueToValueMapTy &valueMap, bool moduleLevelChanges, + const char *nameSuffix, ClonedCodeInfo *codeInfo) + : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), + ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix), + CodeInfo(codeInfo) {} + + /// The specified block is found to be reachable, clone it and + /// anything that it can reach. + void CloneBlock(const BasicBlock *BB, BasicBlock::const_iterator StartingInst, + std::vector<const BasicBlock *> &ToClone); +}; +} // namespace /// The specified block is found to be reachable, clone it and /// anything that it can reach. -void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, - BasicBlock::const_iterator StartingInst, - std::vector<const BasicBlock*> &ToClone){ +void PruningFunctionCloner::CloneBlock( + const BasicBlock *BB, BasicBlock::const_iterator StartingInst, + std::vector<const BasicBlock *> &ToClone) { WeakTrackingVH &BBEntry = VMap[BB]; // Have we already cloned this block? - if (BBEntry) return; + if (BBEntry) + return; // Nope, clone it now. BasicBlock *NewBB; BBEntry = NewBB = BasicBlock::Create(BB->getContext()); - if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); + if (BB->hasName()) + NewBB->setName(BB->getName() + NameSuffix); // It is only legal to clone a function if a block address within that // function is never referenced outside of the function. Given that, we @@ -328,8 +367,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // Note that we don't need to fix the mapping for unreachable blocks; // the default mapping there is safe. if (BB->hasAddressTaken()) { - Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc), - const_cast<BasicBlock*>(BB)); + Constant *OldBBAddr = BlockAddress::get(const_cast<Function *>(OldFunc), + const_cast<BasicBlock *>(BB)); VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB); } @@ -337,8 +376,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // Loop over all instructions, and copy them over, DCE'ing as we go. This // loop doesn't include the terminator. - for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); - II != IE; ++II) { + for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); II != IE; + ++II) { Instruction *NewInst = II->clone(); @@ -368,15 +407,17 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, } if (II->hasName()) - NewInst->setName(II->getName()+NameSuffix); + NewInst->setName(II->getName() + NameSuffix); VMap[&*II] = NewInst; // Add instruction map to value. NewBB->getInstList().push_back(NewInst); hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II)); - if (CodeInfo) + if (CodeInfo) { + CodeInfo->OrigVMap[&*II] = NewInst; if (auto *CB = dyn_cast<CallBase>(&*II)) if (CB->hasOperandBundles()) CodeInfo->OperandBundleCallSites.push_back(NewInst); + } if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { if (isa<ConstantInt>(AI->getArraySize())) @@ -414,9 +455,9 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, Value *V = VMap.lookup(SI->getCondition()); Cond = dyn_cast_or_null<ConstantInt>(V); } - if (Cond) { // Constant fold to uncond branch! + if (Cond) { // Constant fold to uncond branch! 
SwitchInst::ConstCaseHandle Case = *SI->findCaseValue(Cond); - BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor()); + BasicBlock *Dest = const_cast<BasicBlock *>(Case.getCaseSuccessor()); VMap[OldTI] = BranchInst::Create(Dest, NewBB); ToClone.push_back(Dest); TerminatorDone = true; @@ -426,24 +467,26 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, if (!TerminatorDone) { Instruction *NewInst = OldTI->clone(); if (OldTI->hasName()) - NewInst->setName(OldTI->getName()+NameSuffix); + NewInst->setName(OldTI->getName() + NameSuffix); NewBB->getInstList().push_back(NewInst); - VMap[OldTI] = NewInst; // Add instruction map to value. + VMap[OldTI] = NewInst; // Add instruction map to value. - if (CodeInfo) + if (CodeInfo) { + CodeInfo->OrigVMap[OldTI] = NewInst; if (auto *CB = dyn_cast<CallBase>(OldTI)) if (CB->hasOperandBundles()) CodeInfo->OperandBundleCallSites.push_back(NewInst); + } // Recursively clone any reachable successor blocks. append_range(ToClone, successors(BB->getTerminator())); } if (CodeInfo) { - CodeInfo->ContainsCalls |= hasCalls; + CodeInfo->ContainsCalls |= hasCalls; CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; - CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && - BB != &BB->getParent()->front(); + CodeInfo->ContainsDynamicAllocas |= + hasStaticAllocas && BB != &BB->getParent()->front(); } } @@ -481,7 +524,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, } // Clone the entry block, and anything recursively reachable from it. - std::vector<const BasicBlock*> CloneWorklist; + std::vector<const BasicBlock *> CloneWorklist; PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist); while (!CloneWorklist.empty()) { const BasicBlock *BB = CloneWorklist.back(); @@ -494,11 +537,12 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // insert it into the new function in the right order. If not, ignore it. // // Defer PHI resolution until rest of function is resolved. - SmallVector<const PHINode*, 16> PHIToResolve; + SmallVector<const PHINode *, 16> PHIToResolve; for (const BasicBlock &BI : *OldFunc) { Value *V = VMap.lookup(&BI); BasicBlock *NewBB = cast_or_null<BasicBlock>(V); - if (!NewBB) continue; // Dead block. + if (!NewBB) + continue; // Dead block. // Add the new block to the new function. NewFunc->getBasicBlockList().push_back(NewBB); @@ -523,7 +567,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // Defer PHI resolution until rest of function is resolved, PHI resolution // requires the CFG to be up-to-date. - for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) { + for (unsigned phino = 0, e = PHIToResolve.size(); phino != e;) { const PHINode *OPN = PHIToResolve[phino]; unsigned NumPreds = OPN->getNumIncomingValues(); const BasicBlock *OldBB = OPN->getParent(); @@ -532,21 +576,22 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // Map operands for blocks that are live and remove operands for blocks // that are dead. 
for (; phino != PHIToResolve.size() && - PHIToResolve[phino]->getParent() == OldBB; ++phino) { + PHIToResolve[phino]->getParent() == OldBB; + ++phino) { OPN = PHIToResolve[phino]; PHINode *PN = cast<PHINode>(VMap[OPN]); for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { Value *V = VMap.lookup(PN->getIncomingBlock(pred)); if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) { - Value *InVal = MapValue(PN->getIncomingValue(pred), - VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); + Value *InVal = + MapValue(PN->getIncomingValue(pred), VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); assert(InVal && "Unknown input value?"); PN->setIncomingValue(pred, InVal); PN->setIncomingBlock(pred, MappedBlock); } else { PN->removeIncomingValue(pred, false); - --pred; // Revisit the next entry. + --pred; // Revisit the next entry. --e; } } @@ -562,10 +607,9 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, if (NumPreds != PN->getNumIncomingValues()) { assert(NumPreds < PN->getNumIncomingValues()); // Count how many times each predecessor comes to this block. - std::map<BasicBlock*, unsigned> PredCount; - for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); - PI != E; ++PI) - --PredCount[*PI]; + std::map<BasicBlock *, unsigned> PredCount; + for (BasicBlock *Pred : predecessors(NewBB)) + --PredCount[Pred]; // Figure out how many entries to remove from each PHI. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) @@ -683,11 +727,15 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, } BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); - if (!BI || BI->isConditional()) { ++I; continue; } + if (!BI || BI->isConditional()) { + ++I; + continue; + } BasicBlock *Dest = BI->getSuccessor(0); if (!Dest->getSinglePredecessor()) { - ++I; continue; + ++I; + continue; } // We shouldn't be able to get single-entry PHI nodes here, as instsimplify @@ -720,7 +768,6 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, Returns.push_back(RI); } - /// This works exactly like CloneFunctionInto, /// except that it does some simple constant prop and DCE on the fly. The /// effect of this is to copy significantly less code in cases where (for @@ -728,13 +775,10 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, /// constant arguments cause a significant amount of code in the callee to be /// dead. Since this doesn't produce an exact copy of the input, it can't be /// used for things like CloneFunction or CloneModule. 
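For orientation, a minimal caller-side sketch of CloneAndPruneFunctionInto() as reshaped by the hunk below (the change reflows the signature and drops the now-unused TheCall parameter; behavior is unchanged). The wrapper name cloneWithPruning is hypothetical, and note that the cloner expects every formal argument to be pre-seeded in VMap:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    using namespace llvm;

    static Function *cloneWithPruning(Function *OldFunc) {
      Function *NewFunc =
          Function::Create(OldFunc->getFunctionType(), OldFunc->getLinkage(),
                           OldFunc->getName() + ".clone", OldFunc->getParent());
      // Pre-map each old formal argument to its clone, as the cloner requires.
      ValueToValueMapTy VMap;
      auto NewArg = NewFunc->arg_begin();
      for (const Argument &Arg : OldFunc->args())
        VMap[&Arg] = &*NewArg++;
      SmallVector<ReturnInst *, 8> Returns;
      CloneAndPruneFunctionInto(NewFunc, OldFunc, VMap,
                                /*ModuleLevelChanges=*/false, Returns,
                                /*NameSuffix=*/".c", /*CodeInfo=*/nullptr);
      return NewFunc;
    }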
-void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, - ValueToValueMapTy &VMap, - bool ModuleLevelChanges, - SmallVectorImpl<ReturnInst*> &Returns, - const char *NameSuffix, - ClonedCodeInfo *CodeInfo, - Instruction *TheCall) { +void llvm::CloneAndPruneFunctionInto( + Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, + bool ModuleLevelChanges, SmallVectorImpl<ReturnInst *> &Returns, + const char *NameSuffix, ClonedCodeInfo *CodeInfo) { CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap, ModuleLevelChanges, Returns, NameSuffix, CodeInfo); } @@ -885,10 +929,9 @@ BasicBlock *llvm::DuplicateInstructionsInSplitBetween( return NewBB; } -void llvm::cloneNoAliasScopes( - ArrayRef<MDNode *> NoAliasDeclScopes, - DenseMap<MDNode *, MDNode *> &ClonedScopes, - StringRef Ext, LLVMContext &Context) { +void llvm::cloneNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes, + DenseMap<MDNode *, MDNode *> &ClonedScopes, + StringRef Ext, LLVMContext &Context) { MDBuilder MDB(Context); for (auto *ScopeList : NoAliasDeclScopes) { @@ -911,9 +954,9 @@ void llvm::cloneNoAliasScopes( } } -void llvm::adaptNoAliasScopes( - Instruction *I, const DenseMap<MDNode *, MDNode *> &ClonedScopes, - LLVMContext &Context) { +void llvm::adaptNoAliasScopes(Instruction *I, + const DenseMap<MDNode *, MDNode *> &ClonedScopes, + LLVMContext &Context) { auto CloneScopeList = [&](const MDNode *ScopeList) -> MDNode * { bool NeedsReplacement = false; SmallVector<Metadata *, 8> NewScopeList; @@ -945,9 +988,9 @@ void llvm::adaptNoAliasScopes( replaceWhenNeeded(LLVMContext::MD_alias_scope); } -void llvm::cloneAndAdaptNoAliasScopes( - ArrayRef<MDNode *> NoAliasDeclScopes, - ArrayRef<BasicBlock *> NewBlocks, LLVMContext &Context, StringRef Ext) { +void llvm::cloneAndAdaptNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes, + ArrayRef<BasicBlock *> NewBlocks, + LLVMContext &Context, StringRef Ext) { if (NoAliasDeclScopes.empty()) return; @@ -962,9 +1005,9 @@ void llvm::cloneAndAdaptNoAliasScopes( adaptNoAliasScopes(&I, ClonedScopes, Context); } -void llvm::cloneAndAdaptNoAliasScopes( - ArrayRef<MDNode *> NoAliasDeclScopes, Instruction *IStart, - Instruction *IEnd, LLVMContext &Context, StringRef Ext) { +void llvm::cloneAndAdaptNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes, + Instruction *IStart, Instruction *IEnd, + LLVMContext &Context, StringRef Ext) { if (NoAliasDeclScopes.empty()) return; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp index a6327bbf21bc..eb226b9b246d 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp @@ -115,28 +115,26 @@ std::unique_ptr<Module> llvm::CloneModule( // have been created, loop through and copy the global variable referrers // over... We also set the attributes on the global now. 
// - for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - GlobalVariable *GV = cast<GlobalVariable>(VMap[&*I]); + for (const GlobalVariable &G : M.globals()) { + GlobalVariable *GV = cast<GlobalVariable>(VMap[&G]); SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; - I->getAllMetadata(MDs); + G.getAllMetadata(MDs); for (auto MD : MDs) - GV->addMetadata(MD.first, - *MapMetadata(MD.second, VMap, RF_MoveDistinctMDs)); + GV->addMetadata(MD.first, *MapMetadata(MD.second, VMap)); - if (I->isDeclaration()) + if (G.isDeclaration()) continue; - if (!ShouldCloneDefinition(&*I)) { + if (!ShouldCloneDefinition(&G)) { // Skip after setting the correct linkage for an external reference. GV->setLinkage(GlobalValue::ExternalLinkage); continue; } - if (I->hasInitializer()) - GV->setInitializer(MapValue(I->getInitializer(), VMap)); + if (G.hasInitializer()) + GV->setInitializer(MapValue(G.getInitializer(), VMap)); - copyComdat(GV, &*I); + copyComdat(GV, &G); } // Similarly, copy over function bodies now... @@ -162,7 +160,8 @@ std::unique_ptr<Module> llvm::CloneModule( } SmallVector<ReturnInst *, 8> Returns; // Ignore returns cloned. - CloneFunctionInto(F, &I, VMap, /*ModuleLevelChanges=*/true, Returns); + CloneFunctionInto(F, &I, VMap, CloneFunctionChangeType::ClonedModule, + Returns); if (I.hasPersonalityFn()) F->setPersonalityFn(MapValue(I.getPersonalityFn(), VMap)); @@ -182,25 +181,13 @@ std::unique_ptr<Module> llvm::CloneModule( } // And named metadata.... - const auto* LLVM_DBG_CU = M.getNamedMetadata("llvm.dbg.cu"); for (Module::const_named_metadata_iterator I = M.named_metadata_begin(), E = M.named_metadata_end(); I != E; ++I) { const NamedMDNode &NMD = *I; NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName()); - if (&NMD == LLVM_DBG_CU) { - // Do not insert duplicate operands. - SmallPtrSet<const void*, 8> Visited; - for (const auto* Operand : NewNMD->operands()) - Visited.insert(Operand); - for (const auto* Operand : NMD.operands()) { - auto* MappedOperand = MapMetadata(Operand, VMap); - if (Visited.insert(MappedOperand).second) - NewNMD->addOperand(MappedOperand); - } - } else - for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) - NewNMD->addOperand(MapMetadata(NMD.getOperand(i), VMap)); + for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) + NewNMD->addOperand(MapMetadata(NMD.getOperand(i), VMap)); } return New; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 390925a03b73..9edc52b53550 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -333,7 +333,7 @@ void CodeExtractorAnalysisCache::findSideEffectInfoForBlock(BasicBlock &BB) { MemAddr = LI->getPointerOperand(); } // Global variable can not be aliased with locals. 
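A brief aside on the first hunk below: swapping dyn_cast&lt;Constant&gt; for isa&lt;Constant&gt; is behavior-preserving cleanup. LLVM convention reserves dyn_cast&lt;&gt; for cases where the casted pointer is actually consumed and uses isa&lt;&gt; for a bare type test, e.g. (V is a placeholder value):

    #include "llvm/IR/Constants.h"
    using namespace llvm;

    static void typeTestStyles(const Value *V) {
      if (isa<Constant>(V)) {
        // Preferred form: only the result of the type test is needed.
      }
      if (const auto *C = dyn_cast<Constant>(V)) {
        // dyn_cast is for when the typed pointer C is then used.
        (void)C->getType();
      }
    }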
- if (dyn_cast<Constant>(MemAddr)) + if (isa<Constant>(MemAddr)) break; Value *Base = MemAddr->stripInBoundsConstantOffsets(); if (!isa<AllocaInst>(Base)) { @@ -426,9 +426,8 @@ CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) { BasicBlock *NewExitBlock = CommonExitBlock->splitBasicBlock( CommonExitBlock->getFirstNonPHI()->getIterator()); - for (auto PI = pred_begin(CommonExitBlock), PE = pred_end(CommonExitBlock); - PI != PE;) { - BasicBlock *Pred = *PI++; + for (BasicBlock *Pred : + llvm::make_early_inc_range(predecessors(CommonExitBlock))) { if (Blocks.count(Pred)) continue; Pred->getTerminator()->replaceUsesOfWith(CommonExitBlock, NewExitBlock); @@ -903,6 +902,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::Convergent: case Attribute::Dereferenceable: case Attribute::DereferenceableOrNull: + case Attribute::ElementType: case Attribute::InAlloca: case Attribute::InReg: case Attribute::InaccessibleMemOnly: @@ -930,6 +930,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::StructRet: case Attribute::SwiftError: case Attribute::SwiftSelf: + case Attribute::SwiftAsync: case Attribute::WillReturn: case Attribute::WriteOnly: case Attribute::ZExt: @@ -954,6 +955,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::NonLazyBind: case Attribute::NoRedZone: case Attribute::NoUnwind: + case Attribute::NoSanitizeCoverage: case Attribute::NullPointerIsValid: case Attribute::OptForFuzzing: case Attribute::OptimizeNone: @@ -971,6 +973,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::StackProtectStrong: case Attribute::StrictFP: case Attribute::UWTable: + case Attribute::VScaleRange: case Attribute::NoCfCheck: case Attribute::MustProgress: case Attribute::NoProfile: @@ -1161,9 +1164,8 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, AllocaInst *Struct = nullptr; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { std::vector<Type *> ArgTypes; - for (ValueSet::iterator v = StructValues.begin(), - ve = StructValues.end(); v != ve; ++v) - ArgTypes.push_back((*v)->getType()); + for (Value *V : StructValues) + ArgTypes.push_back(V->getType()); // Allocate a struct at the beginning of this function StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); @@ -1513,20 +1515,19 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc, continue; } - // If the location isn't a constant or an instruction, delete the - // intrinsic. - auto *DVI = cast<DbgVariableIntrinsic>(DII); - Value *Location = DVI->getVariableLocation(); - if (!Location || - (!isa<Constant>(Location) && !isa<Instruction>(Location))) { - DebugIntrinsicsToDelete.push_back(DVI); - continue; - } + auto IsInvalidLocation = [&NewFunc](Value *Location) { + // Location is invalid if it isn't a constant or an instruction, or is an + // instruction but isn't in the new function. + if (!Location || + (!isa<Constant>(Location) && !isa<Instruction>(Location))) + return true; + Instruction *LocationInst = dyn_cast<Instruction>(Location); + return LocationInst && LocationInst->getFunction() != &NewFunc; + }; - // If the variable location is an instruction but isn't in the new - // function, delete the intrinsic. 
- Instruction *LocationInst = dyn_cast<Instruction>(Location); - if (LocationInst && LocationInst->getFunction() != &NewFunc) { + auto *DVI = cast<DbgVariableIntrinsic>(DII); + // If any of the used locations are invalid, delete the intrinsic. + if (any_of(DVI->location_ops(), IsInvalidLocation)) { DebugIntrinsicsToDelete.push_back(DVI); continue; } @@ -1539,7 +1540,7 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc, NewSP, OldVar->getName(), OldVar->getFile(), OldVar->getLine(), OldVar->getType(), /*AlwaysPreserve=*/false, DINode::FlagZero, OldVar->getAlignInBits()); - DVI->setArgOperand(1, MetadataAsValue::get(Ctx, NewVar)); + DVI->setVariable(cast<DILocalVariable>(NewVar)); } for (auto *DII : DebugIntrinsicsToDelete) DII->eraseFromParent(); @@ -1552,10 +1553,11 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc, I.setDebugLoc(DILocation::get(Ctx, DL.getLine(), DL.getCol(), NewSP)); // Loop info metadata may contain line locations. Fix them up. - auto updateLoopInfoLoc = [&Ctx, - NewSP](const DILocation &Loc) -> DILocation * { - return DILocation::get(Ctx, Loc.getLine(), Loc.getColumn(), NewSP, - nullptr); + auto updateLoopInfoLoc = [&Ctx, NewSP](Metadata *MD) -> Metadata * { + if (auto *Loc = dyn_cast_or_null<DILocation>(MD)) + return DILocation::get(Ctx, Loc->getLine(), Loc->getColumn(), NewSP, + nullptr); + return MD; }; updateLoopMetadataDebugLocations(I, updateLoopInfoLoc); } @@ -1595,10 +1597,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) { Instruction *I = &*It; ++It; - if (match(I, m_Intrinsic<Intrinsic::assume>())) { + if (auto *AI = dyn_cast<AssumeInst>(I)) { if (AC) - AC->unregisterAssumption(cast<CallInst>(I)); - I->eraseFromParent(); + AC->unregisterAssumption(AI); + AI->eraseFromParent(); } } } @@ -1612,15 +1614,14 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) { DenseMap<BasicBlock *, BlockFrequency> ExitWeights; SmallPtrSet<BasicBlock *, 1> ExitBlocks; for (BasicBlock *Block : Blocks) { - for (succ_iterator SI = succ_begin(Block), SE = succ_end(Block); SI != SE; - ++SI) { - if (!Blocks.count(*SI)) { + for (BasicBlock *Succ : successors(Block)) { + if (!Blocks.count(Succ)) { // Update the branch weight for this successor. if (BFI) { - BlockFrequency &BF = ExitWeights[*SI]; - BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, *SI); + BlockFrequency &BF = ExitWeights[Succ]; + BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ); } - ExitBlocks.insert(*SI); + ExitBlocks.insert(Succ); } } } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp index 3e4d53c10dc9..30c3fa521d52 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp @@ -1,4 +1,4 @@ -//===- Debugify.cpp - Attach synthetic debug info to everything -----------===// +//===- Debugify.cpp - Check debug info preservation in optimizations ------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,8 +6,10 @@ // //===----------------------------------------------------------------------===// /// -/// \file This pass attaches synthetic debug info to everything. It can be used -/// to create targeted tests for debug info preservation. 
+/// \file In the `synthetic` mode, the `-debugify` attaches synthetic debug info +/// to everything. It can be used to create targeted tests for debug info +/// preservation. In addition, when using the `original` mode, it can check +/// original debug info preservation. The `synthetic` mode is default one. /// //===----------------------------------------------------------------------===// @@ -23,6 +25,10 @@ #include "llvm/IR/PassInstrumentation.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/JSON.h" + +#define DEBUG_TYPE "debugify" using namespace llvm; @@ -35,6 +41,8 @@ enum class Level { Locations, LocationsAndVariables }; + +// Used for the synthetic mode only. cl::opt<Level> DebugifyLevel( "debugify-level", cl::desc("Kind of debug info to add"), cl::values(clEnumValN(Level::Locations, "locations", "Locations only"), @@ -199,16 +207,33 @@ bool llvm::applyDebugifyMetadata( return true; } -static bool applyDebugify(Function &F) { +static bool +applyDebugify(Function &F, + enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, + DebugInfoPerPassMap *DIPreservationMap = nullptr, + StringRef NameOfWrappedPass = "") { Module &M = *F.getParent(); auto FuncIt = F.getIterator(); - return applyDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)), - "FunctionDebugify: ", /*ApplyToMF=*/nullptr); + if (Mode == DebugifyMode::SyntheticDebugInfo) + return applyDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)), + "FunctionDebugify: ", /*ApplyToMF*/ nullptr); + assert(DIPreservationMap); + return collectDebugInfoMetadata(M, M.functions(), *DIPreservationMap, + "FunctionDebugify (original debuginfo)", + NameOfWrappedPass); } -static bool applyDebugify(Module &M) { - return applyDebugifyMetadata(M, M.functions(), - "ModuleDebugify: ", /*ApplyToMF=*/nullptr); +static bool +applyDebugify(Module &M, + enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, + DebugInfoPerPassMap *DIPreservationMap = nullptr, + StringRef NameOfWrappedPass = "") { + if (Mode == DebugifyMode::SyntheticDebugInfo) + return applyDebugifyMetadata(M, M.functions(), + "ModuleDebugify: ", /*ApplyToMF*/ nullptr); + return collectDebugInfoMetadata(M, M.functions(), *DIPreservationMap, + "ModuleDebugify (original debuginfo)", + NameOfWrappedPass); } bool llvm::stripDebugifyMetadata(Module &M) { @@ -256,6 +281,355 @@ bool llvm::stripDebugifyMetadata(Module &M) { return Changed; } +bool llvm::collectDebugInfoMetadata(Module &M, + iterator_range<Module::iterator> Functions, + DebugInfoPerPassMap &DIPreservationMap, + StringRef Banner, + StringRef NameOfWrappedPass) { + LLVM_DEBUG(dbgs() << Banner << ": (before) " << NameOfWrappedPass << '\n'); + + // Clear the map with the debug info before every single pass. + DIPreservationMap.clear(); + + if (!M.getNamedMetadata("llvm.dbg.cu")) { + dbg() << Banner << ": Skipping module without debug info\n"; + return false; + } + + // Visit each instruction. + for (Function &F : Functions) { + if (isFunctionSkipped(F)) + continue; + + // Collect the DISubprogram. 
+ auto *SP = F.getSubprogram(); + DIPreservationMap[NameOfWrappedPass].DIFunctions.insert({F.getName(), SP}); + if (SP) { + LLVM_DEBUG(dbgs() << " Collecting subprogram: " << *SP << '\n'); + for (const DINode *DN : SP->getRetainedNodes()) { + if (const auto *DV = dyn_cast<DILocalVariable>(DN)) { + DIPreservationMap[NameOfWrappedPass].DIVariables[DV] = 0; + } + } + } + + for (BasicBlock &BB : F) { + // Collect debug locations (!dbg) and debug variable intrinsics. + for (Instruction &I : BB) { + // Skip PHIs. + if (isa<PHINode>(I)) + continue; + + // Collect dbg.values and dbg.declares. + if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) { + if (!SP) + continue; + // Skip inlined variables. + if (I.getDebugLoc().getInlinedAt()) + continue; + // Skip undef values. + if (DVI->isUndef()) + continue; + + auto *Var = DVI->getVariable(); + DIPreservationMap[NameOfWrappedPass].DIVariables[Var]++; + continue; + } + + // Skip debug instructions other than dbg.value and dbg.declare. + if (isa<DbgInfoIntrinsic>(&I)) + continue; + + LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n'); + DIPreservationMap[NameOfWrappedPass].InstToDelete.insert({&I, &I}); + + const DILocation *Loc = I.getDebugLoc().get(); + bool HasLoc = Loc != nullptr; + DIPreservationMap[NameOfWrappedPass].DILocations.insert({&I, HasLoc}); + } + } + } + + return true; +} + +// This checks the preservation of original debug info attached to functions. +static bool checkFunctions(const DebugFnMap &DIFunctionsBefore, + const DebugFnMap &DIFunctionsAfter, + StringRef NameOfWrappedPass, + StringRef FileNameFromCU, bool ShouldWriteIntoJSON, + llvm::json::Array &Bugs) { + bool Preserved = true; + for (const auto &F : DIFunctionsAfter) { + if (F.second) + continue; + auto SPIt = DIFunctionsBefore.find(F.first); + if (SPIt == DIFunctionsBefore.end()) { + if (ShouldWriteIntoJSON) + Bugs.push_back(llvm::json::Object({{"metadata", "DISubprogram"}, + {"name", F.first}, + {"action", "not-generate"}})); + else + dbg() << "ERROR: " << NameOfWrappedPass + << " did not generate DISubprogram for " << F.first << " from " + << FileNameFromCU << '\n'; + Preserved = false; + } else { + auto SP = SPIt->second; + if (!SP) + continue; + // If the function had the SP attached before the pass, consider it as + // a debug info bug. + if (ShouldWriteIntoJSON) + Bugs.push_back(llvm::json::Object({{"metadata", "DISubprogram"}, + {"name", F.first}, + {"action", "drop"}})); + else + dbg() << "ERROR: " << NameOfWrappedPass << " dropped DISubprogram of " + << F.first << " from " << FileNameFromCU << '\n'; + Preserved = false; + } + } + + return Preserved; +} + +// This checks the preservation of the original debug info attached to +// instructions. +static bool checkInstructions(const DebugInstMap &DILocsBefore, + const DebugInstMap &DILocsAfter, + const WeakInstValueMap &InstToDelete, + StringRef NameOfWrappedPass, + StringRef FileNameFromCU, + bool ShouldWriteIntoJSON, + llvm::json::Array &Bugs) { + bool Preserved = true; + for (const auto &L : DILocsAfter) { + if (L.second) + continue; + auto Instr = L.first; + + // In order to avoid pointer reuse/recycling, skip the values that might + // have been deleted during a pass. + auto WeakInstrPtr = InstToDelete.find(Instr); + if (WeakInstrPtr != InstToDelete.end() && !WeakInstrPtr->second) + continue; + + auto FnName = Instr->getFunction()->getName(); + auto BB = Instr->getParent(); + auto BBName = BB->hasName() ? 
BB->getName() : "no-name"; + auto InstName = Instruction::getOpcodeName(Instr->getOpcode()); + + auto InstrIt = DILocsBefore.find(Instr); + if (InstrIt == DILocsBefore.end()) { + if (ShouldWriteIntoJSON) + Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"}, + {"fn-name", FnName.str()}, + {"bb-name", BBName.str()}, + {"instr", InstName}, + {"action", "not-generate"}})); + else + dbg() << "WARNING: " << NameOfWrappedPass + << " did not generate DILocation for " << *Instr + << " (BB: " << BBName << ", Fn: " << FnName + << ", File: " << FileNameFromCU << ")\n"; + Preserved = false; + } else { + if (!InstrIt->second) + continue; + // If the instr had the !dbg attached before the pass, consider it as + // a debug info issue. + if (ShouldWriteIntoJSON) + Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"}, + {"fn-name", FnName.str()}, + {"bb-name", BBName.str()}, + {"instr", InstName}, + {"action", "drop"}})); + else + dbg() << "WARNING: " << NameOfWrappedPass << " dropped DILocation of " + << *Instr << " (BB: " << BBName << ", Fn: " << FnName + << ", File: " << FileNameFromCU << ")\n"; + Preserved = false; + } + } + + return Preserved; +} + +// This checks the preservation of original debug variable intrinsics. +static bool checkVars(const DebugVarMap &DIFunctionsBefore, + const DebugVarMap &DIFunctionsAfter, + StringRef NameOfWrappedPass, StringRef FileNameFromCU, + bool ShouldWriteIntoJSON, llvm::json::Array &Bugs) { + bool Preserved = true; + for (const auto &V : DIFunctionsBefore) { + auto VarIt = DIFunctionsAfter.find(V.first); + if (VarIt == DIFunctionsAfter.end()) + continue; + + unsigned NumOfDbgValsAfter = VarIt->second; + + if (V.second > NumOfDbgValsAfter) { + if (ShouldWriteIntoJSON) + Bugs.push_back(llvm::json::Object( + {{"metadata", "dbg-var-intrinsic"}, + {"name", V.first->getName()}, + {"fn-name", V.first->getScope()->getSubprogram()->getName()}, + {"action", "drop"}})); + else + dbg() << "WARNING: " << NameOfWrappedPass + << " drops dbg.value()/dbg.declare() for " << V.first->getName() + << " from " + << "function " << V.first->getScope()->getSubprogram()->getName() + << " (file " << FileNameFromCU << ")\n"; + Preserved = false; + } + } + + return Preserved; +} + +// Write the json data into the specifed file. +static void writeJSON(StringRef OrigDIVerifyBugsReportFilePath, + StringRef FileNameFromCU, StringRef NameOfWrappedPass, + llvm::json::Array &Bugs) { + std::error_code EC; + raw_fd_ostream OS_FILE{OrigDIVerifyBugsReportFilePath, EC, + sys::fs::OF_Append | sys::fs::OF_TextWithCRLF}; + if (EC) { + errs() << "Could not open file: " << EC.message() << ", " + << OrigDIVerifyBugsReportFilePath << '\n'; + return; + } + + OS_FILE << "{\"file\":\"" << FileNameFromCU << "\", "; + + StringRef PassName = NameOfWrappedPass != "" ? NameOfWrappedPass : "no-name"; + OS_FILE << "\"pass\":\"" << PassName << "\", "; + + llvm::json::Value BugsToPrint{std::move(Bugs)}; + OS_FILE << "\"bugs\": " << BugsToPrint; + + OS_FILE << "}\n"; +} + +bool llvm::checkDebugInfoMetadata(Module &M, + iterator_range<Module::iterator> Functions, + DebugInfoPerPassMap &DIPreservationMap, + StringRef Banner, StringRef NameOfWrappedPass, + StringRef OrigDIVerifyBugsReportFilePath) { + LLVM_DEBUG(dbgs() << Banner << ": (after) " << NameOfWrappedPass << '\n'); + + if (!M.getNamedMetadata("llvm.dbg.cu")) { + dbg() << Banner << ": Skipping module without debug info\n"; + return false; + } + + // Map the debug info holding DIs after a pass. 
+ DebugInfoPerPassMap DIPreservationAfter; + + // Visit each instruction. + for (Function &F : Functions) { + if (isFunctionSkipped(F)) + continue; + + // TODO: Collect metadata other than DISubprograms. + // Collect the DISubprogram. + auto *SP = F.getSubprogram(); + DIPreservationAfter[NameOfWrappedPass].DIFunctions.insert( + {F.getName(), SP}); + + if (SP) { + LLVM_DEBUG(dbgs() << " Collecting subprogram: " << *SP << '\n'); + for (const DINode *DN : SP->getRetainedNodes()) { + if (const auto *DV = dyn_cast<DILocalVariable>(DN)) { + DIPreservationAfter[NameOfWrappedPass].DIVariables[DV] = 0; + } + } + } + + for (BasicBlock &BB : F) { + // Collect debug locations (!dbg) and debug variable intrinsics. + for (Instruction &I : BB) { + // Skip PHIs. + if (isa<PHINode>(I)) + continue; + + // Collect dbg.values and dbg.declares. + if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) { + if (!SP) + continue; + // Skip inlined variables. + if (I.getDebugLoc().getInlinedAt()) + continue; + // Skip undef values. + if (DVI->isUndef()) + continue; + + auto *Var = DVI->getVariable(); + DIPreservationAfter[NameOfWrappedPass].DIVariables[Var]++; + continue; + } + + // Skip debug instructions other than dbg.value and dbg.declare. + if (isa<DbgInfoIntrinsic>(&I)) + continue; + + LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n'); + + const DILocation *Loc = I.getDebugLoc().get(); + bool HasLoc = Loc != nullptr; + + DIPreservationAfter[NameOfWrappedPass].DILocations.insert({&I, HasLoc}); + } + } + } + + // TODO: The name of the module could be read better? + StringRef FileNameFromCU = + (cast<DICompileUnit>(M.getNamedMetadata("llvm.dbg.cu")->getOperand(0))) + ->getFilename(); + + auto DIFunctionsBefore = DIPreservationMap[NameOfWrappedPass].DIFunctions; + auto DIFunctionsAfter = DIPreservationAfter[NameOfWrappedPass].DIFunctions; + + auto DILocsBefore = DIPreservationMap[NameOfWrappedPass].DILocations; + auto DILocsAfter = DIPreservationAfter[NameOfWrappedPass].DILocations; + + auto InstToDelete = DIPreservationAfter[NameOfWrappedPass].InstToDelete; + + auto DIVarsBefore = DIPreservationMap[NameOfWrappedPass].DIVariables; + auto DIVarsAfter = DIPreservationAfter[NameOfWrappedPass].DIVariables; + + bool ShouldWriteIntoJSON = !OrigDIVerifyBugsReportFilePath.empty(); + llvm::json::Array Bugs; + + bool ResultForFunc = + checkFunctions(DIFunctionsBefore, DIFunctionsAfter, NameOfWrappedPass, + FileNameFromCU, ShouldWriteIntoJSON, Bugs); + bool ResultForInsts = checkInstructions( + DILocsBefore, DILocsAfter, InstToDelete, NameOfWrappedPass, + FileNameFromCU, ShouldWriteIntoJSON, Bugs); + + bool ResultForVars = checkVars(DIVarsBefore, DIVarsAfter, NameOfWrappedPass, + FileNameFromCU, ShouldWriteIntoJSON, Bugs); + + bool Result = ResultForFunc && ResultForInsts && ResultForVars; + + StringRef ResultBanner = NameOfWrappedPass != "" ? NameOfWrappedPass : Banner; + if (ShouldWriteIntoJSON && !Bugs.empty()) + writeJSON(OrigDIVerifyBugsReportFilePath, FileNameFromCU, NameOfWrappedPass, + Bugs); + + if (Result) + dbg() << ResultBanner << ": PASS\n"; + else + dbg() << ResultBanner << ": FAIL\n"; + + LLVM_DEBUG(dbgs() << "\n\n"); + return Result; +} + namespace { /// Return true if a mis-sized diagnostic is issued for \p DVI. bool diagnoseMisSizedDbgValue(Module &M, DbgValueInst *DVI) { @@ -264,15 +638,16 @@ bool diagnoseMisSizedDbgValue(Module &M, DbgValueInst *DVI) { // // TODO: This, along with a check for non-null value operands, should be // promoted to verifier failures. 
- Value *V = DVI->getValue(); - if (!V) - return false; // For now, don't try to interpret anything more complicated than an empty // DIExpression. Eventually we should try to handle OP_deref and fragments. if (DVI->getExpression()->getNumElements()) return false; + Value *V = DVI->getVariableLocationOp(0); + if (!V) + return false; + Type *Ty = V->getType(); uint64_t ValueOperandSize = getAllocSizeInBits(M, Ty); Optional<uint64_t> DbgVarSize = DVI->getFragmentSizeInBits(); @@ -331,7 +706,7 @@ bool checkDebugifyMetadata(Module &M, // Find missing lines. for (Instruction &I : instructions(F)) { - if (isa<DbgValueInst>(&I) || isa<PHINode>(&I)) + if (isa<DbgValueInst>(&I)) continue; auto DL = I.getDebugLoc(); @@ -340,7 +715,7 @@ bool checkDebugifyMetadata(Module &M, continue; } - if (!DL) { + if (!isa<PHINode>(&I) && !DL) { dbg() << "WARNING: Instruction with empty DebugLoc in function "; dbg() << F.getName() << " --"; I.print(dbg()); @@ -394,43 +769,77 @@ bool checkDebugifyMetadata(Module &M, /// ModulePass for attaching synthetic debug info to everything, used with the /// legacy module pass manager. struct DebugifyModulePass : public ModulePass { - bool runOnModule(Module &M) override { return applyDebugify(M); } + bool runOnModule(Module &M) override { + return applyDebugify(M, Mode, DIPreservationMap, NameOfWrappedPass); + } - DebugifyModulePass() : ModulePass(ID) {} + DebugifyModulePass(enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, + StringRef NameOfWrappedPass = "", + DebugInfoPerPassMap *DIPreservationMap = nullptr) + : ModulePass(ID), NameOfWrappedPass(NameOfWrappedPass), + DIPreservationMap(DIPreservationMap), Mode(Mode) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } static char ID; // Pass identification. + +private: + StringRef NameOfWrappedPass; + DebugInfoPerPassMap *DIPreservationMap; + enum DebugifyMode Mode; }; /// FunctionPass for attaching synthetic debug info to instructions within a /// single function, used with the legacy module pass manager. struct DebugifyFunctionPass : public FunctionPass { - bool runOnFunction(Function &F) override { return applyDebugify(F); } + bool runOnFunction(Function &F) override { + return applyDebugify(F, Mode, DIPreservationMap, NameOfWrappedPass); + } - DebugifyFunctionPass() : FunctionPass(ID) {} + DebugifyFunctionPass( + enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, + StringRef NameOfWrappedPass = "", + DebugInfoPerPassMap *DIPreservationMap = nullptr) + : FunctionPass(ID), NameOfWrappedPass(NameOfWrappedPass), + DIPreservationMap(DIPreservationMap), Mode(Mode) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } static char ID; // Pass identification. + +private: + StringRef NameOfWrappedPass; + DebugInfoPerPassMap *DIPreservationMap; + enum DebugifyMode Mode; }; /// ModulePass for checking debug info inserted by -debugify, used with the /// legacy module pass manager. 
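A usage-level sketch of how the checker declared next can be paired with its debugify counterpart in the legacy pass manager, following the extended createDebugifyModulePass / createCheckDebugifyModulePass factories that appear later in this diff. This assumes matching declarations in Debugify.h; the wrapped pass, its name string, and the JSON report path are placeholders:

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Transforms/Utils/Debugify.h"
    using namespace llvm;

    static void runWithOriginalDIChecks(Module &M, Pass *WrappedPass) {
      DebugInfoPerPassMap DIMap; // collect/check state, keyed by pass name
      legacy::PassManager PM;
      PM.add(createDebugifyModulePass(DebugifyMode::OriginalDebugInfo,
                                      "wrapped-pass", &DIMap));
      PM.add(WrappedPass); // the transformation under test
      PM.add(createCheckDebugifyModulePass(
          /*Strip=*/false, "wrapped-pass", /*StatsMap=*/nullptr,
          DebugifyMode::OriginalDebugInfo, &DIMap,
          /*OrigDIVerifyBugsReportFilePath=*/"di-bugs.json"));
      PM.run(M);
    }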
struct CheckDebugifyModulePass : public ModulePass { bool runOnModule(Module &M) override { - return checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass, - "CheckModuleDebugify", Strip, StatsMap); + if (Mode == DebugifyMode::SyntheticDebugInfo) + return checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass, + "CheckModuleDebugify", Strip, StatsMap); + return checkDebugInfoMetadata( + M, M.functions(), *DIPreservationMap, + "CheckModuleDebugify (original debuginfo)", NameOfWrappedPass, + OrigDIVerifyBugsReportFilePath); } - CheckDebugifyModulePass(bool Strip = false, StringRef NameOfWrappedPass = "", - DebugifyStatsMap *StatsMap = nullptr) - : ModulePass(ID), Strip(Strip), NameOfWrappedPass(NameOfWrappedPass), - StatsMap(StatsMap) {} + CheckDebugifyModulePass( + bool Strip = false, StringRef NameOfWrappedPass = "", + DebugifyStatsMap *StatsMap = nullptr, + enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, + DebugInfoPerPassMap *DIPreservationMap = nullptr, + StringRef OrigDIVerifyBugsReportFilePath = "") + : ModulePass(ID), NameOfWrappedPass(NameOfWrappedPass), + OrigDIVerifyBugsReportFilePath(OrigDIVerifyBugsReportFilePath), + StatsMap(StatsMap), DIPreservationMap(DIPreservationMap), Mode(Mode), + Strip(Strip) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); @@ -439,9 +848,12 @@ struct CheckDebugifyModulePass : public ModulePass { static char ID; // Pass identification. private: - bool Strip; StringRef NameOfWrappedPass; + StringRef OrigDIVerifyBugsReportFilePath; DebugifyStatsMap *StatsMap; + DebugInfoPerPassMap *DIPreservationMap; + enum DebugifyMode Mode; + bool Strip; }; /// FunctionPass for checking debug info inserted by -debugify-function, used @@ -450,16 +862,26 @@ struct CheckDebugifyFunctionPass : public FunctionPass { bool runOnFunction(Function &F) override { Module &M = *F.getParent(); auto FuncIt = F.getIterator(); - return checkDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)), - NameOfWrappedPass, "CheckFunctionDebugify", - Strip, StatsMap); + if (Mode == DebugifyMode::SyntheticDebugInfo) + return checkDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)), + NameOfWrappedPass, "CheckFunctionDebugify", + Strip, StatsMap); + return checkDebugInfoMetadata( + M, make_range(FuncIt, std::next(FuncIt)), *DIPreservationMap, + "CheckFunctionDebugify (original debuginfo)", NameOfWrappedPass, + OrigDIVerifyBugsReportFilePath); } - CheckDebugifyFunctionPass(bool Strip = false, - StringRef NameOfWrappedPass = "", - DebugifyStatsMap *StatsMap = nullptr) - : FunctionPass(ID), Strip(Strip), NameOfWrappedPass(NameOfWrappedPass), - StatsMap(StatsMap) {} + CheckDebugifyFunctionPass( + bool Strip = false, StringRef NameOfWrappedPass = "", + DebugifyStatsMap *StatsMap = nullptr, + enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, + DebugInfoPerPassMap *DIPreservationMap = nullptr, + StringRef OrigDIVerifyBugsReportFilePath = "") + : FunctionPass(ID), NameOfWrappedPass(NameOfWrappedPass), + OrigDIVerifyBugsReportFilePath(OrigDIVerifyBugsReportFilePath), + StatsMap(StatsMap), DIPreservationMap(DIPreservationMap), Mode(Mode), + Strip(Strip) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); @@ -468,9 +890,12 @@ struct CheckDebugifyFunctionPass : public FunctionPass { static char ID; // Pass identification. 
private: - bool Strip; StringRef NameOfWrappedPass; + StringRef OrigDIVerifyBugsReportFilePath; DebugifyStatsMap *StatsMap; + DebugInfoPerPassMap *DIPreservationMap; + enum DebugifyMode Mode; + bool Strip; }; } // end anonymous namespace @@ -496,12 +921,23 @@ void llvm::exportDebugifyStats(StringRef Path, const DebugifyStatsMap &Map) { } } -ModulePass *llvm::createDebugifyModulePass() { - return new DebugifyModulePass(); +ModulePass *createDebugifyModulePass(enum DebugifyMode Mode, + llvm::StringRef NameOfWrappedPass, + DebugInfoPerPassMap *DIPreservationMap) { + if (Mode == DebugifyMode::SyntheticDebugInfo) + return new DebugifyModulePass(); + assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode"); + return new DebugifyModulePass(Mode, NameOfWrappedPass, DIPreservationMap); } -FunctionPass *llvm::createDebugifyFunctionPass() { - return new DebugifyFunctionPass(); +FunctionPass * +createDebugifyFunctionPass(enum DebugifyMode Mode, + llvm::StringRef NameOfWrappedPass, + DebugInfoPerPassMap *DIPreservationMap) { + if (Mode == DebugifyMode::SyntheticDebugInfo) + return new DebugifyFunctionPass(); + assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode"); + return new DebugifyFunctionPass(Mode, NameOfWrappedPass, DIPreservationMap); } PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) { @@ -510,16 +946,28 @@ PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) { return PreservedAnalyses::all(); } -ModulePass *llvm::createCheckDebugifyModulePass(bool Strip, - StringRef NameOfWrappedPass, - DebugifyStatsMap *StatsMap) { - return new CheckDebugifyModulePass(Strip, NameOfWrappedPass, StatsMap); +ModulePass *createCheckDebugifyModulePass( + bool Strip, StringRef NameOfWrappedPass, DebugifyStatsMap *StatsMap, + enum DebugifyMode Mode, DebugInfoPerPassMap *DIPreservationMap, + StringRef OrigDIVerifyBugsReportFilePath) { + if (Mode == DebugifyMode::SyntheticDebugInfo) + return new CheckDebugifyModulePass(Strip, NameOfWrappedPass, StatsMap); + assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode"); + return new CheckDebugifyModulePass(false, NameOfWrappedPass, nullptr, Mode, + DIPreservationMap, + OrigDIVerifyBugsReportFilePath); } -FunctionPass * -llvm::createCheckDebugifyFunctionPass(bool Strip, StringRef NameOfWrappedPass, - DebugifyStatsMap *StatsMap) { - return new CheckDebugifyFunctionPass(Strip, NameOfWrappedPass, StatsMap); +FunctionPass *createCheckDebugifyFunctionPass( + bool Strip, StringRef NameOfWrappedPass, DebugifyStatsMap *StatsMap, + enum DebugifyMode Mode, DebugInfoPerPassMap *DIPreservationMap, + StringRef OrigDIVerifyBugsReportFilePath) { + if (Mode == DebugifyMode::SyntheticDebugInfo) + return new CheckDebugifyFunctionPass(Strip, NameOfWrappedPass, StatsMap); + assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode"); + return new CheckDebugifyFunctionPass(false, NameOfWrappedPass, nullptr, Mode, + DIPreservationMap, + OrigDIVerifyBugsReportFilePath); } PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M, diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp index 26f8e21952cc..31d03e1e86af 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp @@ -9,6 +9,7 @@ #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" #include 
"llvm/Analysis/GlobalsModRef.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" @@ -17,6 +18,7 @@ #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Transforms/Utils.h" + using namespace llvm; static void insertCall(Function &CurFn, StringRef Func, @@ -123,6 +125,7 @@ struct EntryExitInstrumenter : public FunctionPass { } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); } bool runOnFunction(Function &F) override { return ::runOnFunction(F, false); } }; @@ -136,20 +139,34 @@ struct PostInlineEntryExitInstrumenter : public FunctionPass { } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); } bool runOnFunction(Function &F) override { return ::runOnFunction(F, true); } }; char PostInlineEntryExitInstrumenter::ID = 0; } -INITIALIZE_PASS( +INITIALIZE_PASS_BEGIN( + EntryExitInstrumenter, "ee-instrument", + "Instrument function entry/exit with calls to e.g. mcount() (pre inlining)", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END( EntryExitInstrumenter, "ee-instrument", "Instrument function entry/exit with calls to e.g. mcount() (pre inlining)", false, false) -INITIALIZE_PASS(PostInlineEntryExitInstrumenter, "post-inline-ee-instrument", - "Instrument function entry/exit with calls to e.g. mcount() " - "(post inlining)", - false, false) + +INITIALIZE_PASS_BEGIN( + PostInlineEntryExitInstrumenter, "post-inline-ee-instrument", + "Instrument function entry/exit with calls to e.g. mcount() " + "(post inlining)", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END( + PostInlineEntryExitInstrumenter, "post-inline-ee-instrument", + "Instrument function entry/exit with calls to e.g. mcount() " + "(post inlining)", + false, false) FunctionPass *llvm::createEntryExitInstrumenterPass() { return new EntryExitInstrumenter(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp index accedd5b4ee0..91053338df5f 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp @@ -90,7 +90,7 @@ IRBuilder<> *EscapeEnumerator::Next() { SmallVector<Value *, 16> Args; for (unsigned I = Calls.size(); I != 0;) { CallInst *CI = cast<CallInst>(Calls[--I]); - changeToInvokeAndSplitBasicBlock(CI, CleanupBB); + changeToInvokeAndSplitBasicBlock(CI, CleanupBB, DTU); } Builder.SetInsertPoint(RI); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp index 732b00635e29..463c223d9e8f 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp @@ -127,7 +127,7 @@ isSimpleEnoughValueToCommit(Constant *C, /// another pointer type, we punt. We basically just support direct accesses to /// globals and GEP's of globals. This should be kept up to date with /// CommitValueTo. -static bool isSimpleEnoughPointerToCommit(Constant *C) { +static bool isSimpleEnoughPointerToCommit(Constant *C, const DataLayout &DL) { // Conservatively, avoid aggregate types. 
This is because we don't // want to worry about them partially overlapping other stores. if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType()) @@ -157,13 +157,14 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) { if (!CE->isGEPWithNoNotionalOverIndexing()) return false; - return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); - - // A constantexpr bitcast from a pointer to another pointer is a no-op, - // and we know how to evaluate it by moving the bitcast from the pointer - // operand to the value operand. + return ConstantFoldLoadThroughGEPConstantExpr( + GV->getInitializer(), CE, + cast<GEPOperator>(CE)->getResultElementType(), DL); } else if (CE->getOpcode() == Instruction::BitCast && isa<GlobalVariable>(CE->getOperand(0))) { + // A constantexpr bitcast from a pointer to another pointer is a no-op, + // and we know how to evaluate it by moving the bitcast from the pointer + // operand to the value operand. // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or // external globals. return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer(); @@ -173,16 +174,16 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) { return false; } -/// Apply 'Func' to Ptr. If this returns nullptr, introspect the pointer's -/// type and walk down through the initial elements to obtain additional -/// pointers to try. Returns the first non-null return value from Func, or -/// nullptr if the type can't be introspected further. +/// Apply \p TryLoad to Ptr. If this returns \p nullptr, introspect the +/// pointer's type and walk down through the initial elements to obtain +/// additional pointers to try. Returns the first non-null return value from +/// \p TryLoad, or \p nullptr if the type can't be introspected further. static Constant * evaluateBitcastFromPtr(Constant *Ptr, const DataLayout &DL, const TargetLibraryInfo *TLI, - std::function<Constant *(Constant *)> Func) { + std::function<Constant *(Constant *)> TryLoad) { Constant *Val; - while (!(Val = Func(Ptr))) { + while (!(Val = TryLoad(Ptr))) { // If Ty is a non-opaque struct, we can convert the pointer to the struct // into a pointer to its first member. // FIXME: This could be extended to support arrays as well. @@ -207,12 +208,14 @@ static Constant *getInitializer(Constant *C) { /// Return the value that would be computed by a load from P after the stores /// reflected by 'memory' have been performed. If we can't decide, return null. -Constant *Evaluator::ComputeLoadResult(Constant *P) { +Constant *Evaluator::ComputeLoadResult(Constant *P, Type *Ty) { // If this memory location has been recently stored, use the stored value: it // is the most up-to-date. - auto findMemLoc = [this](Constant *Ptr) { return MutatedMemory.lookup(Ptr); }; + auto TryFindMemLoc = [this](Constant *Ptr) { + return MutatedMemory.lookup(Ptr); + }; - if (Constant *Val = findMemLoc(P)) + if (Constant *Val = TryFindMemLoc(P)) return Val; // Access it. @@ -227,7 +230,7 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) { // Handle a constantexpr getelementptr. case Instruction::GetElementPtr: if (auto *I = getInitializer(CE->getOperand(0))) - return ConstantFoldLoadThroughGEPConstantExpr(I, CE); + return ConstantFoldLoadThroughGEPConstantExpr(I, CE, Ty, DL); break; // Handle a constantexpr bitcast. case Instruction::BitCast: @@ -236,7 +239,7 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) { // If it hasn't, we may still be able to find a stored pointer by // introspecting the type. 
Constant *Val = - evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, findMemLoc); + evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, TryFindMemLoc); if (!Val) Val = getInitializer(CE->getOperand(0)); if (Val) @@ -318,9 +321,10 @@ Constant *Evaluator::castCallResultIfNeeded(Value *CallExpr, Constant *RV) { /// Evaluate all instructions in block BB, returning true if successful, false /// if we can't evaluate it. NewBB returns the next BB that control flows into, -/// or null upon return. -bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, - BasicBlock *&NextBB) { +/// or null upon return. StrippedPointerCastsForAliasAnalysis is set to true if +/// we looked through pointer casts to evaluate something. +bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB, + bool &StrippedPointerCastsForAliasAnalysis) { // This is the main evaluation loop. while (true) { Constant *InstResult = nullptr; @@ -339,7 +343,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, Ptr = FoldedPtr; LLVM_DEBUG(dbgs() << "; To: " << *Ptr << "\n"); } - if (!isSimpleEnoughPointerToCommit(Ptr)) { + if (!isSimpleEnoughPointerToCommit(Ptr, DL)) { // If this is too complex for us to commit, reject it. LLVM_DEBUG( dbgs() << "Pointer is too complex for us to evaluate store."); @@ -367,9 +371,17 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, // legal. If it's not, we can try introspecting the type to find a // legal conversion. - auto castValTy = [&](Constant *P) -> Constant * { - Type *Ty = cast<PointerType>(P->getType())->getElementType(); - if (Constant *FV = ConstantFoldLoadThroughBitcast(Val, Ty, DL)) { + auto TryCastValTy = [&](Constant *P) -> Constant * { + // The conversion is illegal if the store is wider than the + // pointee proposed by `evaluateBitcastFromPtr`, since that would + // drop stores to other struct elements when the caller attempts to + // look through a struct's 0th element. + Type *NewTy = cast<PointerType>(P->getType())->getElementType(); + Type *STy = Val->getType(); + if (DL.getTypeSizeInBits(NewTy) < DL.getTypeSizeInBits(STy)) + return nullptr; + + if (Constant *FV = ConstantFoldLoadThroughBitcast(Val, NewTy, DL)) { Ptr = P; return FV; } @@ -377,7 +389,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, }; Constant *NewVal = - evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, castValTy); + evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, TryCastValTy); if (!NewVal) { LLVM_DEBUG(dbgs() << "Failed to bitcast constant ptr, can not " "evaluate.\n"); @@ -428,9 +440,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) { Constant *P = getVal(GEP->getOperand(0)); SmallVector<Constant*, 8> GEPOps; - for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); - i != e; ++i) - GEPOps.push_back(getVal(*i)); + for (Use &Op : llvm::drop_begin(GEP->operands())) + GEPOps.push_back(getVal(Op)); InstResult = ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps, cast<GEPOperator>(GEP)->isInBounds()); @@ -450,7 +461,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, "folding: " << *Ptr << "\n"); } - InstResult = ComputeLoadResult(Ptr); + InstResult = ComputeLoadResult(Ptr, LI->getType()); if (!InstResult) { LLVM_DEBUG( dbgs() << "Failed to compute load result. Can not evaluate load." 
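To make the TryLoad contract in the hunk above concrete: when a load through the given pointer fails, the evaluator keeps reinterpreting a pointer-to-struct as a pointer to its first member and retries. A simplified standalone paraphrase (the in-tree helper additionally threads DataLayout and TLI through and reuses the constant-folding utilities):

    #include <functional>
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DerivedTypes.h"
    using namespace llvm;

    static Constant *
    loadThroughFirstMembers(Constant *Ptr,
                            std::function<Constant *(Constant *)> TryLoad) {
      Constant *Val;
      while (!(Val = TryLoad(Ptr))) {
        auto *STy = dyn_cast<StructType>(
            cast<PointerType>(Ptr->getType())->getElementType());
        if (!STy || STy->isOpaque() || STy->getNumElements() == 0)
          return nullptr; // type cannot be introspected any further
        // A pointer to a struct may be reinterpreted as a pointer to its
        // first member: build the constant GEP for element 0 and retry.
        Constant *Zero =
            ConstantInt::get(Type::getInt32Ty(Ptr->getContext()), 0);
        Constant *Idxs[] = {Zero, Zero};
        Ptr = ConstantExpr::getGetElementPtr(STy, Ptr, Idxs);
      }
      return Val;
    }

Note the width guard this hunk adds on the store path: looking through element 0 is only sound when the stored value is no wider than that element, since a wider store would also clobber the struct's other members.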
@@ -496,7 +507,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, } Constant *Ptr = getVal(MSI->getDest()); Constant *Val = getVal(MSI->getValue()); - Constant *DestVal = ComputeLoadResult(getVal(Ptr)); + Constant *DestVal = + ComputeLoadResult(getVal(Ptr), MSI->getValue()->getType()); if (Val->isNullValue() && DestVal && DestVal->isNullValue()) { // This memset is a no-op. LLVM_DEBUG(dbgs() << "Ignoring no-op memset.\n"); @@ -551,56 +563,74 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, LLVM_DEBUG(dbgs() << "Skipping pseudoprobe intrinsic.\n"); ++CurInst; continue; + } else { + Value *Stripped = CurInst->stripPointerCastsForAliasAnalysis(); + // Only attempt to getVal() if we've actually managed to strip + // anything away, or else we'll call getVal() on the current + // instruction. + if (Stripped != &*CurInst) { + InstResult = getVal(Stripped); + } + if (InstResult) { + LLVM_DEBUG(dbgs() + << "Stripped pointer casts for alias analysis for " + "intrinsic call.\n"); + StrippedPointerCastsForAliasAnalysis = true; + InstResult = ConstantExpr::getBitCast(InstResult, II->getType()); + } else { + LLVM_DEBUG(dbgs() << "Unknown intrinsic. Cannot evaluate.\n"); + return false; + } } - - LLVM_DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n"); - return false; } - // Resolve function pointers. - SmallVector<Constant *, 8> Formals; - Function *Callee = getCalleeWithFormalArgs(CB, Formals); - if (!Callee || Callee->isInterposable()) { - LLVM_DEBUG(dbgs() << "Can not resolve function pointer.\n"); - return false; // Cannot resolve. - } + if (!InstResult) { + // Resolve function pointers. + SmallVector<Constant *, 8> Formals; + Function *Callee = getCalleeWithFormalArgs(CB, Formals); + if (!Callee || Callee->isInterposable()) { + LLVM_DEBUG(dbgs() << "Can not resolve function pointer.\n"); + return false; // Cannot resolve. + } - if (Callee->isDeclaration()) { - // If this is a function we can constant fold, do it. - if (Constant *C = ConstantFoldCall(&CB, Callee, Formals, TLI)) { - InstResult = castCallResultIfNeeded(CB.getCalledOperand(), C); - if (!InstResult) + if (Callee->isDeclaration()) { + // If this is a function we can constant fold, do it. + if (Constant *C = ConstantFoldCall(&CB, Callee, Formals, TLI)) { + InstResult = castCallResultIfNeeded(CB.getCalledOperand(), C); + if (!InstResult) + return false; + LLVM_DEBUG(dbgs() << "Constant folded function call. Result: " + << *InstResult << "\n"); + } else { + LLVM_DEBUG(dbgs() << "Can not constant fold function call.\n"); return false; - LLVM_DEBUG(dbgs() << "Constant folded function call. Result: " - << *InstResult << "\n"); + } } else { - LLVM_DEBUG(dbgs() << "Can not constant fold function call.\n"); - return false; - } - } else { - if (Callee->getFunctionType()->isVarArg()) { - LLVM_DEBUG(dbgs() << "Can not constant fold vararg function call.\n"); - return false; - } + if (Callee->getFunctionType()->isVarArg()) { + LLVM_DEBUG(dbgs() + << "Can not constant fold vararg function call.\n"); + return false; + } - Constant *RetVal = nullptr; - // Execute the call, if successful, use the return value. - ValueStack.emplace_back(); - if (!EvaluateFunction(Callee, RetVal, Formals)) { - LLVM_DEBUG(dbgs() << "Failed to evaluate function.\n"); - return false; - } - ValueStack.pop_back(); - InstResult = castCallResultIfNeeded(CB.getCalledOperand(), RetVal); - if (RetVal && !InstResult) - return false; + Constant *RetVal = nullptr; + // Execute the call, if successful, use the return value. 
+ ValueStack.emplace_back(); + if (!EvaluateFunction(Callee, RetVal, Formals)) { + LLVM_DEBUG(dbgs() << "Failed to evaluate function.\n"); + return false; + } + ValueStack.pop_back(); + InstResult = castCallResultIfNeeded(CB.getCalledOperand(), RetVal); + if (RetVal && !InstResult) + return false; - if (InstResult) { - LLVM_DEBUG(dbgs() << "Successfully evaluated function. Result: " - << *InstResult << "\n\n"); - } else { - LLVM_DEBUG(dbgs() - << "Successfully evaluated function. Result: 0\n\n"); + if (InstResult) { + LLVM_DEBUG(dbgs() << "Successfully evaluated function. Result: " + << *InstResult << "\n\n"); + } else { + LLVM_DEBUG(dbgs() + << "Successfully evaluated function. Result: 0\n\n"); + } } } } else if (CurInst->isTerminator()) { @@ -695,15 +725,27 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings. LLVM_DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n"); - if (!EvaluateBlock(CurInst, NextBB)) + bool StrippedPointerCastsForAliasAnalysis = false; + + if (!EvaluateBlock(CurInst, NextBB, StrippedPointerCastsForAliasAnalysis)) return false; if (!NextBB) { // Successfully running until there's no next block means that we found // the return. Fill it the return value and pop the call stack. ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator()); - if (RI->getNumOperands()) + if (RI->getNumOperands()) { + // The Evaluator can look through pointer casts as long as alias + // analysis holds because it's just a simple interpreter and doesn't + // skip memory accesses due to invariant group metadata, but we can't + // let users of Evaluator use a value that's been gleaned looking + // through stripping pointer casts. + if (StrippedPointerCastsForAliasAnalysis && + !RI->getReturnValue()->getType()->isVoidTy()) { + return false; + } RetVal = getVal(RI->getOperand(0)); + } CallStack.pop_back(); return true; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp index 44af95eef67d..10f48fe827f4 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp @@ -129,8 +129,7 @@ static void reconnectChildLoops(LoopInfo &LI, Loop *ParentLoop, Loop *NewLoop, SmallVector<Loop *, 8> ChildLoops(FirstChild, CandidateLoops.end()); CandidateLoops.erase(FirstChild, CandidateLoops.end()); - for (auto II = ChildLoops.begin(), IE = ChildLoops.end(); II != IE; ++II) { - auto Child = *II; + for (Loop *Child : ChildLoops) { LLVM_DEBUG(dbgs() << "child loop: " << Child->getHeader()->getName() << "\n"); // TODO: A child loop whose header is also a header in the current diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/FlattenCFG.cpp index 0098dcaeb07a..dbcacc20b589 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/FlattenCFG.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/FlattenCFG.cpp @@ -360,7 +360,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Block1, BasicBlock *Block2, for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) { if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) { // Check alias with Head2. 
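On the change below: the guard moves from testing the result of AAResults::alias() for truthiness to an explicit isNoAlias() query. The conservative reading is unchanged, namely, give up unless alias analysis positively proves the two accesses cannot overlap. A minimal restatement of the gate (I1/I2 stand in for the two instructions being compared):

    #include "llvm/Analysis/AliasAnalysis.h"
    using namespace llvm;

    // Proceed only when AA is available and proves NoAlias; anything weaker
    // (MayAlias, PartialAlias, MustAlias, or no AA at all) blocks the merge.
    static bool provenNoAlias(AAResults *AA, const Value *I1, const Value *I2) {
      return AA && AA->isNoAlias(I1, I2);
    }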
- if (!AA || AA->alias(&*iter1, &*BI)) + if (!AA || !AA->isNoAlias(&*iter1, &*BI)) return false; } } @@ -411,8 +411,10 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Block1, BasicBlock *Block2, /// approach goes for the opposite case. bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) { BasicBlock *IfTrue2, *IfFalse2; - Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2); - Instruction *CInst2 = dyn_cast_or_null<Instruction>(IfCond2); + BranchInst *DomBI2 = GetIfCondition(BB, IfTrue2, IfFalse2); + if (!DomBI2) + return false; + Instruction *CInst2 = dyn_cast<Instruction>(DomBI2->getCondition()); if (!CInst2) return false; @@ -421,8 +423,10 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) { return false; BasicBlock *IfTrue1, *IfFalse1; - Value *IfCond1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1); - Instruction *CInst1 = dyn_cast_or_null<Instruction>(IfCond1); + BranchInst *DomBI1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1); + if (!DomBI1) + return false; + Instruction *CInst1 = dyn_cast<Instruction>(DomBI1->getCondition()); if (!CInst1) return false; @@ -479,7 +483,7 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) { FirstEntryBlock->getInstList() .splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList()); BranchInst *PBI = cast<BranchInst>(FirstEntryBlock->getTerminator()); - assert(PBI->getCondition() == IfCond2); + assert(PBI->getCondition() == CInst2); BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); Builder.SetInsertPoint(PBI); @@ -494,7 +498,7 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) { PBI->swapSuccessors(); } Value *NC = Builder.CreateBinOp(CombineOp, CInst1, CInst2); - PBI->replaceUsesOfWith(IfCond2, NC); + PBI->replaceUsesOfWith(CInst2, NC); Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); // Handle PHI node to replace its predecessors to FirstEntryBlock. diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp index 8df7ae9563d8..2946c0018c31 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -276,10 +276,12 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) { // When ClearDSOLocalOnDeclarations is true, clear dso_local if GV is // converted to a declaration, to disable direct access. Don't do this if GV // is implicitly dso_local due to a non-default visibility. - if (ClearDSOLocalOnDeclarations && GV.isDeclarationForLinker() && + if (ClearDSOLocalOnDeclarations && + (GV.isDeclarationForLinker() || + (isPerformingImport() && !doImportAsDefinition(&GV))) && !GV.isImplicitDSOLocal()) { GV.setDSOLocal(false); - } else if (VI && VI.isDSOLocal()) { + } else if (VI && VI.isDSOLocal(ImportIndex.withDSOLocalPropagation())) { // If all summaries are dso_local, symbol gets resolved to a known local // definition. 
GV.setDSOLocal(true); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/HelloWorld.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/HelloWorld.cpp new file mode 100644 index 000000000000..7019e9e4451b --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/HelloWorld.cpp @@ -0,0 +1,17 @@ +//===-- HelloWorld.cpp - Example Transformations --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/HelloWorld.h" + +using namespace llvm; + +PreservedAnalyses HelloWorldPass::run(Function &F, + FunctionAnalysisManager &AM) { + errs() << F.getName() << "\n"; + return PreservedAnalyses::all(); +} diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp index a2b72e4e7f03..a1e160d144dc 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp @@ -40,7 +40,7 @@ STATISTIC(NumCompUsedAdded, /// lanes. The TLI assumes that all parameters and the return type of /// CI (other than void) need to be widened to a VectorType of VF /// lanes. -static void addVariantDeclaration(CallInst &CI, const unsigned VF, +static void addVariantDeclaration(CallInst &CI, const ElementCount &VF, const StringRef VFName) { Module *M = CI.getModule(); @@ -89,9 +89,8 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) { Module *M = CI.getModule(); const SetVector<StringRef> OriginalSetOfMappings(Mappings.begin(), Mappings.end()); - // All VFs in the TLI are powers of 2. - for (unsigned VF = 2, WidestVF = TLI.getWidestVF(ScalarName); VF <= WidestVF; - VF *= 2) { + + auto AddVariantDecl = [&](const ElementCount &VF) { const std::string TLIName = std::string(TLI.getVectorizedFunction(ScalarName, VF)); if (!TLIName.empty()) { @@ -105,7 +104,19 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) { if (!VariantF) addVariantDeclaration(CI, VF, TLIName); } - } + }; + + // All VFs in the TLI are powers of 2. + ElementCount WidestFixedVF, WidestScalableVF; + TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF); + + for (ElementCount VF = ElementCount::getFixed(2); + ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2) + AddVariantDecl(VF); + + // TODO: Add scalable variants once we're able to test them. 
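The InjectTLIMappings hunk above swaps the old plain-unsigned VF loop for ElementCount-based iteration. A small sketch of the same walk, assuming only the ElementCount API visible in this hunk (declareVariantFor is a made-up callback); the assert that follows in the diff fences off the scalable side until it can be tested:

#include "llvm/Support/TypeSize.h"
using namespace llvm;

void declareVariantFor(ElementCount VF); // hypothetical callback

void walkFixedVFs(ElementCount WidestFixedVF) {
  // Visits 2, 4, 8, ... up to the widest fixed factor. isKnownLE is used
  // instead of operator<= because fixed and scalable element counts are
  // only partially ordered.
  for (ElementCount VF = ElementCount::getFixed(2);
       ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
    declareVariantFor(VF);
}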
+ assert(WidestScalableVF.isZero() && + "Scalable vector mappings not yet supported"); VFABI::setVectorVariantNames(&CI, Mappings); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp index fb271a2118ba..792aa8208f27 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -27,8 +27,9 @@ #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ObjCARCAnalysisUtils.h" +#include "llvm/Analysis/ObjCARCUtil.h" #include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Argument.h" @@ -44,6 +45,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -61,6 +63,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> #include <cassert> @@ -543,9 +546,16 @@ static BasicBlock *HandleCallsInBlockInlinedThroughInvoke( // instructions require no special handling. CallInst *CI = dyn_cast<CallInst>(I); - if (!CI || CI->doesNotThrow() || CI->isInlineAsm()) + if (!CI || CI->doesNotThrow()) continue; + if (CI->isInlineAsm()) { + InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand()); + if (!IA->canThrow()) { + continue; + } + } + // We do not need to (and in fact, cannot) convert possibly throwing calls // to @llvm.experimental_deoptimize (resp. @llvm.experimental.guard) into // invokes. The caller's "segment" of the deoptimization continuation @@ -929,7 +939,8 @@ void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart, /// parameters with noalias metadata specifying the new scope, and tag all /// non-derived loads, stores and memory intrinsics with the new alias scopes. static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, - const DataLayout &DL, AAResults *CalleeAAR) { + const DataLayout &DL, AAResults *CalleeAAR, + ClonedCodeInfo &InlinedFunctionInfo) { if (!EnableNoAliasConversion) return; @@ -999,7 +1010,7 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, continue; Instruction *NI = dyn_cast<Instruction>(VMI->second); - if (!NI) + if (!NI || InlinedFunctionInfo.isSimplified(I, NI)) continue; bool IsArgMemOnlyCall = false, IsFuncCall = false; @@ -1025,6 +1036,11 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, IsFuncCall = true; if (CalleeAAR) { FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(Call); + + // We'll retain this knowledge without additional metadata. 
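For context on the two checks that follow in AddAliasScopeMetadata: getModRefBehavior() folds the callee's memory attributes into a single FunctionModRefBehavior value, which the static AAResults predicates then classify. A hedged sketch, assuming a CalleeAAR and Call in scope as above:

FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(Call);
if (AAResults::doesNotAccessMemory(MRB)) {
  // readnone: the call cannot touch the noalias arguments at all.
} else if (AAResults::onlyAccessesInaccessibleMem(MRB)) {
  // inaccessiblememonly: same conclusion, no extra metadata needed.
} else if (AAResults::onlyAccessesArgPointees(MRB)) {
  // argmemonly: only pointer arguments need alias-scope handling.
}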
+ if (AAResults::onlyAccessesInaccessibleMem(MRB)) + continue; + if (AAResults::onlyAccessesArgPointees(MRB)) IsArgMemOnlyCall = true; } @@ -1280,7 +1296,7 @@ static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) { CallInst *NewAsmp = IRBuilder<>(&CB).CreateAlignmentAssumption(DL, ArgVal, Align); - AC->registerAssumption(NewAsmp); + AC->registerAssumption(cast<AssumeInst>(NewAsmp)); } } } @@ -1504,9 +1520,11 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, BI != BE; ++BI) { // Loop metadata needs to be updated so that the start and end locs // reference inlined-at locations. - auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode, &IANodes]( - const DILocation &Loc) -> DILocation * { - return inlineDebugLoc(&Loc, InlinedAtNode, Ctx, IANodes).get(); + auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode, + &IANodes](Metadata *MD) -> Metadata * { + if (auto *Loc = dyn_cast_or_null<DILocation>(MD)) + return inlineDebugLoc(Loc, InlinedAtNode, Ctx, IANodes).get(); + return MD; }; updateLoopMetadataDebugLocations(*BI, updateLoopInfoLoc); @@ -1636,6 +1654,99 @@ void llvm::updateProfileCallee( } } +/// An operand bundle "clang.arc.attachedcall" on a call indicates the call +/// result is implicitly consumed by a call to retainRV or claimRV immediately +/// after the call. This function inlines the retainRV/claimRV calls. +/// +/// There are three cases to consider: +/// +/// 1. If there is a call to autoreleaseRV that takes a pointer to the returned +/// object in the callee return block, the autoreleaseRV call and the +/// retainRV/claimRV call in the caller cancel out. If the call in the caller +/// is a claimRV call, a call to objc_release is emitted. +/// +/// 2. If there is a call in the callee return block that doesn't have operand +/// bundle "clang.arc.attachedcall", the operand bundle on the original call +/// is transferred to the call in the callee. +/// +/// 3. Otherwise, a call to objc_retain is inserted if the call in the caller is +/// a retainRV call. +static void +inlineRetainOrClaimRVCalls(CallBase &CB, + const SmallVectorImpl<ReturnInst *> &Returns) { + Module *Mod = CB.getModule(); + bool IsRetainRV = objcarc::hasAttachedCallOpBundle(&CB, true), + IsClaimRV = !IsRetainRV; + + for (auto *RI : Returns) { + Value *RetOpnd = objcarc::GetRCIdentityRoot(RI->getOperand(0)); + BasicBlock::reverse_iterator I = ++(RI->getIterator().getReverse()); + BasicBlock::reverse_iterator EI = RI->getParent()->rend(); + bool InsertRetainCall = IsRetainRV; + IRBuilder<> Builder(RI->getContext()); + + // Walk backwards through the basic block looking for either a matching + // autoreleaseRV call or an unannotated call. + for (; I != EI;) { + auto CurI = I++; + + // Ignore casts. + if (isa<CastInst>(*CurI)) + continue; + + if (auto *II = dyn_cast<IntrinsicInst>(&*CurI)) { + if (II->getIntrinsicID() == Intrinsic::objc_autoreleaseReturnValue && + II->hasNUses(0) && + objcarc::GetRCIdentityRoot(II->getOperand(0)) == RetOpnd) { + // If we've found a matching autoreleaseRV call: + // - If claimRV is attached to the call, insert a call to objc_release + // and erase the autoreleaseRV call. + // - If retainRV is attached to the call, just erase the autoreleaseRV + // call.
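In IR terms, the cancellation decided here looks roughly like this (a hedged illustration; the exact bundle operand is whatever getAttachedCallOperandBundleEnum encodes in this tree):

// Caller, before inlining (retainRV flavor of the bundle):
//   %r = call i8* @f() [ "clang.arc.attachedcall"(i64 0) ]
// Callee return block:
//   call i8* @llvm.objc.autoreleaseReturnValue(i8* %v)   ; result unused
//   ret i8* %v
// After inlining, the autoreleaseRV/retainRV pair simply cancels; for the
// claimRV flavor an explicit objc_release of %v is emitted instead, which
// is what the code below does.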
+ if (IsClaimRV) { + Builder.SetInsertPoint(II); + Function *IFn = + Intrinsic::getDeclaration(Mod, Intrinsic::objc_release); + Value *BC = + Builder.CreateBitCast(RetOpnd, IFn->getArg(0)->getType()); + Builder.CreateCall(IFn, BC, ""); + } + II->eraseFromParent(); + InsertRetainCall = false; + } + } else if (auto *CI = dyn_cast<CallInst>(&*CurI)) { + if (objcarc::GetRCIdentityRoot(CI) == RetOpnd && + !objcarc::hasAttachedCallOpBundle(CI)) { + // If we've found an unannotated call that defines RetOpnd, add a + // "clang.arc.attachedcall" operand bundle. + Value *BundleArgs[] = {ConstantInt::get( + Builder.getInt64Ty(), + objcarc::getAttachedCallOperandBundleEnum(IsRetainRV))}; + OperandBundleDef OB("clang.arc.attachedcall", BundleArgs); + auto *NewCall = CallBase::addOperandBundle( + CI, LLVMContext::OB_clang_arc_attachedcall, OB, CI); + NewCall->copyMetadata(*CI); + CI->replaceAllUsesWith(NewCall); + CI->eraseFromParent(); + InsertRetainCall = false; + } + } + + break; + } + + if (InsertRetainCall) { + // The retainRV is attached to the call and we've failed to find a + // matching autoreleaseRV or an annotated call in the callee. Emit a call + // to objc_retain. + Builder.SetInsertPoint(RI); + Function *IFn = Intrinsic::getDeclaration(Mod, Intrinsic::objc_retain); + Value *BC = Builder.CreateBitCast(RetOpnd, IFn->getArg(0)->getType()); + Builder.CreateCall(IFn, BC, ""); + } + } +} + /// This function inlines the called function into the basic block of the /// caller. This returns false if it is not possible to inline this call. /// The program is still in a well defined state if this occurs though. @@ -1673,6 +1784,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // ... and "funclet" operand bundles. if (Tag == LLVMContext::OB_funclet) continue; + if (Tag == LLVMContext::OB_clang_arc_attachedcall) + continue; return InlineResult::failure("unsupported operand bundle"); } @@ -1835,17 +1948,27 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // happy with whatever the cloner can do. CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", - &InlinedFunctionInfo, &CB); + &InlinedFunctionInfo); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; - if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr) - // Update the BFI of blocks cloned into the caller. - updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI, - CalledFunc->front()); - - updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), CB, - IFI.PSI, IFI.CallerBFI); + // Insert retainRV/claimRV runtime calls. + if (objcarc::hasAttachedCallOpBundle(&CB)) + inlineRetainOrClaimRVCalls(CB, Returns); + + // Update caller/callee profiles only when requested. For sample loader + // inlining, the context-sensitive inlinee profile doesn't need to be + // subtracted from callee profile, and the inlined clone also doesn't need + // to be scaled based on call site count. + if (IFI.UpdateProfile) { + if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr) + // Update the BFI of blocks cloned into the caller. + updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI, + CalledFunc->front()); + + updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), CB, + IFI.PSI, IFI.CallerBFI); + } // Inject byval arguments initialization.
for (std::pair<Value*, Value*> &Init : ByValInit) @@ -1915,7 +2038,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, SAMetadataCloner.remap(FirstNewBlock, Caller->end()); // Add noalias metadata if necessary. - AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR); + AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo); // Clone return attributes on the callsite into the calls within the inlined // function which feed into its return value. @@ -1929,9 +2052,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, for (BasicBlock &NewBlock : make_range(FirstNewBlock->getIterator(), Caller->end())) for (Instruction &I : NewBlock) - if (auto *II = dyn_cast<IntrinsicInst>(&I)) - if (II->getIntrinsicID() == Intrinsic::assume) - IFI.GetAssumptionCache(*Caller).registerAssumption(II); + if (auto *II = dyn_cast<AssumeInst>(&I)) + IFI.GetAssumptionCache(*Caller).registerAssumption(II); } // If there are any alloca instructions in the block that used to be the entry @@ -2068,7 +2190,11 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // Leave lifetime markers for the static alloca's, scoping them to the // function we just inlined. - if (InsertLifetime && !IFI.StaticAllocas.empty()) { + // We need to insert lifetime intrinsics even at O0 to avoid invalid + // access caused by multithreaded coroutines. The check + // `Caller->isPresplitCoroutine()` would affect AlwaysInliner at O0 only. + if ((InsertLifetime || Caller->isPresplitCoroutine()) && + !IFI.StaticAllocas.empty()) { IRBuilder<> builder(&FirstNewBlock->front()); for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) { AllocaInst *AI = IFI.StaticAllocas[ai]; @@ -2201,7 +2327,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // As such, we replace the cleanupret with unreachable. if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator())) if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally) - changeToUnreachable(CleanupRet, /*UseLLVMTrap=*/false); + changeToUnreachable(CleanupRet); Instruction *I = BB->getFirstNonPHI(); if (!I->isEHPad()) @@ -2255,6 +2381,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, SmallVector<OperandBundleDef, 1> OpBundles; DeoptCall->getOperandBundlesAsDefs(OpBundles); + auto DeoptAttributes = DeoptCall->getAttributes(); DeoptCall->eraseFromParent(); assert(!OpBundles.empty() && "Expected at least the deopt operand bundle"); @@ -2263,6 +2390,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, CallInst *NewDeoptCall = Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles); NewDeoptCall->setCallingConv(CallingConv); + NewDeoptCall->setAttributes(DeoptAttributes); if (NewDeoptCall->getType()->isVoidTy()) Builder.CreateRetVoid(); else @@ -2315,14 +2443,17 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // before we splice the inlined code into the CFG and lose track of which // blocks were actually inlined, collect the call sites. We only do this if // call graph updates weren't requested, as those provide value handle based - // tracking of inlined call sites instead. + // tracking of inlined call sites instead. Calls to intrinsics are not + // collected because they are not inlineable. if (InlinedFunctionInfo.ContainsCalls && !IFI.CG) { // Otherwise just collect the raw call sites that were inlined. 
for (BasicBlock &NewBB : make_range(FirstNewBlock->getIterator(), Caller->end())) for (Instruction &I : NewBB) if (auto *CB = dyn_cast<CallBase>(&I)) - IFI.InlinedCallSites.push_back(CB); + if (!(CB->getCalledFunction() && + CB->getCalledFunction()->isIntrinsic())) + IFI.InlinedCallSites.push_back(CB); } // If we cloned in _exactly one_ basic block, and if that block ends in a diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp index 7437701f5339..277fd903e9aa 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -38,6 +38,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -48,7 +49,6 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; @@ -236,7 +236,6 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, llvm::findDbgValues(DbgValues, I); // Update pre-existing debug value uses that reside outside the loop. - auto &Ctx = I->getContext(); for (auto DVI : DbgValues) { BasicBlock *UserBB = DVI->getParent(); if (InstBB == UserBB || L->contains(UserBB)) @@ -247,7 +246,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, Value *V = AddedPHIs.size() == 1 ? AddedPHIs[0] : SSAUpdate.FindValueForBlock(UserBB); if (V) - DVI->setOperand(0, MetadataAsValue::get(Ctx, ValueAsMetadata::get(V))); + DVI->replaceVariableLocationOp(I, V); } // SSAUpdater might have inserted phi-nodes inside other loops. We'll need @@ -504,9 +503,6 @@ PreservedAnalyses LCSSAPass::run(Function &F, FunctionAnalysisManager &AM) { PreservedAnalyses PA; PA.preserveSet<CFGAnalyses>(); - PA.preserve<BasicAA>(); - PA.preserve<GlobalsAA>(); - PA.preserve<SCEVAA>(); PA.preserve<ScalarEvolutionAnalysis>(); // BPI maps terminators to probabilities, since we don't modify the CFG, no // updates are needed to preserve it. 
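The LCSSA hunk above is one of many in this commit that stop rewriting operand 0 with wrapped metadata and use the location-operand API instead. A minimal sketch of the pattern, assuming the helper below is purely illustrative (findDbgValues and replaceVariableLocationOp both appear in this diff):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Retarget every dbg.value that refers to From so it refers to To instead.
static void retargetDbgValues(Instruction *From, Value *To) {
  SmallVector<DbgValueInst *, 4> DbgValues;
  findDbgValues(DbgValues, From);
  for (DbgValueInst *DVI : DbgValues)
    // Unlike rewriting operand 0 directly, this also handles variadic
    // dbg.values whose DIArgList mentions From among several locations.
    DVI->replaceVariableLocationOp(From, To);
}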
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp index 4c52fac6f7cb..7e5832148bc0 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp @@ -555,7 +555,6 @@ PreservedAnalyses LibCallsShrinkWrapPass::run(Function &F, if (!runImpl(F, TLI, DT)) return PreservedAnalyses::all(); auto PA = PreservedAnalyses(); - PA.preserve<GlobalsAA>(); PA.preserve<DominatorTreeAnalysis>(); return PA; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp index ae26058c210c..d03d76f57ca1 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp @@ -24,7 +24,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/DomTreeUpdater.h" @@ -65,6 +64,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/PseudoProbe.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" @@ -111,8 +111,8 @@ static cl::opt<unsigned> PHICSENumPHISmallSize( "perform a (faster!) exhaustive search instead of set-driven one.")); // Max recursion depth for collectBitParts used when detecting bswap and -// bitreverse idioms -static const unsigned BitPartRecursionMaxDepth = 64; +// bitreverse idioms. +static const unsigned BitPartRecursionMaxDepth = 48; //===----------------------------------------------------------------------===// // Local constant propagation. @@ -148,7 +148,12 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, Dest1->removePredecessor(BI->getParent()); // Replace the conditional branch with an unconditional one. - Builder.CreateBr(Dest1); + BranchInst *NewBI = Builder.CreateBr(Dest1); + + // Transfer the metadata to the new branch instruction. + NewBI->copyMetadata(*BI, {LLVMContext::MD_loop, LLVMContext::MD_dbg, + LLVMContext::MD_annotation}); + Value *Cond = BI->getCondition(); BI->eraseFromParent(); if (DeleteDeadConditions) @@ -167,7 +172,12 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, OldDest->removePredecessor(BB); // Replace the conditional branch with an unconditional one. - Builder.CreateBr(Destination); + BranchInst *NewBI = Builder.CreateBr(Destination); + + // Transfer the metadata to the new branch instruction. + NewBI->copyMetadata(*BI, {LLVMContext::MD_loop, LLVMContext::MD_dbg, + LLVMContext::MD_annotation}); + BI->eraseFromParent(); if (DTU) DTU->applyUpdates({{DominatorTree::Delete, BB, OldDest}}); @@ -257,7 +267,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, Builder.CreateBr(TheOnlyDest); BasicBlock *BB = SI->getParent(); - SmallSetVector<BasicBlock *, 8> RemovedSuccessors; + SmallSet<BasicBlock *, 8> RemovedSuccessors; // Remove entries from PHI nodes which we no longer branch to... 
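In the ConstantFoldTerminator hunks above, note that the rewritten branch inherits only a whitelist of metadata kinds; presumably copying everything could carry over kinds that are not meaningful on the new unconditional terminator. The shape of the idiom, restated as a sketch:

// Fold `br i1 %c, %dest1, %dest2` into `br %dest1`, keeping loop, debug
// location and annotation metadata attached to the new terminator.
BranchInst *NewBI = Builder.CreateBr(Dest1);
NewBI->copyMetadata(*BI, {LLVMContext::MD_loop, LLVMContext::MD_dbg,
                          LLVMContext::MD_annotation});

The switch-folding hunk resumes below.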
BasicBlock *SuccToKeep = TheOnlyDest; @@ -329,7 +339,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, if (auto *BA = dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) { BasicBlock *TheOnlyDest = BA->getBasicBlock(); - SmallSetVector<BasicBlock *, 8> RemovedSuccessors; + SmallSet<BasicBlock *, 8> RemovedSuccessors; // Insert the new branch. Builder.CreateBr(TheOnlyDest); @@ -410,7 +420,7 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, return true; } if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) { - if (DVI->getValue()) + if (DVI->hasArgList() || DVI->getValue(0)) return false; return true; } @@ -456,13 +466,18 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, // sophisticated tradeoffs for guards considering potential for check // widening, but for now we keep things simple. if ((II->getIntrinsicID() == Intrinsic::assume && - isAssumeWithEmptyBundle(*II)) || + isAssumeWithEmptyBundle(cast<AssumeInst>(*II))) || II->getIntrinsicID() == Intrinsic::experimental_guard) { if (ConstantInt *Cond = dyn_cast<ConstantInt>(II->getArgOperand(0))) return !Cond->isZero(); return false; } + + if (auto *FPI = dyn_cast<ConstrainedFPIntrinsic>(I)) { + Optional<fp::ExceptionBehavior> ExBehavior = FPI->getExceptionBehavior(); + return ExBehavior.getValue() != fp::ebStrict; + } } if (isAllocLikeFn(I, TLI)) @@ -476,6 +491,16 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, if (isMathLibCallNoop(Call, TLI)) return true; + // To express possible interaction with the floating point environment, + // constrained intrinsics are described as if they access memory. So they look + // like they have side effects but actually do not unless they raise floating + // point exceptions. If FP exceptions are ignored, the intrinsic may be deleted. + if (auto *CI = dyn_cast<ConstrainedFPIntrinsic>(I)) { + Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior(); + if (!EB || *EB == fp::ExceptionBehavior::ebIgnore) + return true; + } + return false; } @@ -565,8 +590,7 @@ bool llvm::replaceDbgUsesWithUndef(Instruction *I) { findDbgUsers(DbgUsers, I); for (auto *DII : DbgUsers) { Value *Undef = UndefValue::get(I->getType()); - DII->setOperand(0, MetadataAsValue::get(DII->getContext(), - ValueAsMetadata::get(Undef))); + DII->replaceVariableLocationOp(I, Undef); } return !DbgUsers.empty(); } @@ -729,21 +753,22 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, BasicBlock *PredBB = DestBB->getSinglePredecessor(); assert(PredBB && "Block doesn't have a single predecessor!"); - bool ReplaceEntryBB = false; - if (PredBB == &DestBB->getParent()->getEntryBlock()) - ReplaceEntryBB = true; + bool ReplaceEntryBB = PredBB->isEntryBlock(); // DTU updates: Collect all the edges that enter // PredBB. These dominator edges will be redirected to DestBB. SmallVector<DominatorTree::UpdateType, 32> Updates; if (DTU) { - for (auto I = pred_begin(PredBB), E = pred_end(PredBB); I != E; ++I) { + SmallPtrSet<BasicBlock *, 2> PredsOfPredBB(pred_begin(PredBB), + pred_end(PredBB)); + Updates.reserve(Updates.size() + 2 * PredsOfPredBB.size() + 1); + for (BasicBlock *PredOfPredBB : PredsOfPredBB) // This predecessor of PredBB may already have DestBB as a successor.
- if (!llvm::is_contained(successors(*I), DestBB)) - Updates.push_back({DominatorTree::Insert, *I, DestBB}); - Updates.push_back({DominatorTree::Delete, *I, PredBB}); - } + if (PredOfPredBB != PredBB) + Updates.push_back({DominatorTree::Insert, PredOfPredBB, DestBB}); + for (BasicBlock *PredOfPredBB : PredsOfPredBB) + Updates.push_back({DominatorTree::Delete, PredOfPredBB, PredBB}); Updates.push_back({DominatorTree::Delete, PredBB, DestBB}); } @@ -1057,8 +1082,8 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, // We cannot fold the block if it's a branch to an already present callbr // successor because that creates duplicate successors. - for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { - if (auto *CBI = dyn_cast<CallBrInst>((*I)->getTerminator())) { + for (BasicBlock *PredBB : predecessors(BB)) { + if (auto *CBI = dyn_cast<CallBrInst>(PredBB->getTerminator())) { if (Succ == CBI->getDefaultDest()) return false; for (unsigned i = 0, e = CBI->getNumIndirectDests(); i != e; ++i) @@ -1072,14 +1097,15 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, SmallVector<DominatorTree::UpdateType, 32> Updates; if (DTU) { // All predecessors of BB will be moved to Succ. - SmallSetVector<BasicBlock *, 8> Predecessors(pred_begin(BB), pred_end(BB)); - Updates.reserve(Updates.size() + 2 * Predecessors.size()); - for (auto *Predecessor : Predecessors) { + SmallPtrSet<BasicBlock *, 8> PredsOfBB(pred_begin(BB), pred_end(BB)); + SmallPtrSet<BasicBlock *, 8> PredsOfSucc(pred_begin(Succ), pred_end(Succ)); + Updates.reserve(Updates.size() + 2 * PredsOfBB.size() + 1); + for (auto *PredOfBB : PredsOfBB) // This predecessor of BB may already have Succ as a successor. - if (!llvm::is_contained(successors(Predecessor), Succ)) - Updates.push_back({DominatorTree::Insert, Predecessor, Succ}); - Updates.push_back({DominatorTree::Delete, Predecessor, BB}); - } + if (!PredsOfSucc.contains(PredOfBB)) + Updates.push_back({DominatorTree::Insert, PredOfBB, Succ}); + for (auto *PredOfBB : PredsOfBB) + Updates.push_back({DominatorTree::Delete, PredOfBB, BB}); Updates.push_back({DominatorTree::Delete, BB, Succ}); } @@ -1119,10 +1145,8 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, Instruction *TI = BB->getTerminator(); if (TI) if (MDNode *LoopMD = TI->getMetadata(LoopMDKind)) - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *Pred = *PI; + for (BasicBlock *Pred : predecessors(BB)) Pred->getTerminator()->setMetadata(LoopMDKind, LoopMD); - } // Everything that jumped to BB now goes to Succ. BB->replaceAllUsesWith(Succ); @@ -1135,12 +1159,11 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, assert(succ_empty(BB) && "The successor list of BB isn't empty before " "applying corresponding DTU updates."); - if (DTU) { + if (DTU) DTU->applyUpdates(Updates); - DTU->deleteBB(BB); - } else { - BB->eraseFromParent(); // Delete the old basic block. - } + + DeleteDeadBlock(BB, DTU); + return true; } @@ -1356,7 +1379,7 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar, SmallVector<DbgValueInst *, 1> DbgValues; findDbgValues(DbgValues, APN); for (auto *DVI : DbgValues) { - assert(DVI->getValue() == APN); + assert(is_contained(DVI->getValues(), APN)); if ((DVI->getVariable() == DIVar) && (DVI->getExpression() == DIExpr)) return true; } @@ -1383,13 +1406,19 @@ static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) { // We can't always calculate the size of the DI variable (e.g. 
if it is a // VLA). Try to use the size of the alloca that the dbg intrinsic describes // instead. - if (DII->isAddressOfVariable()) - if (auto *AI = dyn_cast_or_null<AllocaInst>(DII->getVariableLocation())) + if (DII->isAddressOfVariable()) { + // DII should have exactly 1 location when it is an address. + assert(DII->getNumVariableLocationOps() == 1 && + "address of variable must have exactly 1 location operand."); + if (auto *AI = + dyn_cast_or_null<AllocaInst>(DII->getVariableLocationOp(0))) { if (Optional<TypeSize> FragmentSize = AI->getAllocationSizeInBits(DL)) { assert(ValueSize.isScalable() == FragmentSize->isScalable() && "Both sizes should agree on the scalable flag."); return TypeSize::isKnownGE(ValueSize, *FragmentSize); } + } + } // Could not determine size of variable. Conservatively return false. return false; } @@ -1400,7 +1429,7 @@ static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) { /// case this DebugLoc leaks into any adjacent instructions. static DebugLoc getDebugValueLoc(DbgVariableIntrinsic *DII, Instruction *Src) { // Original dbg.declare must have a location. - DebugLoc DeclareLoc = DII->getDebugLoc(); + const DebugLoc &DeclareLoc = DII->getDebugLoc(); MDNode *Scope = DeclareLoc.getScope(); DILocation *InlinedAt = DeclareLoc.getInlinedAt(); // Produce an unknown location with the correct scope / inlinedAt fields. @@ -1592,93 +1621,56 @@ void llvm::insertDebugValuesForPHIs(BasicBlock *BB, ValueToValueMapTy DbgValueMap; for (auto &I : *BB) { if (auto DbgII = dyn_cast<DbgVariableIntrinsic>(&I)) { - if (auto *Loc = dyn_cast_or_null<PHINode>(DbgII->getVariableLocation())) - DbgValueMap.insert({Loc, DbgII}); + for (Value *V : DbgII->location_ops()) + if (auto *Loc = dyn_cast_or_null<PHINode>(V)) + DbgValueMap.insert({Loc, DbgII}); } } if (DbgValueMap.size() == 0) return; + // Map a pair of the destination BB and old dbg.value to the new dbg.value, + // so that if a dbg.value is being rewritten to use more than one of the + // inserted PHIs in the same destination BB, we can update the same dbg.value + // with all the new PHIs instead of creating one copy for each. + MapVector<std::pair<BasicBlock *, DbgVariableIntrinsic *>, + DbgVariableIntrinsic *> + NewDbgValueMap; // Then iterate through the new PHIs and look to see if they use one of the - // previously mapped PHIs. If so, insert a new dbg.value intrinsic that will - // propagate the info through the new PHI. - LLVMContext &C = BB->getContext(); + // previously mapped PHIs. If so, create a new dbg.value intrinsic that will + // propagate the info through the new PHI. If we use more than one new PHI in + // a single destination BB with the same old dbg.value, merge the updates so + // that we get a single new dbg.value with all the new PHIs. for (auto PHI : InsertedPHIs) { BasicBlock *Parent = PHI->getParent(); // Avoid inserting an intrinsic into an EH block.
if (Parent->getFirstNonPHI()->isEHPad()) continue; - auto PhiMAV = MetadataAsValue::get(C, ValueAsMetadata::get(PHI)); for (auto VI : PHI->operand_values()) { auto V = DbgValueMap.find(VI); if (V != DbgValueMap.end()) { auto *DbgII = cast<DbgVariableIntrinsic>(V->second); - Instruction *NewDbgII = DbgII->clone(); - NewDbgII->setOperand(0, PhiMAV); - auto InsertionPt = Parent->getFirstInsertionPt(); - assert(InsertionPt != Parent->end() && "Ill-formed basic block"); - NewDbgII->insertBefore(&*InsertionPt); + auto NewDI = NewDbgValueMap.find({Parent, DbgII}); + if (NewDI == NewDbgValueMap.end()) { + auto *NewDbgII = cast<DbgVariableIntrinsic>(DbgII->clone()); + NewDI = NewDbgValueMap.insert({{Parent, DbgII}, NewDbgII}).first; + } + DbgVariableIntrinsic *NewDbgII = NewDI->second; + // If PHI contains VI as an operand more than once, we may have + // replaced it in NewDbgII; confirm that it is present. + if (is_contained(NewDbgII->location_ops(), VI)) + NewDbgII->replaceVariableLocationOp(VI, PHI); } } } -} - -/// Finds all intrinsics declaring local variables as living in the memory that -/// 'V' points to. This may include a mix of dbg.declare and -/// dbg.addr intrinsics. -TinyPtrVector<DbgVariableIntrinsic *> llvm::FindDbgAddrUses(Value *V) { - // This function is hot. Check whether the value has any metadata to avoid a - // DenseMap lookup. - if (!V->isUsedByMetadata()) - return {}; - auto *L = LocalAsMetadata::getIfExists(V); - if (!L) - return {}; - auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L); - if (!MDV) - return {}; - - TinyPtrVector<DbgVariableIntrinsic *> Declares; - for (User *U : MDV->users()) { - if (auto *DII = dyn_cast<DbgVariableIntrinsic>(U)) - if (DII->isAddressOfVariable()) - Declares.push_back(DII); + // Insert the new dbg.values into their destination blocks. + for (auto DI : NewDbgValueMap) { + BasicBlock *Parent = DI.first.first; + auto *NewDbgII = DI.second; + auto InsertionPt = Parent->getFirstInsertionPt(); + assert(InsertionPt != Parent->end() && "Ill-formed basic block"); + NewDbgII->insertBefore(&*InsertionPt); } - - return Declares; -} - -TinyPtrVector<DbgDeclareInst *> llvm::FindDbgDeclareUses(Value *V) { - TinyPtrVector<DbgDeclareInst *> DDIs; - for (DbgVariableIntrinsic *DVI : FindDbgAddrUses(V)) - if (auto *DDI = dyn_cast<DbgDeclareInst>(DVI)) - DDIs.push_back(DDI); - return DDIs; -} - -void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) { - // This function is hot. Check whether the value has any metadata to avoid a - // DenseMap lookup. - if (!V->isUsedByMetadata()) - return; - if (auto *L = LocalAsMetadata::getIfExists(V)) - if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L)) - for (User *U : MDV->users()) - if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U)) - DbgValues.push_back(DVI); -} - -void llvm::findDbgUsers(SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers, - Value *V) { - // This function is hot. Check whether the value has any metadata to avoid a - // DenseMap lookup.
- if (!V->isUsedByMetadata()) - return; - if (auto *L = LocalAsMetadata::getIfExists(V)) - if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L)) - for (User *U : MDV->users()) - if (DbgVariableIntrinsic *DII = dyn_cast<DbgVariableIntrinsic>(U)) - DbgUsers.push_back(DII); } bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, @@ -1686,7 +1678,7 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, int Offset) { auto DbgAddrs = FindDbgAddrUses(Address); for (DbgVariableIntrinsic *DII : DbgAddrs) { - DebugLoc Loc = DII->getDebugLoc(); + const DebugLoc &Loc = DII->getDebugLoc(); auto *DIVar = DII->getVariable(); auto *DIExpr = DII->getExpression(); assert(DIVar && "Missing variable"); @@ -1701,7 +1693,7 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress, DIBuilder &Builder, int Offset) { - DebugLoc Loc = DVI->getDebugLoc(); + const DebugLoc &Loc = DVI->getDebugLoc(); auto *DIVar = DVI->getVariable(); auto *DIExpr = DVI->getExpression(); assert(DIVar && "Missing variable"); @@ -1726,16 +1718,9 @@ void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress, DIBuilder &Builder, int Offset) { if (auto *L = LocalAsMetadata::getIfExists(AI)) if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L)) - for (auto UI = MDV->use_begin(), UE = MDV->use_end(); UI != UE;) { - Use &U = *UI++; + for (Use &U : llvm::make_early_inc_range(MDV->uses())) if (auto *DVI = dyn_cast<DbgValueInst>(U.getUser())) replaceOneDbgValueForAlloca(DVI, NewAllocaAddress, Builder, Offset); - } -} - -/// Wrap \p V in a ValueAsMetadata instance. -static MetadataAsValue *wrapValueInMetadata(LLVMContext &C, Value *V) { - return MetadataAsValue::get(C, ValueAsMetadata::get(V)); } /// Where possible to salvage debug information for \p I do so /// and return true. @@ -1748,26 +1733,53 @@ void llvm::salvageDebugInfo(Instruction &I) { void llvm::salvageDebugInfoForDbgValues( Instruction &I, ArrayRef<DbgVariableIntrinsic *> DbgUsers) { - auto &Ctx = I.getContext(); + // This is an arbitrarily chosen limit on the maximum number of values we can + // salvage up to in a DIArgList, used for performance reasons. + const unsigned MaxDebugArgs = 16; bool Salvaged = false; - auto wrapMD = [&](Value *V) { return wrapValueInMetadata(Ctx, V); }; for (auto *DII : DbgUsers) { // Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they // are implicitly pointing out the value as a DWARF memory location // description. bool StackValue = isa<DbgValueInst>(DII); - - DIExpression *DIExpr = - salvageDebugInfoImpl(I, DII->getExpression(), StackValue); - + auto DIILocation = DII->location_ops(); + assert( + is_contained(DIILocation, &I) && + "DbgVariableIntrinsic must use salvaged instruction as its location"); + SmallVector<Value *, 4> AdditionalValues; + // `I` may appear more than once in DII's location ops, and each use of `I` + // must be updated in the DIExpression and potentially have additional + // values added; thus we call salvageDebugInfoImpl for each `I` instance in + // DIILocation.
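Concretely, salvaging an instruction with a non-constant second operand now produces a variadic dbg.value; a hedged illustration of the post-salvage form (variable names invented):

// Salvaging %sum = add i64 %a, %b out of dbg.value(metadata i64 %sum, ...)
// yields roughly:
//   call void @llvm.dbg.value(
//       metadata !DIArgList(i64 %a, i64 %b), metadata !var,
//       metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1,
//                              DW_OP_plus, DW_OP_stack_value))
// where %b arrives via AdditionalValues and the DW_OP_LLVM_arg indices are
// assigned by getSalvageOpsForBinOp further down.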
+ DIExpression *SalvagedExpr = DII->getExpression(); + auto LocItr = find(DIILocation, &I); + while (SalvagedExpr && LocItr != DIILocation.end()) { + unsigned LocNo = std::distance(DIILocation.begin(), LocItr); + SalvagedExpr = salvageDebugInfoImpl(I, SalvagedExpr, StackValue, LocNo, + AdditionalValues); + LocItr = std::find(++LocItr, DIILocation.end(), &I); + } // salvageDebugInfoImpl should fail on examining the first element of // DbgUsers, or none of them. - if (!DIExpr) + if (!SalvagedExpr) break; - DII->setOperand(0, wrapMD(I.getOperand(0))); - DII->setOperand(2, MetadataAsValue::get(Ctx, DIExpr)); + DII->replaceVariableLocationOp(&I, I.getOperand(0)); + if (AdditionalValues.empty()) { + DII->setExpression(SalvagedExpr); + } else if (isa<DbgValueInst>(DII) && + DII->getNumVariableLocationOps() + AdditionalValues.size() <= + MaxDebugArgs) { + DII->addVariableLocationOps(AdditionalValues, SalvagedExpr); + } else { + // Do not salvage using DIArgList for dbg.addr/dbg.declare, as it is + // currently only valid for stack value expressions. + // Also do not salvage if the resulting DIArgList would contain an + // unreasonably large number of values. + Value *Undef = UndefValue::get(I.getOperand(0)->getType()); + DII->replaceVariableLocationOp(I.getOperand(0), Undef); + } LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n'); Salvaged = true; } @@ -1777,14 +1789,111 @@ void llvm::salvageDebugInfoForDbgValues( for (auto *DII : DbgUsers) { Value *Undef = UndefValue::get(I.getType()); - DII->setOperand(0, MetadataAsValue::get(DII->getContext(), - ValueAsMetadata::get(Undef))); + DII->replaceVariableLocationOp(&I, Undef); + } +} + +bool getSalvageOpsForGEP(GetElementPtrInst *GEP, const DataLayout &DL, + uint64_t CurrentLocOps, + SmallVectorImpl<uint64_t> &Opcodes, + SmallVectorImpl<Value *> &AdditionalValues) { + unsigned BitWidth = DL.getIndexSizeInBits(GEP->getPointerAddressSpace()); + // Rewrite a GEP into a DIExpression. + MapVector<Value *, APInt> VariableOffsets; + APInt ConstantOffset(BitWidth, 0); + if (!GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) + return false; + if (!VariableOffsets.empty() && !CurrentLocOps) { + Opcodes.insert(Opcodes.begin(), {dwarf::DW_OP_LLVM_arg, 0}); + CurrentLocOps = 1; + } + for (auto Offset : VariableOffsets) { + AdditionalValues.push_back(Offset.first); + assert(Offset.second.isStrictlyPositive() && + "Expected strictly positive multiplier for offset."); + Opcodes.append({dwarf::DW_OP_LLVM_arg, CurrentLocOps++, dwarf::DW_OP_constu, + Offset.second.getZExtValue(), dwarf::DW_OP_mul, + dwarf::DW_OP_plus}); + } + DIExpression::appendOffset(Opcodes, ConstantOffset.getSExtValue()); + return true; +} + +uint64_t getDwarfOpForBinOp(Instruction::BinaryOps Opcode) { + switch (Opcode) { + case Instruction::Add: + return dwarf::DW_OP_plus; + case Instruction::Sub: + return dwarf::DW_OP_minus; + case Instruction::Mul: + return dwarf::DW_OP_mul; + case Instruction::SDiv: + return dwarf::DW_OP_div; + case Instruction::SRem: + return dwarf::DW_OP_mod; + case Instruction::Or: + return dwarf::DW_OP_or; + case Instruction::And: + return dwarf::DW_OP_and; + case Instruction::Xor: + return dwarf::DW_OP_xor; + case Instruction::Shl: + return dwarf::DW_OP_shl; + case Instruction::LShr: + return dwarf::DW_OP_shr; + case Instruction::AShr: + return dwarf::DW_OP_shra; + default: + // TODO: Salvage from each kind of binop we know about. 
+ return 0; + } +} + +bool getSalvageOpsForBinOp(BinaryOperator *BI, uint64_t CurrentLocOps, + SmallVectorImpl<uint64_t> &Opcodes, + SmallVectorImpl<Value *> &AdditionalValues) { + // Handle binary operations with constant integer operands as a special case. + auto *ConstInt = dyn_cast<ConstantInt>(BI->getOperand(1)); + // Values wider than 64 bits cannot be represented within a DIExpression. + if (ConstInt && ConstInt->getBitWidth() > 64) + return false; + + Instruction::BinaryOps BinOpcode = BI->getOpcode(); + // Push any Constant Int operand onto the expression stack. + if (ConstInt) { + uint64_t Val = ConstInt->getSExtValue(); + // Add or Sub Instructions with a constant operand can potentially be + // simplified. + if (BinOpcode == Instruction::Add || BinOpcode == Instruction::Sub) { + uint64_t Offset = BinOpcode == Instruction::Add ? Val : -int64_t(Val); + DIExpression::appendOffset(Opcodes, Offset); + return true; + } + Opcodes.append({dwarf::DW_OP_constu, Val}); + } else { + if (!CurrentLocOps) { + Opcodes.append({dwarf::DW_OP_LLVM_arg, 0}); + CurrentLocOps = 1; + } + Opcodes.append({dwarf::DW_OP_LLVM_arg, CurrentLocOps}); + AdditionalValues.push_back(BI->getOperand(1)); } + + // Add salvaged binary operator to expression stack, if it has a valid + // representation in a DIExpression. + uint64_t DwarfBinOp = getDwarfOpForBinOp(BinOpcode); + if (!DwarfBinOp) + return false; + Opcodes.push_back(DwarfBinOp); + + return true; } -DIExpression *llvm::salvageDebugInfoImpl(Instruction &I, - DIExpression *SrcDIExpr, - bool WithStackValue) { +DIExpression * +llvm::salvageDebugInfoImpl(Instruction &I, DIExpression *SrcDIExpr, + bool WithStackValue, unsigned LocNo, + SmallVectorImpl<Value *> &AdditionalValues) { + uint64_t CurrentLocOps = SrcDIExpr->getNumLocationOperands(); auto &M = *I.getModule(); auto &DL = M.getDataLayout(); @@ -1792,20 +1901,13 @@ DIExpression *llvm::salvageDebugInfoImpl(Instruction &I, auto doSalvage = [&](SmallVectorImpl<uint64_t> &Ops) -> DIExpression * { DIExpression *DIExpr = SrcDIExpr; if (!Ops.empty()) { - DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue); + DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, LocNo, WithStackValue); } return DIExpr; }; - // Apply the given offset to the source DIExpression. - auto applyOffset = [&](uint64_t Offset) -> DIExpression * { - SmallVector<uint64_t, 8> Ops; - DIExpression::appendOffset(Ops, Offset); - return doSalvage(Ops); - }; - // initializer-list helper for applying operators to the source DIExpression. - auto applyOps = [&](ArrayRef<uint64_t> Opcodes) -> DIExpression * { + auto applyOps = [&](ArrayRef<uint64_t> Opcodes) { SmallVector<uint64_t, 8> Ops(Opcodes.begin(), Opcodes.end()); return doSalvage(Ops); }; @@ -1829,54 +1931,17 @@ DIExpression *llvm::salvageDebugInfoImpl(Instruction &I, isa<SExtInst>(&I))); } + SmallVector<uint64_t, 8> Ops; if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) { - unsigned BitWidth = - M.getDataLayout().getIndexSizeInBits(GEP->getPointerAddressSpace()); - // Rewrite a constant GEP into a DIExpression. - APInt Offset(BitWidth, 0); - if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) { - return applyOffset(Offset.getSExtValue()); - } else { - return nullptr; - } + if (getSalvageOpsForGEP(GEP, DL, CurrentLocOps, Ops, AdditionalValues)) + return doSalvage(Ops); } else if (auto *BI = dyn_cast<BinaryOperator>(&I)) { - // Rewrite binary operations with constant integer operands. 
- auto *ConstInt = dyn_cast<ConstantInt>(I.getOperand(1)); - if (!ConstInt || ConstInt->getBitWidth() > 64) - return nullptr; - - uint64_t Val = ConstInt->getSExtValue(); - switch (BI->getOpcode()) { - case Instruction::Add: - return applyOffset(Val); - case Instruction::Sub: - return applyOffset(-int64_t(Val)); - case Instruction::Mul: - return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_mul}); - case Instruction::SDiv: - return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_div}); - case Instruction::SRem: - return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_mod}); - case Instruction::Or: - return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_or}); - case Instruction::And: - return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_and}); - case Instruction::Xor: - return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_xor}); - case Instruction::Shl: - return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shl}); - case Instruction::LShr: - return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shr}); - case Instruction::AShr: - return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shra}); - default: - // TODO: Salvage constants from each kind of binop we know about. - return nullptr; - } - // *Not* to do: we should not attempt to salvage load instructions, - // because the validity and lifetime of a dbg.value containing - // DW_OP_deref becomes difficult to analyze. See PR40628 for examples. + if (getSalvageOpsForBinOp(BI, CurrentLocOps, Ops, AdditionalValues)) + return doSalvage(Ops); } + // *Not* to do: we should not attempt to salvage load instructions, + // because the validity and lifetime of a dbg.value containing + // DW_OP_deref becomes difficult to analyze. See PR40628 for examples. return nullptr; } @@ -1922,13 +1987,12 @@ static bool rewriteDebugUsers( if (UndefOrSalvage.count(DII)) continue; - LLVMContext &Ctx = DII->getContext(); DbgValReplacement DVR = RewriteExpr(*DII); if (!DVR) continue; - DII->setOperand(0, wrapValueInMetadata(Ctx, &To)); - DII->setOperand(2, MetadataAsValue::get(Ctx, *DVR)); + DII->replaceVariableLocationOp(&From, &To); + DII->setExpression(*DVR); LLVM_DEBUG(dbgs() << "REWRITE: " << *DII << '\n'); Changed = true; } @@ -2046,15 +2110,15 @@ llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) { return {NumDeadInst, NumDeadDbgInst}; } -unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap, - bool PreserveLCSSA, DomTreeUpdater *DTU, +unsigned llvm::changeToUnreachable(Instruction *I, bool PreserveLCSSA, + DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU) { BasicBlock *BB = I->getParent(); if (MSSAU) MSSAU->changeToUnreachable(I); - SmallSetVector<BasicBlock *, 8> UniqueSuccessors; + SmallSet<BasicBlock *, 8> UniqueSuccessors; // Loop over all of the successors, removing BB's entry from any PHI // nodes. @@ -2063,14 +2127,6 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap, if (DTU) UniqueSuccessors.insert(Successor); } - // Insert a call to llvm.trap right before this. This turns the undefined - // behavior into a hard fail instead of falling through into random code. 
- if (UseLLVMTrap) { - Function *TrapFn = - Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap); - CallInst *CallTrap = CallInst::Create(TrapFn, "", I); - CallTrap->setDebugLoc(I->getDebugLoc()); - } auto *UI = new UnreachableInst(I->getContext(), I); UI->setDebugLoc(I->getDebugLoc()); @@ -2139,15 +2195,16 @@ void llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) { } BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, - BasicBlock *UnwindEdge) { + BasicBlock *UnwindEdge, + DomTreeUpdater *DTU) { BasicBlock *BB = CI->getParent(); // Convert this function call into an invoke instruction. First, split the // basic block. - BasicBlock *Split = - BB->splitBasicBlock(CI->getIterator(), CI->getName() + ".noexc"); + BasicBlock *Split = SplitBlock(BB, CI, DTU, /*LI=*/nullptr, /*MSSAU*/ nullptr, + CI->getName() + ".noexc"); - // Delete the unconditional branch inserted by splitBasicBlock + // Delete the unconditional branch inserted by SplitBlock BB->getInstList().pop_back(); // Create the new invoke instruction. @@ -2167,6 +2224,9 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, II->setCallingConv(CI->getCallingConv()); II->setAttributes(CI->getAttributes()); + if (DTU) + DTU->applyUpdates({{DominatorTree::Insert, BB, UnwindEdge}}); + // Make sure that anything using the call now uses the invoke! This also // updates the CallGraph if present, because it uses a WeakTrackingVH. CI->replaceAllUsesWith(II); @@ -2203,7 +2263,7 @@ static bool markAliveBlocks(Function &F, if (IntrinsicID == Intrinsic::assume) { if (match(CI->getArgOperand(0), m_CombineOr(m_Zero(), m_Undef()))) { // Don't insert a call to llvm.trap right before the unreachable. - changeToUnreachable(CI, false, false, DTU); + changeToUnreachable(CI, false, DTU); Changed = true; break; } @@ -2219,8 +2279,7 @@ static bool markAliveBlocks(Function &F, // still be useful for widening. if (match(CI->getArgOperand(0), m_Zero())) if (!isa<UnreachableInst>(CI->getNextNode())) { - changeToUnreachable(CI->getNextNode(), /*UseLLVMTrap=*/false, - false, DTU); + changeToUnreachable(CI->getNextNode(), false, DTU); Changed = true; break; } @@ -2228,7 +2287,7 @@ static bool markAliveBlocks(Function &F, } else if ((isa<ConstantPointerNull>(Callee) && !NullPointerIsDefined(CI->getFunction())) || isa<UndefValue>(Callee)) { - changeToUnreachable(CI, /*UseLLVMTrap=*/false, false, DTU); + changeToUnreachable(CI, false, DTU); Changed = true; break; } @@ -2238,7 +2297,7 @@ static bool markAliveBlocks(Function &F, // though. if (!isa<UnreachableInst>(CI->getNextNode())) { // Don't insert a call to llvm.trap right before the unreachable. 
- changeToUnreachable(CI->getNextNode(), false, false, DTU); + changeToUnreachable(CI->getNextNode(), false, DTU); Changed = true; } break; @@ -2257,7 +2316,7 @@ static bool markAliveBlocks(Function &F, (isa<ConstantPointerNull>(Ptr) && !NullPointerIsDefined(SI->getFunction(), SI->getPointerAddressSpace()))) { - changeToUnreachable(SI, true, false, DTU); + changeToUnreachable(SI, false, DTU); Changed = true; break; } @@ -2271,7 +2330,7 @@ static bool markAliveBlocks(Function &F, if ((isa<ConstantPointerNull>(Callee) && !NullPointerIsDefined(BB->getParent())) || isa<UndefValue>(Callee)) { - changeToUnreachable(II, true, false, DTU); + changeToUnreachable(II, false, DTU); Changed = true; } else if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(&F)) { if (II->use_empty() && II->onlyReadsMemory()) { @@ -2311,7 +2370,7 @@ static bool markAliveBlocks(Function &F, } }; - SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases; + SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases; // Set of unique CatchPads. SmallDenseMap<CatchPadInst *, detail::DenseSetEmpty, 4, CatchPadDenseMapInfo, detail::DenseSetPair<CatchPadInst *>> @@ -2321,22 +2380,25 @@ static bool markAliveBlocks(Function &F, E = CatchSwitch->handler_end(); I != E; ++I) { BasicBlock *HandlerBB = *I; - ++NumPerSuccessorCases[HandlerBB]; + if (DTU) + ++NumPerSuccessorCases[HandlerBB]; auto *CatchPad = cast<CatchPadInst>(HandlerBB->getFirstNonPHI()); if (!HandlerSet.insert({CatchPad, Empty}).second) { - --NumPerSuccessorCases[HandlerBB]; + if (DTU) + --NumPerSuccessorCases[HandlerBB]; CatchSwitch->removeHandler(I); --I; --E; Changed = true; } } - std::vector<DominatorTree::UpdateType> Updates; - for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases) - if (I.second == 0) - Updates.push_back({DominatorTree::Delete, BB, I.first}); - if (DTU) + if (DTU) { + std::vector<DominatorTree::UpdateType> Updates; + for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases) + if (I.second == 0) + Updates.push_back({DominatorTree::Delete, BB, I.first}); DTU->applyUpdates(Updates); + } } Changed |= ConstantFoldTerminator(BB, true, nullptr, DTU); @@ -2418,44 +2480,7 @@ bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU, if (MSSAU) MSSAU->removeBlocks(BlocksToRemove); - // Loop over all of the basic blocks that are up for removal, dropping all of - // their internal references. Update DTU if available. - std::vector<DominatorTree::UpdateType> Updates; - for (auto *BB : BlocksToRemove) { - SmallSetVector<BasicBlock *, 8> UniqueSuccessors; - for (BasicBlock *Successor : successors(BB)) { - // Only remove references to BB in reachable successors of BB. - if (Reachable.count(Successor)) - Successor->removePredecessor(BB); - if (DTU) - UniqueSuccessors.insert(Successor); - } - BB->dropAllReferences(); - if (DTU) { - Instruction *TI = BB->getTerminator(); - assert(TI && "Basic block should have a terminator"); - // Terminators like invoke can have users. We have to replace their users, - // before removing them. 
- if (!TI->use_empty()) - TI->replaceAllUsesWith(UndefValue::get(TI->getType())); - TI->eraseFromParent(); - new UnreachableInst(BB->getContext(), BB); - assert(succ_empty(BB) && "The successor list of BB isn't empty before " - "applying corresponding DTU updates."); - Updates.reserve(Updates.size() + UniqueSuccessors.size()); - for (auto *UniqueSuccessor : UniqueSuccessors) - Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor}); - } - } - - if (DTU) { - DTU->applyUpdates(Updates); - for (auto *BB : BlocksToRemove) - DTU->deleteBB(BB); - } else { - for (auto *BB : BlocksToRemove) - BB->eraseFromParent(); - } + DeleteDeadBlocks(BlocksToRemove.takeVector(), DTU); return Changed; } @@ -2686,11 +2711,10 @@ unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT, const BasicBlock *BB) { - auto ProperlyDominates = [&DT](const BasicBlock *BB, const Use &U) { - auto *I = cast<Instruction>(U.getUser())->getParent(); - return DT.properlyDominates(BB, I); + auto Dominates = [&DT](const BasicBlock *BB, const Use &U) { + return DT.dominates(BB, U); }; - return ::replaceDominatedUsesWith(From, To, BB, ProperlyDominates); + return ::replaceDominatedUsesWith(From, To, BB, Dominates); } bool llvm::callsGCLeafFunction(const CallBase *Call, @@ -2795,13 +2819,14 @@ void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, // TODO: Extend llvm.dbg.value to take more than one SSA Value (PR39141) to // encode predicated DIExpressions that yield different results on different // code paths. + for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { Instruction *I = &*II; - I->dropUnknownNonDebugMetadata(); + I->dropUndefImplyingAttrsAndUnknownMetadata(); if (I->isUsedByMetadata()) dropDebugUsers(*I); - if (isa<DbgInfoIntrinsic>(I)) { - // Remove DbgInfo Intrinsics. + if (I->isDebugOrPseudoInst()) { + // Remove DbgInfo and pseudo probe Intrinsics. II = I->eraseFromParent(); continue; } @@ -2863,7 +2888,8 @@ struct BitPart { /// does not invalidate internal references (std::map instead of DenseMap). static const Optional<BitPart> & collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, - std::map<Value *, Optional<BitPart>> &BPS, int Depth) { + std::map<Value *, Optional<BitPart>> &BPS, int Depth, + bool &FoundRoot) { auto I = BPS.find(V); if (I != BPS.end()) return I->second; @@ -2871,6 +2897,10 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, auto &Result = BPS[V] = None; auto BitWidth = V->getType()->getScalarSizeInBits(); + // Can't do integer/elements > 128 bits. + if (BitWidth > 128) + return Result; + // Prevent stack overflow by limiting the recursion depth if (Depth == BitPartRecursionMaxDepth) { LLVM_DEBUG(dbgs() << "collectBitParts max recursion depth reached.\n"); @@ -2883,17 +2913,18 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, // If this is an or instruction, it may be an inner node of the bswap. if (match(V, m_Or(m_Value(X), m_Value(Y)))) { - const auto &A = - collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); - const auto &B = - collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); - if (!A || !B) + // Check we have both sources and they are from the same provider. + const auto &A = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, + Depth + 1, FoundRoot); + if (!A || !A->Provider) return Result; - // Try and merge the two together. 
- if (!A->Provider || A->Provider != B->Provider) + const auto &B = collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS, + Depth + 1, FoundRoot); + if (!B || A->Provider != B->Provider) return Result; + // Try and merge the two together. Result = BitPart(A->Provider, BitWidth); for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) { if (A->Provenance[BitIdx] != BitPart::Unset && @@ -2918,8 +2949,12 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, if (BitShift.uge(BitWidth)) return Result; - const auto &Res = - collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + // For bswap-only, limit shift amounts to whole bytes, for an early exit. + if (!MatchBitReversals && (BitShift.getZExtValue() % 8) != 0) + return Result; + + const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, + Depth + 1, FoundRoot); if (!Res) return Result; Result = Res; @@ -2948,8 +2983,8 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, if (!MatchBitReversals && (NumMaskedBits % 8) != 0) return Result; - const auto &Res = - collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, + Depth + 1, FoundRoot); if (!Res) return Result; Result = Res; @@ -2963,8 +2998,8 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, // If this is a zext instruction zero extend the result. if (match(V, m_ZExt(m_Value(X)))) { - const auto &Res = - collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, + Depth + 1, FoundRoot); if (!Res) return Result; @@ -2977,11 +3012,24 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, return Result; } + // If this is a truncate instruction, extract the lower bits. + if (match(V, m_Trunc(m_Value(X)))) { + const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, + Depth + 1, FoundRoot); + if (!Res) + return Result; + + Result = BitPart(Res->Provider, BitWidth); + for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) + Result->Provenance[BitIdx] = Res->Provenance[BitIdx]; + return Result; + } + // BITREVERSE - most likely due to us previous matching a partial // bitreverse. if (match(V, m_BitReverse(m_Value(X)))) { - const auto &Res = - collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, + Depth + 1, FoundRoot); if (!Res) return Result; @@ -2993,8 +3041,8 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, // BSWAP - most likely due to us previous matching a partial bswap. if (match(V, m_BSwap(m_Value(X)))) { - const auto &Res = - collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, + Depth + 1, FoundRoot); if (!Res) return Result; @@ -3020,13 +3068,19 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, if (cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fshr) ModAmt = BitWidth - ModAmt; - const auto &LHS = - collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); - const auto &RHS = - collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + // For bswap-only, limit shift amounts to whole bytes, for an early exit. + if (!MatchBitReversals && (ModAmt % 8) != 0) + return Result; // Check we have both sources and they are from the same provider. 
- if (!LHS || !RHS || !LHS->Provider || LHS->Provider != RHS->Provider) + const auto &LHS = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, + Depth + 1, FoundRoot); + if (!LHS || !LHS->Provider) + return Result; + + const auto &RHS = collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS, + Depth + 1, FoundRoot); + if (!RHS || LHS->Provider != RHS->Provider) return Result; unsigned StartBitRHS = BitWidth - ModAmt; @@ -3039,8 +3093,14 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, } } - // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be - // the input value to the bswap/bitreverse. + // If we've already found a root input value then we're never going to merge + // these back together. + if (FoundRoot) + return Result; + + // Okay, we got to something that isn't a shift, 'or', 'and', etc. This must + // be the root input value to the bswap/bitreverse. + FoundRoot = true; Result = BitPart(V, BitWidth); for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) Result->Provenance[BitIdx] = BitIdx; @@ -3066,7 +3126,9 @@ static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To, bool llvm::recognizeBSwapOrBitReverseIdiom( Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl<Instruction *> &InsertedInsts) { - if (Operator::getOpcode(I) != Instruction::Or) + if (!match(I, m_Or(m_Value(), m_Value())) && + !match(I, m_FShl(m_Value(), m_Value(), m_Value())) && + !match(I, m_FShr(m_Value(), m_Value(), m_Value()))) return false; if (!MatchBSwaps && !MatchBitReversals) return false; @@ -3080,8 +3142,10 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( DemandedTy = Trunc->getType(); // Try to find all the pieces corresponding to the bswap. + bool FoundRoot = false; std::map<Value *, Optional<BitPart>> BPS; - auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS, 0); + const auto &Res = + collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS, 0, FoundRoot); if (!Res) return false; ArrayRef<int8_t> BitProvenance = Res->Provenance; @@ -3280,3 +3344,33 @@ Value *llvm::invertCondition(Value *Condition) { Inverted->insertBefore(&*Parent->getFirstInsertionPt()); return Inverted; } + +bool llvm::inferAttributesFromOthers(Function &F) { + // Note: We explicitly check for attributes rather than using cover functions + // because some of the cover functions include the logic being implemented. + + bool Changed = false; + // readnone + not convergent implies nosync + if (!F.hasFnAttribute(Attribute::NoSync) && + F.doesNotAccessMemory() && !F.isConvergent()) { + F.setNoSync(); + Changed = true; + } + + // readonly implies nofree + if (!F.hasFnAttribute(Attribute::NoFree) && F.onlyReadsMemory()) { + F.setDoesNotFreeMemory(); + Changed = true; + } + + // willreturn implies mustprogress + if (!F.hasFnAttribute(Attribute::MustProgress) && F.willReturn()) { + F.setMustProgress(); + Changed = true; + } + + // TODO: There are a bunch of cases of restrictive memory effects we + // can infer by inspecting arguments of argmemonly-ish functions. 
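The new inferAttributesFromOthers helper (its closing lines open the next hunk) derives nosync, nofree and mustprogress from attributes the function already carries. A hedged usage sketch; the module walk is illustrative and assumes the declaration from llvm/Transforms/Utils/Local.h:

#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

// Run the inference over every function and count how many changed,
// e.g. readnone and non-convergent functions picking up nosync.
static unsigned inferModuleAttributes(Module &M) {
  unsigned NumChanged = 0;
  for (Function &F : M)
    NumChanged += inferAttributesFromOthers(F);
  return NumChanged;
}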
+ + return Changed; +} diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp index befacb591762..cd1f6f0c78a5 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -211,9 +211,7 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, // Do not consider predicates that are known to be true or false // independently of the loop iteration. - if (SE.isKnownPredicate(Pred, LeftSCEV, RightSCEV) || - SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), LeftSCEV, - RightSCEV)) + if (SE.evaluatePredicate(Pred, LeftSCEV, RightSCEV)) continue; // Check if we have a condition with one AddRec and one non AddRec diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp index b678efdc8d88..ff7905bed91d 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -26,7 +26,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" @@ -46,6 +46,10 @@ using namespace llvm; STATISTIC(NumNotRotatedDueToHeaderSize, "Number of loops not rotated due to the header size"); +STATISTIC(NumInstrsHoisted, + "Number of instructions hoisted into loop preheader"); +STATISTIC(NumInstrsDuplicated, + "Number of instructions cloned into loop preheader"); STATISTIC(NumRotated, "Number of loops rotated"); static cl::opt<bool> @@ -179,9 +183,7 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, NewVal = SSA.GetValueInMiddleOfBlock(UserBB); else NewVal = UndefValue::get(OrigHeaderVal->getType()); - DbgValue->setOperand(0, - MetadataAsValue::get(OrigHeaderVal->getContext(), - ValueAsMetadata::get(NewVal))); + DbgValue->replaceVariableLocationOp(OrigHeaderVal, NewVal); } } } @@ -386,11 +388,15 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // possible or create a clone in the OldPreHeader if not. Instruction *LoopEntryBranch = OrigPreheader->getTerminator(); - // Record all debug intrinsics preceding LoopEntryBranch to avoid duplication. + // Record all debug intrinsics preceding LoopEntryBranch to avoid + // duplication. using DbgIntrinsicHash = - std::pair<std::pair<Value *, DILocalVariable *>, DIExpression *>; + std::pair<std::pair<hash_code, DILocalVariable *>, DIExpression *>; auto makeHash = [](DbgVariableIntrinsic *D) -> DbgIntrinsicHash { - return {{D->getVariableLocation(), D->getVariable()}, D->getExpression()}; + auto VarLocOps = D->location_ops(); + return {{hash_combine_range(VarLocOps.begin(), VarLocOps.end()), + D->getVariable()}, + D->getExpression()}; }; SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics; for (auto I = std::next(OrigPreheader->rbegin()), E = OrigPreheader->rend(); @@ -422,11 +428,13 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { !Inst->mayWriteToMemory() && !Inst->isTerminator() && !isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) { Inst->moveBefore(LoopEntryBranch); + ++NumInstrsHoisted; continue; } // Otherwise, create a duplicate of the instruction. 
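In the LoopRotationUtils hunk above, the dedup key for debug intrinsics switches from a single location Value* to a hash over all location operands, since dbg.value is now variadic. A minimal sketch of the hashing idiom; the operand range is assumed to come from D->location_ops():

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/IR/Value.h"
using namespace llvm;

// Combine an ordered range of operand pointers into one key: two variadic
// dbg.values collide only if their operand lists match element-wise.
static hash_code hashLocationOps(ArrayRef<const Value *> Ops) {
  return hash_combine_range(Ops.begin(), Ops.end());
}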
Instruction *C = Inst->clone(); + ++NumInstrsDuplicated; // Eagerly remap the operands of the instruction. RemapInstruction(C, ValueMap, @@ -459,9 +467,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { C->setName(Inst->getName()); C->insertBefore(LoopEntryBranch); - if (auto *II = dyn_cast<IntrinsicInst>(C)) - if (II->getIntrinsicID() == Intrinsic::assume) - AC->registerAssumption(II); + if (auto *II = dyn_cast<AssumeInst>(C)) + AC->registerAssumption(II); // MemorySSA cares whether the cloned instruction was inserted or not, and // not whether it can be remapped to a simplified value. if (MSSAU) @@ -630,6 +637,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { } assert(SplitLatchEdge && "Despite splitting all preds, failed to split latch exit?"); + (void)SplitLatchEdge; } else { // We can fold the conditional branch in the preheader, this makes things // simpler. The first step is to remove the extra edge to the Exit block. diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp index 2e104334ad96..d2fd32c98d73 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -127,9 +127,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT, // Compute the set of predecessors of the loop that are not in the loop. SmallVector<BasicBlock*, 8> OutsideBlocks; - for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); - PI != PE; ++PI) { - BasicBlock *P = *PI; + for (BasicBlock *P : predecessors(Header)) { if (!L->contains(P)) { // Coming in from outside the loop? // If the loop is branched to from an indirect terminator, we won't // be able to fully transform the loop, because it prohibits @@ -381,9 +379,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, // Figure out which basic blocks contain back-edges to the loop header. std::vector<BasicBlock*> BackedgeBlocks; - for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){ - BasicBlock *P = *I; - + for (BasicBlock *P : predecessors(Header)) { // Indirect edges cannot be split, so we must fail if we find one. if (P->getTerminator()->isIndirectTerminator()) return nullptr; @@ -505,12 +501,9 @@ ReprocessLoop: if (*BB == L->getHeader()) continue; SmallPtrSet<BasicBlock*, 4> BadPreds; - for (pred_iterator PI = pred_begin(*BB), - PE = pred_end(*BB); PI != PE; ++PI) { - BasicBlock *P = *PI; + for (BasicBlock *P : predecessors(*BB)) if (!L->contains(P)) BadPreds.insert(P); - } // Delete each unique out-of-loop (and thus dead) predecessor. for (BasicBlock *P : BadPreds) { @@ -520,7 +513,7 @@ ReprocessLoop: // Zap the dead pred's terminator and replace it with unreachable. Instruction *TI = P->getTerminator(); - changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA, + changeToUnreachable(TI, PreserveLCSSA, /*DTU=*/nullptr, MSSAU); Changed = true; } @@ -872,9 +865,6 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F, PreservedAnalyses PA; PA.preserve<DominatorTreeAnalysis>(); PA.preserve<LoopAnalysis>(); - PA.preserve<BasicAA>(); - PA.preserve<GlobalsAA>(); - PA.preserve<SCEVAA>(); PA.preserve<ScalarEvolutionAnalysis>(); PA.preserve<DependenceAnalysis>(); if (MSSAAnalysis) @@ -904,9 +894,8 @@ static void verifyLoop(Loop *L) { // Indirectbr can interfere with preheader and unique backedge insertion. 
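Several LoopSimplify loops above are modernized from manual pred_iterator pairs to the predecessors() range from llvm/IR/CFG.h. A distilled restatement of the pattern used by InsertPreheaderForLoop (the function name here is illustrative):

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CFG.h"
using namespace llvm;

// Collect the header's out-of-loop predecessors with the range helper.
static SmallVector<BasicBlock *, 8> outOfLoopPreds(const Loop &L,
                                                   BasicBlock *Header) {
  SmallVector<BasicBlock *, 8> Outside;
  for (BasicBlock *P : predecessors(Header))
    if (!L.contains(P))
      Outside.push_back(P);
  return Outside;
}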
if (!L->getLoopPreheader() || !L->getLoopLatch()) {
bool HasIndBrPred = false;
- for (pred_iterator PI = pred_begin(L->getHeader()),
- PE = pred_end(L->getHeader()); PI != PE; ++PI)
- if (isa<IndirectBrInst>((*PI)->getTerminator())) {
+ for (BasicBlock *Pred : predecessors(L->getHeader()))
+ if (isa<IndirectBrInst>(Pred->getTerminator())) {
HasIndBrPred = true;
break;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index d4cd57405239..a91bf7b7af13 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -59,7 +59,6 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
@@ -220,26 +219,24 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
}
}
- // At this point, the code is well formed. We now do a quick sweep over the
- // inserted code, doing constant propagation and dead code elimination as we
- // go.
+ // At this point, the code is well formed. Perform constprop, instsimplify,
+ // and dce.
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
for (BasicBlock *BB : L->getBlocks()) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
Instruction *Inst = &*I++;
- if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC}))
if (LI->replacementPreservesLCSSAForm(Inst, V))
Inst->replaceAllUsesWith(V);
if (isInstructionTriviallyDead(Inst))
- BB->getInstList().erase(Inst);
+ DeadInsts.emplace_back(Inst);
}
+ // We can't do recursive deletion until we're done iterating, as we might
+ // have a phi which (potentially indirectly) uses instructions later in
+ // the block we're iterating through.
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
}
-
- // TODO: after peeling or unrolling, previously loop variant conditions are
- // likely to fold to constants, eagerly propagating those here will require
- // fewer cleanup passes to be run. Alternatively, a LoopEarlyCSE might be
- // appropriate.
}

/// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling
@@ -247,32 +244,10 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
///
-/// TripCount is the upper bound of the iteration on which control exits
-/// LatchBlock. Control may exit the loop prior to TripCount iterations either
-/// via an early branch in other loop block or via LatchBlock terminator. This
-/// is relaxed from the general definition of trip count which is the number of
-/// times the loop header executes. Note that UnrollLoop assumes that the loop
-/// counter test is in LatchBlock in order to remove unnecessary instances of
-/// the test. If control can exit the loop from the LatchBlock's terminator
-/// prior to TripCount iterations, flag PreserveCondBr needs to be set.
-///
-/// PreserveCondBr indicates whether the conditional branch of the LatchBlock
-/// needs to be preserved. It is needed when we use trip count upper bound to
-/// fully unroll the loop.
If PreserveOnlyFirst is also set then only the first -/// conditional branch needs to be preserved. -/// -/// Similarly, TripMultiple divides the number of times that the LatchBlock may -/// execute without exiting the loop. -/// -/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that -/// have a runtime (i.e. not compile time constant) trip count. Unrolling these -/// loops require a unroll "prologue" that runs "RuntimeTripCount % Count" -/// iterations before branching into the unrolled loop. UnrollLoop will not -/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and -/// AllowExpensiveTripCount is false. -/// -/// If we want to perform PGO-based loop peeling, PeelCount is set to the -/// number of iterations we want to peel off. +/// If Runtime is true then UnrollLoop will try to insert a prologue or +/// epilogue that ensures the latch has a trip multiple of Count. UnrollLoop +/// will not runtime-unroll the loop if computing the run-time trip count will +/// be expensive and AllowExpensiveTripCount is false. /// /// The LoopInfo Analysis that is passed will be kept consistent. /// @@ -287,6 +262,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, const TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop) { + assert(DT && "DomTree is required"); if (!L->getLoopPreheader()) { LLVM_DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); @@ -311,56 +287,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, return LoopUnrollResult::Unmodified; } - if (ULO.TripCount != 0) - LLVM_DEBUG(dbgs() << " Trip Count = " << ULO.TripCount << "\n"); - if (ULO.TripMultiple != 1) - LLVM_DEBUG(dbgs() << " Trip Multiple = " << ULO.TripMultiple << "\n"); - - // Effectively "DCE" unrolled iterations that are beyond the tripcount - // and will never be executed. - if (ULO.TripCount != 0 && ULO.Count > ULO.TripCount) - ULO.Count = ULO.TripCount; - - // Don't enter the unroll code if there is nothing to do. - if (ULO.TripCount == 0 && ULO.Count < 2 && ULO.PeelCount == 0) { - LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n"); - return LoopUnrollResult::Unmodified; - } - assert(ULO.Count > 0); - assert(ULO.TripMultiple > 0); - assert(ULO.TripCount == 0 || ULO.TripCount % ULO.TripMultiple == 0); - - // Are we eliminating the loop control altogether? - bool CompletelyUnroll = ULO.Count == ULO.TripCount; - - // We assume a run-time trip count if the compiler cannot - // figure out the loop trip count and the unroll-runtime - // flag is specified. - bool RuntimeTripCount = - (ULO.TripCount == 0 && ULO.Count > 0 && ULO.AllowRuntime); - - assert((!RuntimeTripCount || !ULO.PeelCount) && - "Did not expect runtime trip-count unrolling " - "and peeling for the same loop"); - - bool Peeled = false; - if (ULO.PeelCount) { - Peeled = peelLoop(L, ULO.PeelCount, LI, SE, DT, AC, PreserveLCSSA); - - // Successful peeling may result in a change in the loop preheader/trip - // counts. If we later unroll the loop, we want these to be updated. - if (Peeled) { - // According to our guards and profitability checks the only - // meaningful exit should be latch block. Other exits go to deopt, - // so we do not worry about them. 
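One detail from the simplifyLoopAfterUnroll hunk further above deserves a sketch: dead instructions are now parked in WeakTrackingVH handles and deleted only after the walk over the block finishes, because recursive deletion mid-iteration could erase instructions (e.g. operands of a phi) that the iterator has yet to visit. A hedged restatement of the deferral pattern:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

// Queue trivially dead instructions behind weak handles; if recursive
// deletion later removes a queued instruction, its handle goes null and
// the helper skips it instead of touching freed memory.
static void sweepBlock(BasicBlock &BB) {
  SmallVector<WeakTrackingVH, 16> DeadInsts;
  for (Instruction &I : BB)
    if (isInstructionTriviallyDead(&I))
      DeadInsts.emplace_back(&I);
  RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
}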
- BasicBlock *ExitingBlock = L->getLoopLatch(); - assert(ExitingBlock && "Loop without exiting block?"); - assert(L->isLoopExiting(ExitingBlock) && "Latch is not exiting?"); - ULO.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock); - ULO.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock); - } - } // All these values should be taken only after peeling because they might have // changed. @@ -371,6 +298,61 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, L->getExitBlocks(ExitBlocks); std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks(); + const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L); + const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L); + + // Effectively "DCE" unrolled iterations that are beyond the max tripcount + // and will never be executed. + if (MaxTripCount && ULO.Count > MaxTripCount) + ULO.Count = MaxTripCount; + + struct ExitInfo { + unsigned TripCount; + unsigned TripMultiple; + unsigned BreakoutTrip; + bool ExitOnTrue; + SmallVector<BasicBlock *> ExitingBlocks; + }; + DenseMap<BasicBlock *, ExitInfo> ExitInfos; + SmallVector<BasicBlock *, 4> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (auto *ExitingBlock : ExitingBlocks) { + // The folding code is not prepared to deal with non-branch instructions + // right now. + auto *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); + if (!BI) + continue; + + ExitInfo &Info = ExitInfos.try_emplace(ExitingBlock).first->second; + Info.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock); + Info.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock); + if (Info.TripCount != 0) { + Info.BreakoutTrip = Info.TripCount % ULO.Count; + Info.TripMultiple = 0; + } else { + Info.BreakoutTrip = Info.TripMultiple = + (unsigned)GreatestCommonDivisor64(ULO.Count, Info.TripMultiple); + } + Info.ExitOnTrue = !L->contains(BI->getSuccessor(0)); + Info.ExitingBlocks.push_back(ExitingBlock); + LLVM_DEBUG(dbgs() << " Exiting block %" << ExitingBlock->getName() + << ": TripCount=" << Info.TripCount + << ", TripMultiple=" << Info.TripMultiple + << ", BreakoutTrip=" << Info.BreakoutTrip << "\n"); + } + + // Are we eliminating the loop control altogether? Note that we can know + // we're eliminating the backedge without knowing exactly which iteration + // of the unrolled body exits. + const bool CompletelyUnroll = ULO.Count == MaxTripCount; + + const bool PreserveOnlyFirst = CompletelyUnroll && MaxOrZero; + + // There's no point in performing runtime unrolling if this unroll count + // results in a full unroll. + if (CompletelyUnroll) + ULO.Runtime = false; + // Go through all exits of L and see if there are any phi-nodes there. We just // conservatively assume that they're inserted to preserve LCSSA form, which // means that complete unrolling might break this form. We need to either fix @@ -392,30 +374,16 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // A conditional branch which exits the loop, which can be optimized to an // unconditional branch in the unrolled loop in some cases. - BranchInst *ExitingBI = nullptr; bool LatchIsExiting = L->isLoopExiting(LatchBlock); - if (LatchIsExiting) - ExitingBI = LatchBI; - else if (BasicBlock *ExitingBlock = L->getExitingBlock()) - ExitingBI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) { - // If the peeling guard is changed this assert may be relaxed or even - // deleted. 
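A worked example for the per-exit ExitInfo computation above: with an unroll Count of 4 and a known TripCount of 10, BreakoutTrip = 10 % 4 = 2; with TripCount unknown but a proven TripMultiple of 6, both BreakoutTrip and TripMultiple become gcd(4, 6) = 2. A standalone restatement of that arithmetic (illustrative):

#include <numeric>

// Mirrors the BreakoutTrip logic in the hunk above.
unsigned breakoutTrip(unsigned Count, unsigned TripCount,
                      unsigned TripMultiple) {
  if (TripCount != 0)
    return TripCount % Count;          // exact count: the exit lands here
  return std::gcd(Count, TripMultiple); // otherwise fall back to a multiple
}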
- assert(!Peeled && "Peeling guard changed!"); LLVM_DEBUG( dbgs() << "Can't unroll; a conditional latch must exit the loop"); return LoopUnrollResult::Unmodified; } - LLVM_DEBUG({ - if (ExitingBI) - dbgs() << " Exiting Block = " << ExitingBI->getParent()->getName() - << "\n"; - else - dbgs() << " No single exiting block\n"; - }); - // Loops containing convergent instructions must have a count that divides - // their TripMultiple. + // Loops containing convergent instructions cannot use runtime unrolling, + // as the prologue/epilogue may add additional control-dependencies to + // convergent operations. LLVM_DEBUG( { bool HasConvergent = false; @@ -423,22 +391,21 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, for (auto &I : *BB) if (auto *CB = dyn_cast<CallBase>(&I)) HasConvergent |= CB->isConvergent(); - assert((!HasConvergent || ULO.TripMultiple % ULO.Count == 0) && - "Unroll count must divide trip multiple if loop contains a " - "convergent operation."); + assert((!HasConvergent || !ULO.Runtime) && + "Can't runtime unroll if loop contains a convergent operation."); }); bool EpilogProfitability = UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog : isEpilogProfitable(L); - if (RuntimeTripCount && ULO.TripMultiple % ULO.Count != 0 && + if (ULO.Runtime && !UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount, EpilogProfitability, ULO.UnrollRemainder, ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI, PreserveLCSSA, RemainderLoop)) { if (ULO.Force) - RuntimeTripCount = false; + ULO.Runtime = false; else { LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be " "generated when assuming runtime trip count\n"); @@ -446,71 +413,34 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, } } - // If we know the trip count, we know the multiple... - unsigned BreakoutTrip = 0; - if (ULO.TripCount != 0) { - BreakoutTrip = ULO.TripCount % ULO.Count; - ULO.TripMultiple = 0; - } else { - // Figure out what multiple to use. - BreakoutTrip = ULO.TripMultiple = - (unsigned)GreatestCommonDivisor64(ULO.Count, ULO.TripMultiple); - } - using namespace ore; // Report the unrolling decision. 
if (CompletelyUnroll) { LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() - << " with trip count " << ULO.TripCount << "!\n"); + << " with trip count " << ULO.Count << "!\n"); if (ORE) ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(), L->getHeader()) << "completely unrolled loop with " - << NV("UnrollCount", ULO.TripCount) << " iterations"; - }); - } else if (ULO.PeelCount) { - LLVM_DEBUG(dbgs() << "PEELING loop %" << Header->getName() - << " with iteration count " << ULO.PeelCount << "!\n"); - if (ORE) - ORE->emit([&]() { - return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(), - L->getHeader()) - << " peeled loop by " << NV("PeelCount", ULO.PeelCount) - << " iterations"; + << NV("UnrollCount", ULO.Count) << " iterations"; }); } else { - auto DiagBuilder = [&]() { - OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(), - L->getHeader()); - return Diag << "unrolled loop by a factor of " - << NV("UnrollCount", ULO.Count); - }; - LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " << ULO.Count); - if (ULO.TripMultiple == 0 || BreakoutTrip != ULO.TripMultiple) { - LLVM_DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); - if (ORE) - ORE->emit([&]() { - return DiagBuilder() << " with a breakout at trip " - << NV("BreakoutTrip", BreakoutTrip); - }); - } else if (ULO.TripMultiple != 1) { - LLVM_DEBUG(dbgs() << " with " << ULO.TripMultiple << " trips per branch"); - if (ORE) - ORE->emit([&]() { - return DiagBuilder() - << " with " << NV("TripMultiple", ULO.TripMultiple) - << " trips per branch"; - }); - } else if (RuntimeTripCount) { + if (ULO.Runtime) LLVM_DEBUG(dbgs() << " with run-time trip count"); - if (ORE) - ORE->emit( - [&]() { return DiagBuilder() << " with run-time trip count"; }); - } LLVM_DEBUG(dbgs() << "!\n"); + + if (ORE) + ORE->emit([&]() { + OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(), + L->getHeader()); + Diag << "unrolled loop by a factor of " << NV("UnrollCount", ULO.Count); + if (ULO.Runtime) + Diag << " with run-time trip count"; + return Diag; + }); } // We are going to make changes to this loop. SCEV may be keeping cached info @@ -530,12 +460,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, if (!LatchIsExiting) ++NumUnrolledNotLatch; - Optional<bool> ContinueOnTrue = None; - BasicBlock *LoopExit = nullptr; - if (ExitingBI) { - ContinueOnTrue = L->contains(ExitingBI->getSuccessor(0)); - LoopExit = ExitingBI->getSuccessor(*ContinueOnTrue); - } // For the first iteration of the loop, we should use the precloned values for // PHI nodes. Insert associations now. 
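The remark emission above collapses several special-cased diagnostics into one PartialUnrolled remark that appends the run-time qualifier inline. A self-contained sketch of the same pattern, with the DEBUG_TYPE string spelled out and the wrapper name assumed:

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
using namespace llvm;

// Emit a partial-unroll remark; the lambda keeps remark construction
// zero-cost when remarks are disabled.
static void emitUnrollRemark(OptimizationRemarkEmitter &ORE, Loop *L,
                             unsigned Count, bool Runtime) {
  ORE.emit([&]() {
    OptimizationRemark Diag("loop-unroll", "PartialUnrolled",
                            L->getStartLoc(), L->getHeader());
    Diag << "unrolled loop by a factor of " << ore::NV("UnrollCount", Count);
    if (Runtime)
      Diag << " with run-time trip count";
    return Diag;
  });
}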
@@ -546,15 +470,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, } std::vector<BasicBlock *> Headers; - std::vector<BasicBlock *> ExitingBlocks; - std::vector<BasicBlock *> ExitingSucc; std::vector<BasicBlock *> Latches; Headers.push_back(Header); Latches.push_back(LatchBlock); - if (ExitingBI) { - ExitingBlocks.push_back(ExitingBI->getParent()); - ExitingSucc.push_back(ExitingBI->getSuccessor(!(*ContinueOnTrue))); - } // The current on-the-fly SSA update requires blocks to be processed in // reverse postorder so that LastValueMap contains the correct value at each @@ -576,7 +494,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, for (Loop *SubLoop : *L) LoopsToSimplify.insert(SubLoop); - if (Header->getParent()->isDebugInfoForProfiling()) + // When a FSDiscriminator is enabled, we don't need to add the multiply + // factors to the discriminators. + if (Header->getParent()->isDebugInfoForProfiling() && !EnableFSDiscriminator) for (BasicBlock *BB : L->getBlocks()) for (Instruction &I : *BB) if (!isa<DbgInfoIntrinsic>(&I)) @@ -652,12 +572,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // Keep track of the exiting block and its successor block contained in // the loop for the current iteration. - if (ExitingBI) { - if (*BB == ExitingBlocks[0]) - ExitingBlocks.push_back(New); - if (*BB == ExitingSucc[0]) - ExitingSucc.push_back(New); - } + auto ExitInfoIt = ExitInfos.find(*BB); + if (ExitInfoIt != ExitInfos.end()) + ExitInfoIt->second.ExitingBlocks.push_back(New); NewBlocks.push_back(New); UnrolledLoopBlocks.push_back(New); @@ -666,28 +583,23 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // dedicated entry block (copy of the header block), this header's copy // dominates all copied blocks. That means, dominance relations in the // copied body are the same as in the original body. - if (DT) { - if (*BB == Header) - DT->addNewBlock(New, Latches[It - 1]); - else { - auto BBDomNode = DT->getNode(*BB); - auto BBIDom = BBDomNode->getIDom(); - BasicBlock *OriginalBBIDom = BBIDom->getBlock(); - DT->addNewBlock( - New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)])); - } + if (*BB == Header) + DT->addNewBlock(New, Latches[It - 1]); + else { + auto BBDomNode = DT->getNode(*BB); + auto BBIDom = BBDomNode->getIDom(); + BasicBlock *OriginalBBIDom = BBIDom->getBlock(); + DT->addNewBlock( + New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)])); } } // Remap all instructions in the most recent iteration remapInstructionsInBlocks(NewBlocks, LastValueMap); - for (BasicBlock *NewBlock : NewBlocks) { - for (Instruction &I : *NewBlock) { - if (auto *II = dyn_cast<IntrinsicInst>(&I)) - if (II->getIntrinsicID() == Intrinsic::assume) - AC->registerAssumption(II); - } - } + for (BasicBlock *NewBlock : NewBlocks) + for (Instruction &I : *NewBlock) + if (auto *II = dyn_cast<AssumeInst>(&I)) + AC->registerAssumption(II); { // Identify what other metadata depends on the cloned version. After @@ -717,116 +629,18 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, } } - auto setDest = [](BasicBlock *Src, BasicBlock *Dest, BasicBlock *BlockInLoop, - bool NeedConditional, Optional<bool> ContinueOnTrue, - bool IsDestLoopExit) { - auto *Term = cast<BranchInst>(Src->getTerminator()); - if (NeedConditional) { - // Update the conditional branch's successor for the following - // iteration. 
- assert(ContinueOnTrue.hasValue() && - "Expecting valid ContinueOnTrue when NeedConditional is true"); - Term->setSuccessor(!(*ContinueOnTrue), Dest); - } else { - // Remove phi operands at this loop exit - if (!IsDestLoopExit) { - BasicBlock *BB = Src; - for (BasicBlock *Succ : successors(BB)) { - // Preserve the incoming value from BB if we are jumping to the block - // in the current loop. - if (Succ == BlockInLoop) - continue; - for (PHINode &Phi : Succ->phis()) - Phi.removeIncomingValue(BB, false); - } - } - // Replace the conditional branch with an unconditional one. - BranchInst::Create(Dest, Term); - Term->eraseFromParent(); - } - }; - // Connect latches of the unrolled iterations to the headers of the next - // iteration. If the latch is also the exiting block, the conditional branch - // may have to be preserved. + // iteration. Currently they point to the header of the same iteration. for (unsigned i = 0, e = Latches.size(); i != e; ++i) { - // The branch destination. unsigned j = (i + 1) % e; - BasicBlock *Dest = Headers[j]; - bool NeedConditional = LatchIsExiting; - - if (LatchIsExiting) { - if (RuntimeTripCount && j != 0) - NeedConditional = false; - - // For a complete unroll, make the last iteration end with a branch - // to the exit block. - if (CompletelyUnroll) { - if (j == 0) - Dest = LoopExit; - // If using trip count upper bound to completely unroll, we need to - // keep the conditional branch except the last one because the loop - // may exit after any iteration. - assert(NeedConditional && - "NeedCondition cannot be modified by both complete " - "unrolling and runtime unrolling"); - NeedConditional = - (ULO.PreserveCondBr && j && !(ULO.PreserveOnlyFirst && i != 0)); - } else if (j != BreakoutTrip && - (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) { - // If we know the trip count or a multiple of it, we can safely use an - // unconditional branch for some iterations. - NeedConditional = false; - } - } - - setDest(Latches[i], Dest, Headers[i], NeedConditional, ContinueOnTrue, - Dest == LoopExit); - } - - if (!LatchIsExiting) { - // If the latch is not exiting, we may be able to simplify the conditional - // branches in the unrolled exiting blocks. - for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { - // The branch destination. - unsigned j = (i + 1) % e; - bool NeedConditional = true; - - if (RuntimeTripCount && j != 0) - NeedConditional = false; - - if (CompletelyUnroll) - // We cannot drop the conditional branch for the last condition, as we - // may have to execute the loop body depending on the condition. - NeedConditional = j == 0 || ULO.PreserveCondBr; - else if (j != BreakoutTrip && - (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) - // If we know the trip count or a multiple of it, we can safely use an - // unconditional branch for some iterations. - NeedConditional = false; - - // Conditional branches from non-latch exiting block have successors - // either in the same loop iteration or outside the loop. The branches are - // already correct. - if (NeedConditional) - continue; - setDest(ExitingBlocks[i], ExitingSucc[i], ExitingSucc[i], NeedConditional, - None, false); - } - - // When completely unrolling, the last latch becomes unreachable. 
- if (CompletelyUnroll) { - BranchInst *Term = cast<BranchInst>(Latches.back()->getTerminator()); - new UnreachableInst(Term->getContext(), Term); - Term->eraseFromParent(); - } + Latches[i]->getTerminator()->replaceSuccessorWith(Headers[i], Headers[j]); } // Update dominators of blocks we might reach through exits. // Immediate dominator of such block might change, because we add more // routes which can lead to the exit: we can now reach it from the copied // iterations too. - if (DT && ULO.Count > 1) { + if (ULO.Count > 1) { for (auto *BB : OriginalLoopBlocks) { auto *BBDomNode = DT->getNode(BB); SmallVector<BasicBlock *, 16> ChildrenToUpdate; @@ -835,42 +649,98 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, if (!L->contains(ChildBB)) ChildrenToUpdate.push_back(ChildBB); } - BasicBlock *NewIDom; - if (ExitingBI && BB == ExitingBlocks[0]) { - // The latch is special because we emit unconditional branches in - // some cases where the original loop contained a conditional branch. - // Since the latch is always at the bottom of the loop, if the latch - // dominated an exit before unrolling, the new dominator of that exit - // must also be a latch. Specifically, the dominator is the first - // latch which ends in a conditional branch, or the last latch if - // there is no such latch. - // For loops exiting from non latch exiting block, we limit the - // branch simplification to single exiting block loops. - NewIDom = ExitingBlocks.back(); - for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { - Instruction *Term = ExitingBlocks[i]->getTerminator(); - if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) { - NewIDom = - DT->findNearestCommonDominator(ExitingBlocks[i], Latches[i]); - break; - } - } - } else { - // The new idom of the block will be the nearest common dominator - // of all copies of the previous idom. This is equivalent to the - // nearest common dominator of the previous idom and the first latch, - // which dominates all copies of the previous idom. - NewIDom = DT->findNearestCommonDominator(BB, LatchBlock); - } + // The new idom of the block will be the nearest common dominator + // of all copies of the previous idom. This is equivalent to the + // nearest common dominator of the previous idom and the first latch, + // which dominates all copies of the previous idom. + BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, LatchBlock); for (auto *ChildBB : ChildrenToUpdate) DT->changeImmediateDominator(ChildBB, NewIDom); } } - assert(!DT || !UnrollVerifyDomtree || + assert(!UnrollVerifyDomtree || DT->verify(DominatorTree::VerificationLevel::Fast)); DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); + + auto SetDest = [&](BasicBlock *Src, bool WillExit, bool ExitOnTrue) { + auto *Term = cast<BranchInst>(Src->getTerminator()); + const unsigned Idx = ExitOnTrue ^ WillExit; + BasicBlock *Dest = Term->getSuccessor(Idx); + BasicBlock *DeadSucc = Term->getSuccessor(1-Idx); + + // Remove predecessors from all non-Dest successors. + DeadSucc->removePredecessor(Src, /* KeepOneInputPHIs */ true); + + // Replace the conditional branch with an unconditional one. 
+ BranchInst::Create(Dest, Term); + Term->eraseFromParent(); + + DTU.applyUpdates({{DominatorTree::Delete, Src, DeadSucc}}); + }; + + auto WillExit = [&](const ExitInfo &Info, unsigned i, unsigned j, + bool IsLatch) -> Optional<bool> { + if (CompletelyUnroll) { + if (PreserveOnlyFirst) { + if (i == 0) + return None; + return j == 0; + } + // Complete (but possibly inexact) unrolling + if (j == 0) + return true; + if (Info.TripCount && j != Info.TripCount) + return false; + return None; + } + + if (ULO.Runtime) { + // If runtime unrolling inserts a prologue, information about non-latch + // exits may be stale. + if (IsLatch && j != 0) + return false; + return None; + } + + if (j != Info.BreakoutTrip && + (Info.TripMultiple == 0 || j % Info.TripMultiple != 0)) { + // If we know the trip count or a multiple of it, we can safely use an + // unconditional branch for some iterations. + return false; + } + return None; + }; + + // Fold branches for iterations where we know that they will exit or not + // exit. + for (const auto &Pair : ExitInfos) { + const ExitInfo &Info = Pair.second; + for (unsigned i = 0, e = Info.ExitingBlocks.size(); i != e; ++i) { + // The branch destination. + unsigned j = (i + 1) % e; + bool IsLatch = Pair.first == LatchBlock; + Optional<bool> KnownWillExit = WillExit(Info, i, j, IsLatch); + if (!KnownWillExit) + continue; + + // We don't fold known-exiting branches for non-latch exits here, + // because this ensures that both all loop blocks and all exit blocks + // remain reachable in the CFG. + // TODO: We could fold these branches, but it would require much more + // sophisticated updates to LoopInfo. + if (*KnownWillExit && !IsLatch) + continue; + + SetDest(Info.ExitingBlocks[i], *KnownWillExit, Info.ExitOnTrue); + } + } + + // When completely unrolling, the last latch becomes unreachable. + if (!LatchIsExiting && CompletelyUnroll) + changeToUnreachable(Latches.back()->getTerminator(), PreserveLCSSA, &DTU); + // Merge adjacent basic blocks, if possible. for (BasicBlock *Latch : Latches) { BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator()); @@ -893,8 +763,8 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // At this point, the code is well formed. We now simplify the unrolled loop, // doing constant propagation and dead code elimination as we go. - simplifyLoopAfterUnroll(L, !CompletelyUnroll && (ULO.Count > 1 || Peeled), LI, - SE, DT, AC, TTI); + simplifyLoopAfterUnroll(L, !CompletelyUnroll && ULO.Count > 1, LI, SE, DT, AC, + TTI); NumCompletelyUnrolled += CompletelyUnroll; ++NumUnrolled; @@ -915,39 +785,36 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA) NeedToFixLCSSA |= ::needToInsertPhisForLCSSA(OuterL, UnrolledLoopBlocks, LI); - // If we have a pass and a DominatorTree we should re-simplify impacted loops - // to ensure subsequent analyses can rely on this form. We want to simplify + // Make sure that loop-simplify form is preserved. We want to simplify // at least one layer outside of the loop that was unrolled so that any // changes to the parent loop exposed by the unrolling are considered. - if (DT) { - if (OuterL) { - // OuterL includes all loops for which we can break loop-simplify, so - // it's sufficient to simplify only it (it'll recursively simplify inner - // loops too). - if (NeedToFixLCSSA) { - // LCSSA must be performed on the outermost affected loop. 
The unrolled - // loop's last loop latch is guaranteed to be in the outermost loop - // after LoopInfo's been updated by LoopInfo::erase. - Loop *LatchLoop = LI->getLoopFor(Latches.back()); - Loop *FixLCSSALoop = OuterL; - if (!FixLCSSALoop->contains(LatchLoop)) - while (FixLCSSALoop->getParentLoop() != LatchLoop) - FixLCSSALoop = FixLCSSALoop->getParentLoop(); - - formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE); - } else if (PreserveLCSSA) { - assert(OuterL->isLCSSAForm(*DT) && - "Loops should be in LCSSA form after loop-unroll."); - } - - // TODO: That potentially might be compile-time expensive. We should try - // to fix the loop-simplified form incrementally. - simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA); - } else { - // Simplify loops for which we might've broken loop-simplify form. - for (Loop *SubLoop : LoopsToSimplify) - simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA); + if (OuterL) { + // OuterL includes all loops for which we can break loop-simplify, so + // it's sufficient to simplify only it (it'll recursively simplify inner + // loops too). + if (NeedToFixLCSSA) { + // LCSSA must be performed on the outermost affected loop. The unrolled + // loop's last loop latch is guaranteed to be in the outermost loop + // after LoopInfo's been updated by LoopInfo::erase. + Loop *LatchLoop = LI->getLoopFor(Latches.back()); + Loop *FixLCSSALoop = OuterL; + if (!FixLCSSALoop->contains(LatchLoop)) + while (FixLCSSALoop->getParentLoop() != LatchLoop) + FixLCSSALoop = FixLCSSALoop->getParentLoop(); + + formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE); + } else if (PreserveLCSSA) { + assert(OuterL->isLCSSAForm(*DT) && + "Loops should be in LCSSA form after loop-unroll."); } + + // TODO: That potentially might be compile-time expensive. We should try + // to fix the loop-simplified form incrementally. + simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA); + } else { + // Simplify loops for which we might've broken loop-simplify form. + for (Loop *SubLoop : LoopsToSimplify) + simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA); } return CompletelyUnroll ? 
LoopUnrollResult::FullyUnrolled
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index 6e32a2b865aa..6efaa012aeca 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -141,6 +141,7 @@ template <typename T>
static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch,
BasicBlockSet &AftBlocks, T Visit) {
SmallVector<Instruction *, 8> Worklist;
+ SmallPtrSet<Instruction *, 8> VisitedInstr;
for (auto &Phi : Header->phis()) {
Value *V = Phi.getIncomingValueForBlock(Latch);
if (Instruction *I = dyn_cast<Instruction>(V))
@@ -151,11 +152,13 @@ static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch,
Instruction *I = Worklist.pop_back_val();
if (!Visit(I))
return false;
+ VisitedInstr.insert(I);
if (AftBlocks.count(I->getParent()))
for (auto &U : I->operands())
if (Instruction *II = dyn_cast<Instruction>(U))
- Worklist.push_back(II);
+ if (!VisitedInstr.count(II))
+ Worklist.push_back(II);
}
return true;
@@ -245,7 +248,7 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
bool CompletelyUnroll = (Count == TripCount);
// We use the runtime remainder in cases where we don't know trip multiple
- if (TripMultiple == 1 || TripMultiple % Count != 0) {
+ if (TripMultiple % Count != 0) {
if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false,
/*UseEpilogRemainder*/ true, UnrollRemainder,
/*ForgetAllSCEV*/ false,
@@ -346,7 +349,9 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
- if (Header->getParent()->isDebugInfoForProfiling())
+ // When a FSDiscriminator is enabled, we don't need to add the multiply
+ // factors to the discriminators.
+ if (Header->getParent()->isDebugInfoForProfiling() && !EnableFSDiscriminator)
for (BasicBlock *BB : L->getBlocks())
for (Instruction &I : *BB)
if (!isa<DbgInfoIntrinsic>(&I))
@@ -432,9 +437,8 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
remapInstructionsInBlocks(NewBlocks, LastValueMap);
for (BasicBlock *NewBlock : NewBlocks) {
for (Instruction &I : *NewBlock) {
- if (auto *II = dyn_cast<IntrinsicInst>(&I))
- if (II->getIntrinsicID() == Intrinsic::assume)
- AC->registerAssumption(II);
+ if (auto *II = dyn_cast<AssumeInst>(&I))
+ AC->registerAssumption(II);
}
}
@@ -831,6 +835,23 @@ static bool isEligibleLoopForm(const Loop &Root) {
if (SubLoopsSize != 1)
return false;
+ // Only loops with a single exit block can be unrolled and jammed.
+ // The function getExitBlock() is used for this check, rather than
+ // getUniqueExitBlock() to ensure loops with multiple exit edges are
+ // disallowed.
+ if (!L->getExitBlock()) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; only loops with single exit "
+ "blocks can be unrolled and jammed.\n");
+ return false;
+ }
+
+ // Only loops with a single exiting block can be unrolled and jammed.
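The two new guards distinguish exit blocks (outside the loop, reached from it) from exiting blocks (inside the loop, branching out of it); the exiting-block check follows immediately below. A hedged sketch of the combined shape test, wrapper name assumed:

#include "llvm/Analysis/LoopInfo.h"
using namespace llvm;

// True only for single-exit-edge loops: getExitBlock() is null when there
// are multiple exit edges (even to one block, unlike getUniqueExitBlock()),
// and getExitingBlock() is null when several blocks can leave the loop.
static bool hasSingleExitShape(const Loop &L) {
  return L.getExitBlock() && L.getExitingBlock();
}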
+ if (!L->getExitingBlock()) { + LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; only loops with single " + "exiting blocks can be unrolled and jammed.\n"); + return false; + } + L = L->getSubLoops()[0]; } while (L); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 0abf62be156f..6749d3db743c 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -50,6 +50,9 @@ static cl::opt<bool> UnrollRuntimeMultiExit( "unroll-runtime-multi-exit", cl::init(false), cl::Hidden, cl::desc("Allow runtime unrolling for loops with multiple exits, when " "epilog is generated")); +static cl::opt<bool> UnrollRuntimeOtherExitPredictable( + "unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden, + cl::desc("Assume the non latch exit block to be predictable")); /// Connect the unrolling prolog code to the original loop. /// The unrolling prolog code contains code to execute the @@ -493,12 +496,19 @@ static bool canProfitablyUnrollMultiExitLoop( if (ExitingBlocks.size() > 2) return false; + // Allow unrolling of loops with no non latch exit blocks. + if (OtherExits.size() == 0) + return true; + // The second heuristic is that L has one exit other than the latchexit and // that exit is a deoptimize block. We know that deoptimize blocks are rarely // taken, which also implies the branch leading to the deoptimize block is - // highly predictable. + // highly predictable. When UnrollRuntimeOtherExitPredictable is specified, we + // assume the other exit branch is predictable even if it has no deoptimize + // call. return (OtherExits.size() == 1 && - OtherExits[0]->getTerminatingDeoptimizeCall()); + (UnrollRuntimeOtherExitPredictable || + OtherExits[0]->getTerminatingDeoptimizeCall())); // TODO: These can be fine-tuned further to consider code size or deopt states // that are captured by the deoptimize exit block. 
// Also, we can extend this to support more cases, if we actually @@ -974,11 +984,9 @@ bool llvm::UnrollRuntimeLoopRemainder( LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n"); UnrollResult = UnrollLoop(remainderLoop, - {/*Count*/ Count - 1, /*TripCount*/ Count - 1, - /*Force*/ false, /*AllowRuntime*/ false, - /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true, - /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1, - /*PeelCount*/ 0, /*UnrollRemainder*/ false, ForgetAllSCEV}, + {/*Count*/ Count - 1, /*Force*/ false, /*Runtime*/ false, + /*AllowExpensiveTripCount*/ false, + /*UnrollRemainder*/ false, ForgetAllSCEV}, LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp index f0f423e9812a..e4d78f9ada08 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -54,16 +54,10 @@ using namespace llvm; using namespace llvm::PatternMatch; -static cl::opt<bool> ForceReductionIntrinsic( - "force-reduction-intrinsics", cl::Hidden, - cl::desc("Force creating reduction intrinsics for testing."), - cl::init(false)); - #define DEBUG_TYPE "loop-utils" static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced"; static const char *LLVMLoopDisableLICM = "llvm.licm.disable"; -static const char *LLVMLoopMustProgress = "llvm.loop.mustprogress"; bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, @@ -260,50 +254,8 @@ void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD, TheLoop->setLoopID(NewLoopID); } -/// Find string metadata for loop -/// -/// If it has a value (e.g. {"llvm.distribute", 1} return the value as an -/// operand or null otherwise. If the string metadata is not found return -/// Optional's not-a-value. -Optional<const MDOperand *> llvm::findStringMetadataForLoop(const Loop *TheLoop, - StringRef Name) { - MDNode *MD = findOptionMDForLoop(TheLoop, Name); - if (!MD) - return None; - switch (MD->getNumOperands()) { - case 1: - return nullptr; - case 2: - return &MD->getOperand(1); - default: - llvm_unreachable("loop metadata has 0 or 1 operand"); - } -} - -static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop, - StringRef Name) { - MDNode *MD = findOptionMDForLoop(TheLoop, Name); - if (!MD) - return None; - switch (MD->getNumOperands()) { - case 1: - // When the value is absent it is interpreted as 'attribute set'. 
- return true; - case 2: - if (ConstantInt *IntMD = - mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get())) - return IntMD->getZExtValue(); - return true; - } - llvm_unreachable("unexpected number of options"); -} - -bool llvm::getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) { - return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false); -} - Optional<ElementCount> -llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) { +llvm::getOptionalElementCountLoopAttribute(const Loop *TheLoop) { Optional<int> Width = getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width"); @@ -316,20 +268,6 @@ llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) { return None; } -llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop, - StringRef Name) { - const MDOperand *AttrMD = - findStringMetadataForLoop(TheLoop, Name).getValueOr(nullptr); - if (!AttrMD) - return None; - - ConstantInt *IntMD = mdconst::extract_or_null<ConstantInt>(AttrMD->get()); - if (!IntMD) - return None; - - return IntMD->getSExtValue(); -} - Optional<MDNode *> llvm::makeFollowupLoopID( MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions, const char *InheritOptionsExceptPrefix, bool AlwaysNew) { @@ -419,11 +357,7 @@ bool llvm::hasDisableLICMTransformsHint(const Loop *L) { return getBooleanLoopAttribute(L, LLVMLoopDisableLICM); } -bool llvm::hasMustProgress(const Loop *L) { - return getBooleanLoopAttribute(L, LLVMLoopMustProgress); -} - -TransformationMode llvm::hasUnrollTransformation(Loop *L) { +TransformationMode llvm::hasUnrollTransformation(const Loop *L) { if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable")) return TM_SuppressedByUser; @@ -444,7 +378,7 @@ TransformationMode llvm::hasUnrollTransformation(Loop *L) { return TM_Unspecified; } -TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) { +TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) { if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable")) return TM_SuppressedByUser; @@ -462,7 +396,7 @@ TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) { return TM_Unspecified; } -TransformationMode llvm::hasVectorizeTransformation(Loop *L) { +TransformationMode llvm::hasVectorizeTransformation(const Loop *L) { Optional<bool> Enable = getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable"); @@ -498,7 +432,7 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) { return TM_Unspecified; } -TransformationMode llvm::hasDistributeTransformation(Loop *L) { +TransformationMode llvm::hasDistributeTransformation(const Loop *L) { if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable")) return TM_ForcedByUser; @@ -508,7 +442,7 @@ TransformationMode llvm::hasDistributeTransformation(Loop *L) { return TM_Unspecified; } -TransformationMode llvm::hasLICMVersioningTransformation(Loop *L) { +TransformationMode llvm::hasLICMVersioningTransformation(const Loop *L) { if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable")) return TM_SuppressedByUser; @@ -789,8 +723,8 @@ void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get()); DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager); - (void)changeToUnreachable(BackedgeBB->getTerminator(), /*UseTrap*/false, - /*PreserveLCSSA*/true, &DTU, MSSAU.get()); + (void)changeToUnreachable(BackedgeBB->getTerminator(), + /*PreserveLCSSA*/ true, &DTU, MSSAU.get()); // Erase (and destroy) this loop instance. 
Handles relinking sub-loops // and blocks within the loop as needed. @@ -944,12 +878,6 @@ Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, break; } - // We only match FP sequences that are 'fast', so we can unconditionally - // set it on any generated instructions. - IRBuilderBase::FastMathFlagGuard FMFG(Builder); - FastMathFlags FMF; - FMF.setFast(); - Builder.setFastMathFlags(FMF); Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp"); Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); return Select; @@ -1031,14 +959,10 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, const TargetTransformInfo *TTI, Value *Src, RecurKind RdxKind, ArrayRef<Value *> RedOps) { - unsigned Opcode = RecurrenceDescriptor::getOpcode(RdxKind); TargetTransformInfo::ReductionFlags RdxFlags; RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax || RdxKind == RecurKind::UMax || RdxKind == RecurKind::FMax; RdxFlags.IsSigned = RdxKind == RecurKind::SMax || RdxKind == RecurKind::SMin; - if (!ForceReductionIntrinsic && - !TTI->useReductionIntrinsic(Opcode, Src->getType(), RdxFlags)) - return getShuffleReduction(Builder, Src, Opcode, RdxKind, RedOps); auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType(); switch (RdxKind) { @@ -1076,7 +1000,8 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, Value *llvm::createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI, - RecurrenceDescriptor &Desc, Value *Src) { + const RecurrenceDescriptor &Desc, + Value *Src) { // TODO: Support in-order reductions based on the recurrence descriptor. // All ops in the reduction inherit fast-math-flags from the recurrence // descriptor. @@ -1085,6 +1010,17 @@ Value *llvm::createTargetReduction(IRBuilderBase &B, return createSimpleTargetReduction(B, TTI, Src, Desc.getRecurrenceKind()); } +Value *llvm::createOrderedReduction(IRBuilderBase &B, + const RecurrenceDescriptor &Desc, + Value *Src, Value *Start) { + assert(Desc.getRecurrenceKind() == RecurKind::FAdd && + "Unexpected reduction kind"); + assert(Src->getType()->isVectorTy() && "Expected a vector type"); + assert(!Start->getType()->isVectorTy() && "Expected a scalar type"); + + return B.CreateFAddReduce(Start, Src); +} + void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) { auto *VecOp = dyn_cast<Instruction>(I); if (!VecOp) @@ -1587,55 +1523,31 @@ struct PointerBounds { /// in \p TheLoop. \return the values for the bounds. static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG, Loop *TheLoop, Instruction *Loc, - SCEVExpander &Exp, ScalarEvolution *SE) { - // TODO: Add helper to retrieve pointers to CG. - Value *Ptr = CG->RtCheck.Pointers[CG->Members[0]].PointerValue; - const SCEV *Sc = SE->getSCEV(Ptr); - - unsigned AS = Ptr->getType()->getPointerAddressSpace(); + SCEVExpander &Exp) { LLVMContext &Ctx = Loc->getContext(); - - // Use this type for pointer arithmetic. - Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); - - if (SE->isLoopInvariant(Sc, TheLoop)) { - LLVM_DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" - << *Ptr << "\n"); - // Ptr could be in the loop body. If so, expand a new one at the correct - // location. - Instruction *Inst = dyn_cast<Instruction>(Ptr); - Value *NewPtr = (Inst && TheLoop->contains(Inst)) - ? Exp.expandCodeFor(Sc, PtrArithTy, Loc) - : Ptr; - // We must return a half-open range, which means incrementing Sc. 
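Among the LoopUtils changes above, createOrderedReduction is new: unlike createTargetReduction it emits a strict, in-order floating-point reduction (llvm.vector.reduce.fadd with a scalar start value) rather than a reassociated one. A minimal usage sketch; the builder and FAdd recurrence descriptor are assumed to come from the caller:

#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;

// Fold a vector partial sum into ScalarStart while preserving FP order.
static Value *foldOrdered(IRBuilderBase &B, const RecurrenceDescriptor &Desc,
                          Value *VecSrc, Value *ScalarStart) {
  return createOrderedReduction(B, Desc, VecSrc, ScalarStart);
}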
- const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy)); - Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc); - return {NewPtr, NewPtrPlusOne}; - } else { - Value *Start = nullptr, *End = nullptr; - LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); - Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc); - End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc); - LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High - << "\n"); - return {Start, End}; - } + Type *PtrArithTy = Type::getInt8PtrTy(Ctx, CG->AddressSpace); + + Value *Start = nullptr, *End = nullptr; + LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); + Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc); + End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc); + LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n"); + return {Start, End}; } /// Turns a collection of checks into a collection of expanded upper and /// lower bounds for both pointers in the check. static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L, - Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp) { + Instruction *Loc, SCEVExpander &Exp) { SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds; // Here we're relying on the SCEV Expander's cache to only emit code for the // same bounds once. transform(PointerChecks, std::back_inserter(ChecksWithBounds), [&](const RuntimePointerCheck &Check) { - PointerBounds First = expandBounds(Check.first, L, Loc, Exp, SE), - Second = - expandBounds(Check.second, L, Loc, Exp, SE); + PointerBounds First = expandBounds(Check.first, L, Loc, Exp), + Second = expandBounds(Check.second, L, Loc, Exp); return std::make_pair(First, Second); }); @@ -1645,12 +1557,10 @@ expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L, std::pair<Instruction *, Instruction *> llvm::addRuntimeChecks( Instruction *Loc, Loop *TheLoop, const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, - ScalarEvolution *SE) { + SCEVExpander &Exp) { // TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible. // TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible - const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout(); - SCEVExpander Exp(*SE, DL, "induction"); - auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, SE, Exp); + auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, Exp); LLVMContext &Ctx = Loc->getContext(); Instruction *FirstInst = nullptr; @@ -1722,3 +1632,177 @@ std::pair<Instruction *, Instruction *> llvm::addRuntimeChecks( FirstInst = GetFirstInst(FirstInst, Check, Loc); return std::make_pair(FirstInst, Check); } + +Optional<IVConditionInfo> llvm::hasPartialIVCondition(Loop &L, + unsigned MSSAThreshold, + MemorySSA &MSSA, + AAResults &AA) { + auto *TI = dyn_cast<BranchInst>(L.getHeader()->getTerminator()); + if (!TI || !TI->isConditional()) + return {}; + + auto *CondI = dyn_cast<CmpInst>(TI->getCondition()); + // The case with the condition outside the loop should already be handled + // earlier. 
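Also in the hunks above, addRuntimeChecks now takes a caller-owned SCEVExpander instead of constructing one from ScalarEvolution internally, so the expander's cache can be shared across emissions (the body of hasPartialIVCondition continues below). A hedged caller-side sketch, with include paths as of this LLVM version assumed:

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
using namespace llvm;

// Build the expander at the call site and hand it to addRuntimeChecks.
static void emitChecks(Instruction *Loc, Loop *TheLoop,
                       const SmallVectorImpl<RuntimePointerCheck> &Checks,
                       ScalarEvolution &SE) {
  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
  SCEVExpander Exp(SE, DL, "induction");
  auto Bounds = addRuntimeChecks(Loc, TheLoop, Checks, Exp);
  (void)Bounds; // first and last emitted check instructions
}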
+ if (!CondI || !L.contains(CondI)) + return {}; + + SmallVector<Instruction *> InstToDuplicate; + InstToDuplicate.push_back(CondI); + + SmallVector<Value *, 4> WorkList; + WorkList.append(CondI->op_begin(), CondI->op_end()); + + SmallVector<MemoryAccess *, 4> AccessesToCheck; + SmallVector<MemoryLocation, 4> AccessedLocs; + while (!WorkList.empty()) { + Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val()); + if (!I || !L.contains(I)) + continue; + + // TODO: support additional instructions. + if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I)) + return {}; + + // Do not duplicate volatile and atomic loads. + if (auto *LI = dyn_cast<LoadInst>(I)) + if (LI->isVolatile() || LI->isAtomic()) + return {}; + + InstToDuplicate.push_back(I); + if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) { + if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) { + // Queue the defining access to check for alias checks. + AccessesToCheck.push_back(MemUse->getDefiningAccess()); + AccessedLocs.push_back(MemoryLocation::get(I)); + } else { + // MemoryDefs may clobber the location or may be atomic memory + // operations. Bail out. + return {}; + } + } + WorkList.append(I->op_begin(), I->op_end()); + } + + if (InstToDuplicate.empty()) + return {}; + + SmallVector<BasicBlock *, 4> ExitingBlocks; + L.getExitingBlocks(ExitingBlocks); + auto HasNoClobbersOnPath = + [&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate, + MSSAThreshold](BasicBlock *Succ, BasicBlock *Header, + SmallVector<MemoryAccess *, 4> AccessesToCheck) + -> Optional<IVConditionInfo> { + IVConditionInfo Info; + // First, collect all blocks in the loop that are on a patch from Succ + // to the header. + SmallVector<BasicBlock *, 4> WorkList; + WorkList.push_back(Succ); + WorkList.push_back(Header); + SmallPtrSet<BasicBlock *, 4> Seen; + Seen.insert(Header); + Info.PathIsNoop &= + all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); }); + + while (!WorkList.empty()) { + BasicBlock *Current = WorkList.pop_back_val(); + if (!L.contains(Current)) + continue; + const auto &SeenIns = Seen.insert(Current); + if (!SeenIns.second) + continue; + + Info.PathIsNoop &= all_of( + *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); }); + WorkList.append(succ_begin(Current), succ_end(Current)); + } + + // Require at least 2 blocks on a path through the loop. This skips + // paths that directly exit the loop. + if (Seen.size() < 2) + return {}; + + // Next, check if there are any MemoryDefs that are on the path through + // the loop (in the Seen set) and they may-alias any of the locations in + // AccessedLocs. If that is the case, they may modify the condition and + // partial unswitching is not possible. + SmallPtrSet<MemoryAccess *, 4> SeenAccesses; + while (!AccessesToCheck.empty()) { + MemoryAccess *Current = AccessesToCheck.pop_back_val(); + auto SeenI = SeenAccesses.insert(Current); + if (!SeenI.second || !Seen.contains(Current->getBlock())) + continue; + + // Bail out if exceeded the threshold. + if (SeenAccesses.size() >= MSSAThreshold) + return {}; + + // MemoryUse are read-only accesses. + if (isa<MemoryUse>(Current)) + continue; + + // For a MemoryDef, check if is aliases any of the location feeding + // the original condition. 
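        // Sketch of the query performed below (standard AAResults semantics):
        // a MemoryDef clobbers the condition iff its instruction may write
        // one of the collected locations, i.e.
        //   isModSet(AA.getModRefInfo(Def->getMemoryInst(), Loc))
        // holds for some Loc in AccessedLocs.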
+ if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) { + if (any_of(AccessedLocs, [&AA, CurrentDef](MemoryLocation &Loc) { + return isModSet( + AA.getModRefInfo(CurrentDef->getMemoryInst(), Loc)); + })) + return {}; + } + + for (Use &U : Current->uses()) + AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser())); + } + + // We could also allow loops with known trip counts without mustprogress, + // but ScalarEvolution may not be available. + Info.PathIsNoop &= isMustProgress(&L); + + // If the path is considered a no-op so far, check if it reaches a + // single exit block without any phis. This ensures no values from the + // loop are used outside of the loop. + if (Info.PathIsNoop) { + for (auto *Exiting : ExitingBlocks) { + if (!Seen.contains(Exiting)) + continue; + for (auto *Succ : successors(Exiting)) { + if (L.contains(Succ)) + continue; + + Info.PathIsNoop &= llvm::empty(Succ->phis()) && + (!Info.ExitForPath || Info.ExitForPath == Succ); + if (!Info.PathIsNoop) + break; + assert((!Info.ExitForPath || Info.ExitForPath == Succ) && + "cannot have multiple exit blocks"); + Info.ExitForPath = Succ; + } + } + } + if (!Info.ExitForPath) + Info.PathIsNoop = false; + + Info.InstToDuplicate = InstToDuplicate; + return Info; + }; + + // If we branch to the same successor, partial unswitching will not be + // beneficial. + if (TI->getSuccessor(0) == TI->getSuccessor(1)) + return {}; + + if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L.getHeader(), + AccessesToCheck)) { + Info->KnownValue = ConstantInt::getTrue(TI->getContext()); + return Info; + } + if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L.getHeader(), + AccessesToCheck)) { + Info->KnownValue = ConstantInt::getFalse(TI->getContext()); + return Info; + } + + return {}; +} diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp index 599bd1feb2bc..8a89158788cf 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp @@ -44,11 +44,11 @@ LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, AliasChecks(Checks.begin(), Checks.end()), Preds(LAI.getPSE().getUnionPredicate()), LAI(LAI), LI(LI), DT(DT), SE(SE) { - assert(L->getUniqueExitBlock() && "No single exit block"); } void LoopVersioning::versionLoop( const SmallVectorImpl<Instruction *> &DefsUsedOutside) { + assert(VersionedLoop->getUniqueExitBlock() && "No single exit block"); assert(VersionedLoop->isLoopSimplifyForm() && "Loop is not in loop-simplify form"); @@ -60,9 +60,12 @@ void LoopVersioning::versionLoop( // Add the memcheck in the original preheader (this is empty initially). 
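  // Caller-side sketch of the revised interface: the SCEVExpander is now
  // constructed by the caller and passed to addRuntimeChecks (as done just
  // below), so several emission sites can share one expander cache:
  //   SCEVExpander Exp(*SE, DL, "induction");
  //   std::tie(FirstCheckInst, MemRuntimeCheck) =
  //       addRuntimeChecks(Preheader->getTerminator(), L, Checks, Exp);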
BasicBlock *RuntimeCheckBB = VersionedLoop->getLoopPreheader(); const auto &RtPtrChecking = *LAI.getRuntimePointerChecking(); - std::tie(FirstCheckInst, MemRuntimeCheck) = - addRuntimeChecks(RuntimeCheckBB->getTerminator(), VersionedLoop, - AliasChecks, RtPtrChecking.getSE()); + + SCEVExpander Exp2(*RtPtrChecking.getSE(), + VersionedLoop->getHeader()->getModule()->getDataLayout(), + "induction"); + std::tie(FirstCheckInst, MemRuntimeCheck) = addRuntimeChecks( + RuntimeCheckBB->getTerminator(), VersionedLoop, AliasChecks, Exp2); SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(), "scev.check"); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp new file mode 100644 index 000000000000..68d4dd9d576b --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp @@ -0,0 +1,408 @@ +//===-- MemoryOpRemark.cpp - Auto-init remark analysis---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation of the analysis for the "auto-init" remark. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/MemoryOpRemark.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" + +using namespace llvm; +using namespace llvm::ore; + +MemoryOpRemark::~MemoryOpRemark() = default; + +bool MemoryOpRemark::canHandle(const Instruction *I, const TargetLibraryInfo &TLI) { + if (isa<StoreInst>(I)) + return true; + + if (auto *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + case Intrinsic::memcpy_inline: + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::memset: + case Intrinsic::memcpy_element_unordered_atomic: + case Intrinsic::memmove_element_unordered_atomic: + case Intrinsic::memset_element_unordered_atomic: + return true; + default: + return false; + } + } + + if (auto *CI = dyn_cast<CallInst>(I)) { + auto *CF = CI->getCalledFunction(); + if (!CF) + return false; + + if (!CF->hasName()) + return false; + + LibFunc LF; + bool KnownLibCall = TLI.getLibFunc(*CF, LF) && TLI.has(LF); + if (!KnownLibCall) + return false; + + switch (LF) { + case LibFunc_memcpy_chk: + case LibFunc_mempcpy_chk: + case LibFunc_memset_chk: + case LibFunc_memmove_chk: + case LibFunc_memcpy: + case LibFunc_mempcpy: + case LibFunc_memset: + case LibFunc_memmove: + case LibFunc_bzero: + case LibFunc_bcopy: + return true; + default: + return false; + } + } + + return false; +} + +void MemoryOpRemark::visit(const Instruction *I) { + // For some of them, we can provide more information: + + // For stores: + // * size + // * volatile / atomic + if (auto *SI = dyn_cast<StoreInst>(I)) { + visitStore(*SI); + return; + } + + // For intrinsics: + // * user-friendly name + // * size + if (auto *II = dyn_cast<IntrinsicInst>(I)) { + visitIntrinsicCall(*II); + return; + } + + // For calls: + // * known/unknown function (e.g. 
the compiler knows bzero, but it doesn't + // know my_bzero) + // * memory operation size + if (auto *CI = dyn_cast<CallInst>(I)) { + visitCall(*CI); + return; + } + + visitUnknown(*I); +} + +std::string MemoryOpRemark::explainSource(StringRef Type) const { + return (Type + ".").str(); +} + +StringRef MemoryOpRemark::remarkName(RemarkKind RK) const { + switch (RK) { + case RK_Store: + return "MemoryOpStore"; + case RK_Unknown: + return "MemoryOpUnknown"; + case RK_IntrinsicCall: + return "MemoryOpIntrinsicCall"; + case RK_Call: + return "MemoryOpCall"; + } + llvm_unreachable("missing RemarkKind case"); +} + +static void inlineVolatileOrAtomicWithExtraArgs(bool *Inline, bool Volatile, + bool Atomic, + DiagnosticInfoIROptimization &R) { + if (Inline && *Inline) + R << " Inlined: " << NV("StoreInlined", true) << "."; + if (Volatile) + R << " Volatile: " << NV("StoreVolatile", true) << "."; + if (Atomic) + R << " Atomic: " << NV("StoreAtomic", true) << "."; + // Emit the false cases under ExtraArgs. This won't show them in the remark + // message but will end up in the serialized remarks. + if ((Inline && !*Inline) || !Volatile || !Atomic) + R << setExtraArgs(); + if (Inline && !*Inline) + R << " Inlined: " << NV("StoreInlined", false) << "."; + if (!Volatile) + R << " Volatile: " << NV("StoreVolatile", false) << "."; + if (!Atomic) + R << " Atomic: " << NV("StoreAtomic", false) << "."; +} + +static Optional<uint64_t> getSizeInBytes(Optional<uint64_t> SizeInBits) { + if (!SizeInBits || *SizeInBits % 8 != 0) + return None; + return *SizeInBits / 8; +} + +template<typename ...Ts> +std::unique_ptr<DiagnosticInfoIROptimization> +MemoryOpRemark::makeRemark(Ts... Args) { + switch (diagnosticKind()) { + case DK_OptimizationRemarkAnalysis: + return std::make_unique<OptimizationRemarkAnalysis>(Args...); + case DK_OptimizationRemarkMissed: + return std::make_unique<OptimizationRemarkMissed>(Args...); + default: + llvm_unreachable("unexpected DiagnosticKind"); + } +} + +void MemoryOpRemark::visitStore(const StoreInst &SI) { + bool Volatile = SI.isVolatile(); + bool Atomic = SI.isAtomic(); + int64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType()); + + auto R = makeRemark(RemarkPass.data(), remarkName(RK_Store), &SI); + *R << explainSource("Store") << "\nStore size: " << NV("StoreSize", Size) + << " bytes."; + visitPtr(SI.getOperand(1), /*IsRead=*/false, *R); + inlineVolatileOrAtomicWithExtraArgs(nullptr, Volatile, Atomic, *R); + ORE.emit(*R); +} + +void MemoryOpRemark::visitUnknown(const Instruction &I) { + auto R = makeRemark(RemarkPass.data(), remarkName(RK_Unknown), &I); + *R << explainSource("Initialization"); + ORE.emit(*R); +} + +void MemoryOpRemark::visitIntrinsicCall(const IntrinsicInst &II) { + SmallString<32> CallTo; + bool Atomic = false; + bool Inline = false; + switch (II.getIntrinsicID()) { + case Intrinsic::memcpy_inline: + CallTo = "memcpy"; + Inline = true; + break; + case Intrinsic::memcpy: + CallTo = "memcpy"; + break; + case Intrinsic::memmove: + CallTo = "memmove"; + break; + case Intrinsic::memset: + CallTo = "memset"; + break; + case Intrinsic::memcpy_element_unordered_atomic: + CallTo = "memcpy"; + Atomic = true; + break; + case Intrinsic::memmove_element_unordered_atomic: + CallTo = "memmove"; + Atomic = true; + break; + case Intrinsic::memset_element_unordered_atomic: + CallTo = "memset"; + Atomic = true; + break; + default: + return visitUnknown(II); + } + + auto R = makeRemark(RemarkPass.data(), remarkName(RK_IntrinsicCall), &II); + visitCallee(CallTo.str(), 
/*KnownLibCall=*/true, *R); + visitSizeOperand(II.getOperand(2), *R); + + auto *CIVolatile = dyn_cast<ConstantInt>(II.getOperand(3)); + // No such thing as a memory intrinsic that is both atomic and volatile. + bool Volatile = !Atomic && CIVolatile && CIVolatile->getZExtValue(); + switch (II.getIntrinsicID()) { + case Intrinsic::memcpy_inline: + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::memcpy_element_unordered_atomic: + visitPtr(II.getOperand(1), /*IsRead=*/true, *R); + visitPtr(II.getOperand(0), /*IsRead=*/false, *R); + break; + case Intrinsic::memset: + case Intrinsic::memset_element_unordered_atomic: + visitPtr(II.getOperand(0), /*IsRead=*/false, *R); + break; + } + inlineVolatileOrAtomicWithExtraArgs(&Inline, Volatile, Atomic, *R); + ORE.emit(*R); +} + +void MemoryOpRemark::visitCall(const CallInst &CI) { + Function *F = CI.getCalledFunction(); + if (!F) + return visitUnknown(CI); + + LibFunc LF; + bool KnownLibCall = TLI.getLibFunc(*F, LF) && TLI.has(LF); + auto R = makeRemark(RemarkPass.data(), remarkName(RK_Call), &CI); + visitCallee(F, KnownLibCall, *R); + visitKnownLibCall(CI, LF, *R); + ORE.emit(*R); +} + +template <typename FTy> +void MemoryOpRemark::visitCallee(FTy F, bool KnownLibCall, + DiagnosticInfoIROptimization &R) { + R << "Call to "; + if (!KnownLibCall) + R << NV("UnknownLibCall", "unknown") << " function "; + R << NV("Callee", F) << explainSource(""); +} + +void MemoryOpRemark::visitKnownLibCall(const CallInst &CI, LibFunc LF, + DiagnosticInfoIROptimization &R) { + switch (LF) { + default: + return; + case LibFunc_memset_chk: + case LibFunc_memset: + visitSizeOperand(CI.getOperand(2), R); + visitPtr(CI.getOperand(0), /*IsRead=*/false, R); + break; + case LibFunc_bzero: + visitSizeOperand(CI.getOperand(1), R); + visitPtr(CI.getOperand(0), /*IsRead=*/false, R); + break; + case LibFunc_memcpy_chk: + case LibFunc_mempcpy_chk: + case LibFunc_memmove_chk: + case LibFunc_memcpy: + case LibFunc_mempcpy: + case LibFunc_memmove: + case LibFunc_bcopy: + visitSizeOperand(CI.getOperand(2), R); + visitPtr(CI.getOperand(1), /*IsRead=*/true, R); + visitPtr(CI.getOperand(0), /*IsRead=*/false, R); + break; + } +} + +void MemoryOpRemark::visitSizeOperand(Value *V, DiagnosticInfoIROptimization &R) { + if (auto *Len = dyn_cast<ConstantInt>(V)) { + uint64_t Size = Len->getZExtValue(); + R << " Memory operation size: " << NV("StoreSize", Size) << " bytes."; + } +} + +static Optional<StringRef> nameOrNone(const Value *V) { + if (V->hasName()) + return V->getName(); + return None; +} + +void MemoryOpRemark::visitVariable(const Value *V, + SmallVectorImpl<VariableInfo> &Result) { + if (auto *GV = dyn_cast<GlobalVariable>(V)) { + auto *Ty = GV->getValueType(); + uint64_t Size = DL.getTypeSizeInBits(Ty).getFixedSize(); + VariableInfo Var{nameOrNone(GV), Size}; + if (!Var.isEmpty()) + Result.push_back(std::move(Var)); + return; + } + + // If we find some information in the debug info, take that. + bool FoundDI = false; + // Try to get an llvm.dbg.declare, which has a DILocalVariable giving us the + // real debug info name and size of the variable. 
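  // Example of what this recovers (sketch): for a source-level
  //   char buf[32];
  // the llvm.dbg.declare for the backing alloca carries a DILocalVariable
  // whose getName() is "buf" and getSizeInBits() is 256, letting the remark
  // report "buf (32 bytes)" instead of an IR-level temporary name.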
+ for (const DbgVariableIntrinsic *DVI : + FindDbgAddrUses(const_cast<Value *>(V))) { + if (DILocalVariable *DILV = DVI->getVariable()) { + Optional<uint64_t> DISize = getSizeInBytes(DILV->getSizeInBits()); + VariableInfo Var{DILV->getName(), DISize}; + if (!Var.isEmpty()) { + Result.push_back(std::move(Var)); + FoundDI = true; + } + } + } + if (FoundDI) { + assert(!Result.empty()); + return; + } + + const auto *AI = dyn_cast<AllocaInst>(V); + if (!AI) + return; + + // If not, get it from the alloca. + Optional<TypeSize> TySize = AI->getAllocationSizeInBits(DL); + Optional<uint64_t> Size = + TySize ? getSizeInBytes(TySize->getFixedSize()) : None; + VariableInfo Var{nameOrNone(AI), Size}; + if (!Var.isEmpty()) + Result.push_back(std::move(Var)); +} + +void MemoryOpRemark::visitPtr(Value *Ptr, bool IsRead, DiagnosticInfoIROptimization &R) { + // Find if Ptr is a known variable we can give more information on. + SmallVector<Value *, 2> Objects; + getUnderlyingObjectsForCodeGen(Ptr, Objects); + SmallVector<VariableInfo, 2> VIs; + for (const Value *V : Objects) + visitVariable(V, VIs); + + if (VIs.empty()) { + bool CanBeNull; + bool CanBeFreed; + uint64_t Size = Ptr->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed); + if (!Size) + return; + VIs.push_back({None, Size}); + } + + R << (IsRead ? "\n Read Variables: " : "\n Written Variables: "); + for (unsigned i = 0; i < VIs.size(); ++i) { + const VariableInfo &VI = VIs[i]; + assert(!VI.isEmpty() && "No extra content to display."); + if (i != 0) + R << ", "; + if (VI.Name) + R << NV(IsRead ? "RVarName" : "WVarName", *VI.Name); + else + R << NV(IsRead ? "RVarName" : "WVarName", "<unknown>"); + if (VI.Size) + R << " (" << NV(IsRead ? "RVarSize" : "WVarSize", *VI.Size) << " bytes)"; + } + R << "."; +} + +bool AutoInitRemark::canHandle(const Instruction *I) { + if (!I->hasMetadata(LLVMContext::MD_annotation)) + return false; + return any_of(I->getMetadata(LLVMContext::MD_annotation)->operands(), + [](const MDOperand &Op) { + return cast<MDString>(Op.get())->getString() == "auto-init"; + }); +} + +std::string AutoInitRemark::explainSource(StringRef Type) const { + return (Type + " inserted by -ftrivial-auto-var-init.").str(); +} + +StringRef AutoInitRemark::remarkName(RemarkKind RK) const { + switch (RK) { + case RK_Store: + return "AutoInitStore"; + case RK_Unknown: + return "AutoInitUnknownInstruction"; + case RK_IntrinsicCall: + return "AutoInitIntrinsicCall"; + case RK_Call: + return "AutoInitCall"; + } + llvm_unreachable("missing RemarkKind case"); +} diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp index e350320e7569..b1965cf2becb 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp @@ -67,9 +67,9 @@ struct Renamer { }; void MetaRename(Function &F) { - for (auto AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) - if (!AI->getType()->isVoidTy()) - AI->setName("arg"); + for (Argument &Arg : F.args()) + if (!Arg.getType()->isVoidTy()) + Arg.setName("arg"); for (auto &BB : F) { BB.setName("bb"); @@ -101,12 +101,12 @@ void MetaRename(Module &M, } // Rename all global variables - for (auto GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) { - StringRef Name = GI->getName(); + for (GlobalVariable &GV : M.globals()) { + StringRef Name = GV.getName(); if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) continue; - GI->setName("global"); + 
GV.setName("global"); } // Rename all struct types diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp index ef9f18a2289e..2aef37205c53 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -76,18 +76,20 @@ static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> SmallPtrSet<Constant *, 16> InitAsSet; SmallVector<Constant *, 16> Init; if (GV) { - auto *CA = cast<ConstantArray>(GV->getInitializer()); - for (auto &Op : CA->operands()) { - Constant *C = cast_or_null<Constant>(Op); - if (InitAsSet.insert(C).second) - Init.push_back(C); + if (GV->hasInitializer()) { + auto *CA = cast<ConstantArray>(GV->getInitializer()); + for (auto &Op : CA->operands()) { + Constant *C = cast_or_null<Constant>(Op); + if (InitAsSet.insert(C).second) + Init.push_back(C); + } } GV->eraseFromParent(); } Type *Int8PtrTy = llvm::Type::getInt8PtrTy(M.getContext()); for (auto *V : Values) { - Constant *C = ConstantExpr::getBitCast(V, Int8PtrTy); + Constant *C = ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, Int8PtrTy); if (InitAsSet.insert(C).second) Init.push_back(C); } @@ -120,11 +122,14 @@ llvm::declareSanitizerInitFunction(Module &M, StringRef InitName, } Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) { - Function *Ctor = Function::Create( + Function *Ctor = Function::createWithDefaultAttr( FunctionType::get(Type::getVoidTy(M.getContext()), false), - GlobalValue::InternalLinkage, CtorName, &M); + GlobalValue::InternalLinkage, 0, CtorName, &M); + Ctor->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind); BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor); ReturnInst::Create(M.getContext(), CtorBB); + // Ensure Ctor cannot be discarded, even if in a comdat. + appendToUsed(M, {Ctor}); return Ctor; } @@ -172,28 +177,6 @@ llvm::getOrCreateSanitizerCtorAndInitFunctions( return std::make_pair(Ctor, InitFunction); } -Function *llvm::getOrCreateInitFunction(Module &M, StringRef Name) { - assert(!Name.empty() && "Expected init function name"); - if (Function *F = M.getFunction(Name)) { - if (F->arg_size() != 0 || - F->getReturnType() != Type::getVoidTy(M.getContext())) { - std::string Err; - raw_string_ostream Stream(Err); - Stream << "Sanitizer interface function defined with wrong type: " << *F; - report_fatal_error(Err); - } - return F; - } - Function *F = - cast<Function>(M.getOrInsertFunction(Name, AttributeList(), - Type::getVoidTy(M.getContext())) - .getCallee()); - - appendToGlobalCtors(M, F, 0); - - return F; -} - void llvm::filterDeadComdatFunctions( Module &M, SmallVectorImpl<Function *> &DeadComdatFunctions) { // Build a map from the comdat to the number of entries in that comdat we @@ -287,7 +270,7 @@ std::string llvm::getUniqueModuleId(Module *M) { SmallString<32> Str; MD5::stringifyResult(R, Str); - return ("$" + Str).str(); + return ("." 
+ Str).str(); } void VFABI::setVectorVariantNames( diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp index 3312a6f9459b..91280762aaa7 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp @@ -16,7 +16,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" #include "llvm/IR/AssemblyAnnotationWriter.h" @@ -24,7 +23,6 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" @@ -77,8 +75,7 @@ static Instruction *getBranchTerminator(const PredicateBase *PB) { // Given a predicate info that is a type of branching terminator, get the // edge this predicate info represents -const std::pair<BasicBlock *, BasicBlock *> -getBlockEdge(const PredicateBase *PB) { +std::pair<BasicBlock *, BasicBlock *> getBlockEdge(const PredicateBase *PB) { assert(isa<PredicateWithEdge>(PB) && "Not a predicate info type we know how to get an edge from."); const auto *PEdge = cast<PredicateWithEdge>(PB); @@ -158,8 +155,7 @@ struct ValueDFS_Compare { } // For a phi use, or a non-materialized def, return the edge it represents. - const std::pair<BasicBlock *, BasicBlock *> - getBlockEdge(const ValueDFS &VD) const { + std::pair<BasicBlock *, BasicBlock *> getBlockEdge(const ValueDFS &VD) const { if (!VD.Def && VD.U) { auto *PHI = cast<PHINode>(VD.U->getUser()); return std::make_pair(PHI->getIncomingBlock(*VD.U), PHI->getParent()); @@ -541,21 +537,6 @@ void PredicateInfoBuilder::buildPredicateInfo() { renameUses(OpsToRename); } -// Create a ssa_copy declaration with custom mangling, because -// Intrinsic::getDeclaration does not handle overloaded unnamed types properly: -// all unnamed types get mangled to the same string. We use the pointer -// to the type as name here, as it guarantees unique names for different -// types and we remove the declarations when destroying PredicateInfo. -// It is a workaround for PR38117, because solving it in a fully general way is -// tricky (FIXME). -static Function *getCopyDeclaration(Module *M, Type *Ty) { - std::string Name = "llvm.ssa.copy." + utostr((uintptr_t) Ty); - return cast<Function>( - M->getOrInsertFunction(Name, - getType(M->getContext(), Intrinsic::ssa_copy, Ty)) - .getCallee()); -} - // Given the renaming stack, make all the operands currently on the stack real // by inserting them into the IR. Return the last operation's value. Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter, @@ -587,9 +568,8 @@ Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter, // order in the case of multiple predicateinfo in the same block. if (isa<PredicateWithEdge>(ValInfo)) { IRBuilder<> B(getBranchTerminator(ValInfo)); - Function *IF = getCopyDeclaration(F.getParent(), Op->getType()); - if (IF->users().empty()) - PI.CreatedDeclarations.insert(IF); + Function *IF = Intrinsic::getDeclaration( + F.getParent(), Intrinsic::ssa_copy, Op->getType()); CallInst *PIC = B.CreateCall(IF, Op, Op->getName() + "." 
+ Twine(Counter++)); PI.PredicateMap.insert({PIC, ValInfo}); @@ -601,9 +581,8 @@ Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter, // Insert the predicate directly after the assume. While it also holds // directly before it, assume(i1 true) is not a useful fact. IRBuilder<> B(PAssume->AssumeInst->getNextNode()); - Function *IF = getCopyDeclaration(F.getParent(), Op->getType()); - if (IF->users().empty()) - PI.CreatedDeclarations.insert(IF); + Function *IF = Intrinsic::getDeclaration( + F.getParent(), Intrinsic::ssa_copy, Op->getType()); CallInst *PIC = B.CreateCall(IF, Op); PI.PredicateMap.insert({PIC, ValInfo}); Result.Def = PIC; @@ -782,23 +761,6 @@ PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT, Builder.buildPredicateInfo(); } -// Remove all declarations we created . The PredicateInfo consumers are -// responsible for remove the ssa_copy calls created. -PredicateInfo::~PredicateInfo() { - // Collect function pointers in set first, as SmallSet uses a SmallVector - // internally and we have to remove the asserting value handles first. - SmallPtrSet<Function *, 20> FunctionPtrs; - for (auto &F : CreatedDeclarations) - FunctionPtrs.insert(&*F); - CreatedDeclarations.clear(); - - for (Function *F : FunctionPtrs) { - assert(F->user_begin() == F->user_end() && - "PredicateInfo consumer did not remove all SSA copies."); - F->eraseFromParent(); - } -} - Optional<PredicateConstraint> PredicateBase::getConstraint() const { switch (Type) { case PT_Assume: @@ -865,20 +827,6 @@ void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AssumptionCacheTracker>(); } -// Replace ssa_copy calls created by PredicateInfo with their operand. -static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) { - for (auto I = inst_begin(F), E = inst_end(F); I != E;) { - Instruction *Inst = &*I++; - const auto *PI = PredInfo.getPredicateInfoFor(Inst); - auto *II = dyn_cast<IntrinsicInst>(Inst); - if (!PI || !II || II->getIntrinsicID() != Intrinsic::ssa_copy) - continue; - - Inst->replaceAllUsesWith(II->getOperand(0)); - Inst->eraseFromParent(); - } -} - bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) { auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); @@ -886,8 +834,6 @@ bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) { PredInfo->print(dbgs()); if (VerifyPredicateInfo) PredInfo->verifyPredicateInfo(); - - replaceCreatedSSACopys(*PredInfo, F); return false; } @@ -899,7 +845,6 @@ PreservedAnalyses PredicateInfoPrinterPass::run(Function &F, auto PredInfo = std::make_unique<PredicateInfo>(F, DT, AC); PredInfo->print(OS); - replaceCreatedSSACopys(*PredInfo, F); return PreservedAnalyses::all(); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 86bbb6a889e6..427028066026 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -306,17 +306,15 @@ static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) { LoadNotNull->insertAfter(LI); CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull}); CI->insertAfter(LoadNotNull); - AC->registerAssumption(CI); + AC->registerAssumption(cast<AssumeInst>(CI)); } static void removeIntrinsicUsers(AllocaInst *AI) { // Knowing that this alloca is 
promotable, we know that it's safe to kill all // instructions except for load and store. - for (auto UI = AI->use_begin(), UE = AI->use_end(); UI != UE;) { - Instruction *I = cast<Instruction>(UI->getUser()); - Use &U = *UI; - ++UI; + for (Use &U : llvm::make_early_inc_range(AI->uses())) { + Instruction *I = cast<Instruction>(U.getUser()); if (isa<LoadInst>(I) || isa<StoreInst>(I)) continue; @@ -330,10 +328,8 @@ static void removeIntrinsicUsers(AllocaInst *AI) { // The only users of this bitcast/GEP instruction are lifetime intrinsics. // Follow the use/def chain to erase them now instead of leaving it for // dead code elimination later. - for (auto UUI = I->use_begin(), UUE = I->use_end(); UUI != UUE;) { - Instruction *Inst = cast<Instruction>(UUI->getUser()); - Use &UU = *UUI; - ++UUI; + for (Use &UU : llvm::make_early_inc_range(I->uses())) { + Instruction *Inst = cast<Instruction>(UU.getUser()); // Drop the use of I in droppable instructions. if (Inst->isDroppable()) { @@ -403,7 +399,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, // If the replacement value is the load, this must occur in unreachable // code. if (ReplVal == LI) - ReplVal = UndefValue::get(LI->getType()); + ReplVal = PoisonValue::get(LI->getType()); // If the load was marked as nonnull we don't want to lose // that information when we erase this Load. So we preserve @@ -512,7 +508,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, // If the replacement value is the load, this must occur in unreachable // code. if (ReplVal == LI) - ReplVal = UndefValue::get(LI->getType()); + ReplVal = PoisonValue::get(LI->getType()); LI->replaceAllUsesWith(ReplVal); } @@ -676,7 +672,7 @@ void PromoteMem2Reg::run() { // unreachable basic blocks that were not processed by walking the dominator // tree. Just delete the users now. if (!A->use_empty()) - A->replaceAllUsesWith(UndefValue::get(A->getType())); + A->replaceAllUsesWith(PoisonValue::get(A->getType())); A->eraseFromParent(); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp new file mode 100644 index 000000000000..85e5adaeaf5e --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp @@ -0,0 +1,212 @@ +//===- RelLookupTableConverterPass - Rel Table Conv -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements relative lookup table converter that converts +// lookup tables to relative lookup tables to make them PIC-friendly. 
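// For example (sketch): a table of pointers
//   @table = private unnamed_addr constant [2 x i8*] [i8* @a, i8* @b]
//   %p = load i8*, i8** %gep            ; %gep indexes into @table
// becomes a table of 32-bit offsets relative to the table itself,
//   @reltable = ... [2 x i32] [i32 trunc(@a - @reltable),
//                              i32 trunc(@b - @reltable)]
//   %p = call i8* @llvm.load.relative.i32(i8* @reltable, i32 %offset)
// where %offset is the element index shifted left by 2, so the table needs
// no dynamic relocations in position-independent code.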
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/RelLookupTableConverter.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) { + // If lookup table has more than one user, + // do not generate a relative lookup table. + // This is to simplify the analysis that needs to be done for this pass. + // TODO: Add support for lookup tables with multiple uses. + // For ex, this can happen when a function that uses a lookup table gets + // inlined into multiple call sites. + if (!GV.hasInitializer() || + !GV.isConstant() || + !GV.hasOneUse()) + return false; + + GetElementPtrInst *GEP = + dyn_cast<GetElementPtrInst>(GV.use_begin()->getUser()); + if (!GEP || !GEP->hasOneUse()) + return false; + + LoadInst *Load = dyn_cast<LoadInst>(GEP->use_begin()->getUser()); + if (!Load || !Load->hasOneUse()) + return false; + + // If the original lookup table does not have local linkage and is + // not dso_local, do not generate a relative lookup table. + // This optimization creates a relative lookup table that consists of + // offsets between the start of the lookup table and its elements. + // To be able to generate these offsets, relative lookup table and + // its elements should have internal linkage and be dso_local, which means + // that they should resolve to symbols within the same linkage unit. + if (!GV.hasLocalLinkage() || + !GV.isDSOLocal() || + !GV.isImplicitDSOLocal()) + return false; + + ConstantArray *Array = dyn_cast<ConstantArray>(GV.getInitializer()); + // If values are not pointers, do not generate a relative lookup table. + if (!Array || !Array->getType()->getElementType()->isPointerTy()) + return false; + + const DataLayout &DL = M.getDataLayout(); + for (const Use &Op : Array->operands()) { + Constant *ConstOp = cast<Constant>(&Op); + GlobalValue *GVOp; + APInt Offset; + + // If an operand is not a constant offset from a lookup table, + // do not generate a relative lookup table. + if (!IsConstantOffsetFromGlobal(ConstOp, GVOp, Offset, DL)) + return false; + + // If operand is mutable, do not generate a relative lookup table. + auto *GlovalVarOp = dyn_cast<GlobalVariable>(GVOp); + if (!GlovalVarOp || !GlovalVarOp->isConstant()) + return false; + + if (!GlovalVarOp->hasLocalLinkage() || + !GlovalVarOp->isDSOLocal() || + !GlovalVarOp->isImplicitDSOLocal()) + return false; + } + + return true; +} + +static GlobalVariable *createRelLookupTable(Function &Func, + GlobalVariable &LookupTable) { + Module &M = *Func.getParent(); + ConstantArray *LookupTableArr = + cast<ConstantArray>(LookupTable.getInitializer()); + unsigned NumElts = LookupTableArr->getType()->getNumElements(); + ArrayType *IntArrayTy = + ArrayType::get(Type::getInt32Ty(M.getContext()), NumElts); + + GlobalVariable *RelLookupTable = new GlobalVariable( + M, IntArrayTy, LookupTable.isConstant(), LookupTable.getLinkage(), + nullptr, "reltable." 
+ Func.getName(), &LookupTable, + LookupTable.getThreadLocalMode(), LookupTable.getAddressSpace(), + LookupTable.isExternallyInitialized()); + + uint64_t Idx = 0; + SmallVector<Constant *, 64> RelLookupTableContents(NumElts); + + for (Use &Operand : LookupTableArr->operands()) { + Constant *Element = cast<Constant>(Operand); + Type *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext()); + Constant *Base = llvm::ConstantExpr::getPtrToInt(RelLookupTable, IntPtrTy); + Constant *Target = llvm::ConstantExpr::getPtrToInt(Element, IntPtrTy); + Constant *Sub = llvm::ConstantExpr::getSub(Target, Base); + Constant *RelOffset = + llvm::ConstantExpr::getTrunc(Sub, Type::getInt32Ty(M.getContext())); + RelLookupTableContents[Idx++] = RelOffset; + } + + Constant *Initializer = + ConstantArray::get(IntArrayTy, RelLookupTableContents); + RelLookupTable->setInitializer(Initializer); + RelLookupTable->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + RelLookupTable->setAlignment(llvm::Align(4)); + return RelLookupTable; +} + +static void convertToRelLookupTable(GlobalVariable &LookupTable) { + GetElementPtrInst *GEP = + cast<GetElementPtrInst>(LookupTable.use_begin()->getUser()); + LoadInst *Load = cast<LoadInst>(GEP->use_begin()->getUser()); + + Module &M = *LookupTable.getParent(); + BasicBlock *BB = GEP->getParent(); + IRBuilder<> Builder(BB); + Function &Func = *BB->getParent(); + + // Generate an array that consists of relative offsets. + GlobalVariable *RelLookupTable = createRelLookupTable(Func, LookupTable); + + // Place new instruction sequence before GEP. + Builder.SetInsertPoint(GEP); + Value *Index = GEP->getOperand(2); + IntegerType *IntTy = cast<IntegerType>(Index->getType()); + Value *Offset = + Builder.CreateShl(Index, ConstantInt::get(IntTy, 2), "reltable.shift"); + + Function *LoadRelIntrinsic = llvm::Intrinsic::getDeclaration( + &M, Intrinsic::load_relative, {Index->getType()}); + Value *Base = Builder.CreateBitCast(RelLookupTable, Builder.getInt8PtrTy()); + + // Create a call to load.relative intrinsic that computes the target address + // by adding base address (lookup table address) and relative offset. + Value *Result = Builder.CreateCall(LoadRelIntrinsic, {Base, Offset}, + "reltable.intrinsic"); + + // Create a bitcast instruction if necessary. + if (Load->getType() != Builder.getInt8PtrTy()) + Result = Builder.CreateBitCast(Result, Load->getType(), "reltable.bitcast"); + + // Replace load instruction with the new generated instruction sequence. + Load->replaceAllUsesWith(Result); + // Remove Load and GEP instructions. + Load->eraseFromParent(); + GEP->eraseFromParent(); +} + +// Convert lookup tables to relative lookup tables in the module. +static bool convertToRelativeLookupTables( + Module &M, function_ref<TargetTransformInfo &(Function &)> GetTTI) { + Module::iterator FI = M.begin(); + if (FI == M.end()) + return false; + + // Check if we have a target that supports relative lookup tables. + if (!GetTTI(*FI).shouldBuildRelLookupTables()) + return false; + + bool Changed = false; + + for (auto GVI = M.global_begin(), E = M.global_end(); GVI != E;) { + GlobalVariable &GV = *GVI++; + + if (!shouldConvertToRelLookupTable(M, GV)) + continue; + + convertToRelLookupTable(GV); + + // Remove the original lookup table. 
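    // At this point convertToRelLookupTable has erased the Load and GEP that
    // formed the table's only use chain, so the original global is dead and
    // safe to erase.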
+ GV.eraseFromParent(); + + Changed = true; + } + + return Changed; +} + +PreservedAnalyses RelLookupTableConverterPass::run(Module &M, + ModuleAnalysisManager &AM) { + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + + auto GetTTI = [&](Function &F) -> TargetTransformInfo & { + return FAM.getResult<TargetIRAnalysis>(F); + }; + + if (!convertToRelativeLookupTables(M, GetTTI)) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + return PA; +} diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SCCPSolver.cpp new file mode 100644 index 000000000000..4cf99abcc10f --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -0,0 +1,1713 @@ +//===- SCCPSolver.cpp - SCCP Utility --------------------------- *- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// \file +// This file implements the Sparse Conditional Constant Propagation (SCCP) +// utility. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SCCPSolver.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/Local.h" +#include <cassert> +#include <utility> +#include <vector> + +using namespace llvm; + +#define DEBUG_TYPE "sccp" + +// The maximum number of range extensions allowed for operations requiring +// widening. +static const unsigned MaxNumRangeExtensions = 10; + +/// Returns MergeOptions with MaxWidenSteps set to MaxNumRangeExtensions. +static ValueLatticeElement::MergeOptions getMaxWidenStepsOpts() { + return ValueLatticeElement::MergeOptions().setMaxWidenSteps( + MaxNumRangeExtensions); +} + +namespace { + +// Helper to check if \p LV is either a constant or a constant +// range with a single element. This should cover exactly the same cases as the +// old ValueLatticeElement::isConstant() and is intended to be used in the +// transition to ValueLatticeElement. +bool isConstant(const ValueLatticeElement &LV) { + return LV.isConstant() || + (LV.isConstantRange() && LV.getConstantRange().isSingleElement()); +} + +// Helper to check if \p LV is either overdefined or a constant range with more +// than a single element. This should cover exactly the same cases as the old +// ValueLatticeElement::isOverdefined() and is intended to be used in the +// transition to ValueLatticeElement. +bool isOverdefined(const ValueLatticeElement &LV) { + return !LV.isUnknownOrUndef() && !isConstant(LV); +} + +} // namespace + +namespace llvm { + +/// Helper class for SCCPSolver. This implements the instruction visitor and +/// holds all the state. +class SCCPInstVisitor : public InstVisitor<SCCPInstVisitor> { + const DataLayout &DL; + std::function<const TargetLibraryInfo &(Function &)> GetTLI; + SmallPtrSet<BasicBlock *, 8> BBExecutable; // The BBs that are executable. 
+ DenseMap<Value *, ValueLatticeElement> + ValueState; // The state each value is in. + + /// StructValueState - This maintains ValueState for values that have + /// StructType, for example for formal arguments, calls, insertelement, etc. + DenseMap<std::pair<Value *, unsigned>, ValueLatticeElement> StructValueState; + + /// GlobalValue - If we are tracking any values for the contents of a global + /// variable, we keep a mapping from the constant accessor to the element of + /// the global, to the currently known value. If the value becomes + /// overdefined, it's entry is simply removed from this map. + DenseMap<GlobalVariable *, ValueLatticeElement> TrackedGlobals; + + /// TrackedRetVals - If we are tracking arguments into and the return + /// value out of a function, it will have an entry in this map, indicating + /// what the known return value for the function is. + MapVector<Function *, ValueLatticeElement> TrackedRetVals; + + /// TrackedMultipleRetVals - Same as TrackedRetVals, but used for functions + /// that return multiple values. + MapVector<std::pair<Function *, unsigned>, ValueLatticeElement> + TrackedMultipleRetVals; + + /// MRVFunctionsTracked - Each function in TrackedMultipleRetVals is + /// represented here for efficient lookup. + SmallPtrSet<Function *, 16> MRVFunctionsTracked; + + /// A list of functions whose return cannot be modified. + SmallPtrSet<Function *, 16> MustPreserveReturnsInFunctions; + + /// TrackingIncomingArguments - This is the set of functions for whose + /// arguments we make optimistic assumptions about and try to prove as + /// constants. + SmallPtrSet<Function *, 16> TrackingIncomingArguments; + + /// The reason for two worklists is that overdefined is the lowest state + /// on the lattice, and moving things to overdefined as fast as possible + /// makes SCCP converge much faster. + /// + /// By having a separate worklist, we accomplish this because everything + /// possibly overdefined will become overdefined at the soonest possible + /// point. + SmallVector<Value *, 64> OverdefinedInstWorkList; + SmallVector<Value *, 64> InstWorkList; + + // The BasicBlock work list + SmallVector<BasicBlock *, 64> BBWorkList; + + /// KnownFeasibleEdges - Entries in this set are edges which have already had + /// PHI nodes retriggered. + using Edge = std::pair<BasicBlock *, BasicBlock *>; + DenseSet<Edge> KnownFeasibleEdges; + + DenseMap<Function *, AnalysisResultsForFn> AnalysisResults; + DenseMap<Value *, SmallPtrSet<User *, 2>> AdditionalUsers; + + LLVMContext &Ctx; + +private: + ConstantInt *getConstantInt(const ValueLatticeElement &IV) const { + return dyn_cast_or_null<ConstantInt>(getConstant(IV)); + } + + // pushToWorkList - Helper for markConstant/markOverdefined + void pushToWorkList(ValueLatticeElement &IV, Value *V); + + // Helper to push \p V to the worklist, after updating it to \p IV. Also + // prints a debug message with the updated value. + void pushToWorkListMsg(ValueLatticeElement &IV, Value *V); + + // markConstant - Make a value be marked as "constant". If the value + // is not already a constant, add it to the instruction work list so that + // the users of the instruction are updated later. + bool markConstant(ValueLatticeElement &IV, Value *V, Constant *C, + bool MayIncludeUndef = false); + + bool markConstant(Value *V, Constant *C) { + assert(!V->getType()->isStructTy() && "structs should use mergeInValue"); + return markConstant(ValueState[V], V, C); + } + + // markOverdefined - Make a value be marked as "overdefined". 
If the + // value is not already overdefined, add it to the overdefined instruction + // work list so that the users of the instruction are updated later. + bool markOverdefined(ValueLatticeElement &IV, Value *V); + + /// Merge \p MergeWithV into \p IV and push \p V to the worklist, if \p IV + /// changes. + bool mergeInValue(ValueLatticeElement &IV, Value *V, + ValueLatticeElement MergeWithV, + ValueLatticeElement::MergeOptions Opts = { + /*MayIncludeUndef=*/false, /*CheckWiden=*/false}); + + bool mergeInValue(Value *V, ValueLatticeElement MergeWithV, + ValueLatticeElement::MergeOptions Opts = { + /*MayIncludeUndef=*/false, /*CheckWiden=*/false}) { + assert(!V->getType()->isStructTy() && + "non-structs should use markConstant"); + return mergeInValue(ValueState[V], V, MergeWithV, Opts); + } + + /// getValueState - Return the ValueLatticeElement object that corresponds to + /// the value. This function handles the case when the value hasn't been seen + /// yet by properly seeding constants etc. + ValueLatticeElement &getValueState(Value *V) { + assert(!V->getType()->isStructTy() && "Should use getStructValueState"); + + auto I = ValueState.insert(std::make_pair(V, ValueLatticeElement())); + ValueLatticeElement &LV = I.first->second; + + if (!I.second) + return LV; // Common case, already in the map. + + if (auto *C = dyn_cast<Constant>(V)) + LV.markConstant(C); // Constants are constant + + // All others are unknown by default. + return LV; + } + + /// getStructValueState - Return the ValueLatticeElement object that + /// corresponds to the value/field pair. This function handles the case when + /// the value hasn't been seen yet by properly seeding constants etc. + ValueLatticeElement &getStructValueState(Value *V, unsigned i) { + assert(V->getType()->isStructTy() && "Should use getValueState"); + assert(i < cast<StructType>(V->getType())->getNumElements() && + "Invalid element #"); + + auto I = StructValueState.insert( + std::make_pair(std::make_pair(V, i), ValueLatticeElement())); + ValueLatticeElement &LV = I.first->second; + + if (!I.second) + return LV; // Common case, already in the map. + + if (auto *C = dyn_cast<Constant>(V)) { + Constant *Elt = C->getAggregateElement(i); + + if (!Elt) + LV.markOverdefined(); // Unknown sort of constant. + else if (isa<UndefValue>(Elt)) + ; // Undef values remain unknown. + else + LV.markConstant(Elt); // Constants are constant. + } + + // All others are underdefined by default. + return LV; + } + + /// markEdgeExecutable - Mark a basic block as executable, adding it to the BB + /// work list if it is not already executable. + bool markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest); + + // getFeasibleSuccessors - Return a vector of booleans to indicate which + // successors are reachable from a given terminator instruction. + void getFeasibleSuccessors(Instruction &TI, SmallVectorImpl<bool> &Succs); + + // OperandChangedState - This method is invoked on all of the users of an + // instruction that was just changed state somehow. Based on this + // information, we need to update the specified user of this instruction. + void operandChangedState(Instruction *I) { + if (BBExecutable.count(I->getParent())) // Inst is executable? + visit(*I); + } + + // Add U as additional user of V. + void addAdditionalUser(Value *V, User *U) { + auto Iter = AdditionalUsers.insert({V, {}}); + Iter.first->second.insert(U); + } + + // Mark I's users as changed, including AdditionalUsers. 
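  // Sketch of why this is needed: AdditionalUsers records dependencies that
  // are not IR operands. For example, a value constrained via PredicateInfo
  // depends on the branch condition that implies the constraint; when that
  // condition's lattice value changes, the dependent user must be revisited
  // even though the condition never appears in its operand list.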
+ void markUsersAsChanged(Value *I) { + // Functions include their arguments in the use-list. Changed function + // values mean that the result of the function changed. We only need to + // update the call sites with the new function result and do not have to + // propagate the call arguments. + if (isa<Function>(I)) { + for (User *U : I->users()) { + if (auto *CB = dyn_cast<CallBase>(U)) + handleCallResult(*CB); + } + } else { + for (User *U : I->users()) + if (auto *UI = dyn_cast<Instruction>(U)) + operandChangedState(UI); + } + + auto Iter = AdditionalUsers.find(I); + if (Iter != AdditionalUsers.end()) { + // Copy additional users before notifying them of changes, because new + // users may be added, potentially invalidating the iterator. + SmallVector<Instruction *, 2> ToNotify; + for (User *U : Iter->second) + if (auto *UI = dyn_cast<Instruction>(U)) + ToNotify.push_back(UI); + for (Instruction *UI : ToNotify) + operandChangedState(UI); + } + } + void handleCallOverdefined(CallBase &CB); + void handleCallResult(CallBase &CB); + void handleCallArguments(CallBase &CB); + +private: + friend class InstVisitor<SCCPInstVisitor>; + + // visit implementations - Something changed in this instruction. Either an + // operand made a transition, or the instruction is newly executable. Change + // the value type of I to reflect these changes if appropriate. + void visitPHINode(PHINode &I); + + // Terminators + + void visitReturnInst(ReturnInst &I); + void visitTerminator(Instruction &TI); + + void visitCastInst(CastInst &I); + void visitSelectInst(SelectInst &I); + void visitUnaryOperator(Instruction &I); + void visitBinaryOperator(Instruction &I); + void visitCmpInst(CmpInst &I); + void visitExtractValueInst(ExtractValueInst &EVI); + void visitInsertValueInst(InsertValueInst &IVI); + + void visitCatchSwitchInst(CatchSwitchInst &CPI) { + markOverdefined(&CPI); + visitTerminator(CPI); + } + + // Instructions that cannot be folded away. + + void visitStoreInst(StoreInst &I); + void visitLoadInst(LoadInst &I); + void visitGetElementPtrInst(GetElementPtrInst &I); + + void visitInvokeInst(InvokeInst &II) { + visitCallBase(II); + visitTerminator(II); + } + + void visitCallBrInst(CallBrInst &CBI) { + visitCallBase(CBI); + visitTerminator(CBI); + } + + void visitCallBase(CallBase &CB); + void visitResumeInst(ResumeInst &I) { /*returns void*/ + } + void visitUnreachableInst(UnreachableInst &I) { /*returns void*/ + } + void visitFenceInst(FenceInst &I) { /*returns void*/ + } + + void visitInstruction(Instruction &I); + +public: + void addAnalysis(Function &F, AnalysisResultsForFn A) { + AnalysisResults.insert({&F, std::move(A)}); + } + + void visitCallInst(CallInst &I) { visitCallBase(I); } + + bool markBlockExecutable(BasicBlock *BB); + + const PredicateBase *getPredicateInfoFor(Instruction *I) { + auto A = AnalysisResults.find(I->getParent()->getParent()); + if (A == AnalysisResults.end()) + return nullptr; + return A->second.PredInfo->getPredicateInfoFor(I); + } + + DomTreeUpdater getDTU(Function &F) { + auto A = AnalysisResults.find(&F); + assert(A != AnalysisResults.end() && "Need analysis results for function."); + return {A->second.DT, A->second.PDT, DomTreeUpdater::UpdateStrategy::Lazy}; + } + + SCCPInstVisitor(const DataLayout &DL, + std::function<const TargetLibraryInfo &(Function &)> GetTLI, + LLVMContext &Ctx) + : DL(DL), GetTLI(GetTLI), Ctx(Ctx) {} + + void trackValueOfGlobalVariable(GlobalVariable *GV) { + // We only track the contents of scalar globals. 
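    // Example (sketch): for
    //   @g = internal global i32 7
    // the tracked lattice value starts as constant(i32 7); a later store of
    // a different value merges it to overdefined, at which point the entry
    // is dropped from TrackedGlobals.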
+ if (GV->getValueType()->isSingleValueType()) { + ValueLatticeElement &IV = TrackedGlobals[GV]; + if (!isa<UndefValue>(GV->getInitializer())) + IV.markConstant(GV->getInitializer()); + } + } + + void addTrackedFunction(Function *F) { + // Add an entry, F -> undef. + if (auto *STy = dyn_cast<StructType>(F->getReturnType())) { + MRVFunctionsTracked.insert(F); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) + TrackedMultipleRetVals.insert( + std::make_pair(std::make_pair(F, i), ValueLatticeElement())); + } else if (!F->getReturnType()->isVoidTy()) + TrackedRetVals.insert(std::make_pair(F, ValueLatticeElement())); + } + + void addToMustPreserveReturnsInFunctions(Function *F) { + MustPreserveReturnsInFunctions.insert(F); + } + + bool mustPreserveReturn(Function *F) { + return MustPreserveReturnsInFunctions.count(F); + } + + void addArgumentTrackedFunction(Function *F) { + TrackingIncomingArguments.insert(F); + } + + bool isArgumentTrackedFunction(Function *F) { + return TrackingIncomingArguments.count(F); + } + + void solve(); + + bool resolvedUndefsIn(Function &F); + + bool isBlockExecutable(BasicBlock *BB) const { + return BBExecutable.count(BB); + } + + bool isEdgeFeasible(BasicBlock *From, BasicBlock *To) const; + + std::vector<ValueLatticeElement> getStructLatticeValueFor(Value *V) const { + std::vector<ValueLatticeElement> StructValues; + auto *STy = dyn_cast<StructType>(V->getType()); + assert(STy && "getStructLatticeValueFor() can be called only on structs"); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + auto I = StructValueState.find(std::make_pair(V, i)); + assert(I != StructValueState.end() && "Value not in valuemap!"); + StructValues.push_back(I->second); + } + return StructValues; + } + + void removeLatticeValueFor(Value *V) { ValueState.erase(V); } + + const ValueLatticeElement &getLatticeValueFor(Value *V) const { + assert(!V->getType()->isStructTy() && + "Should use getStructLatticeValueFor"); + DenseMap<Value *, ValueLatticeElement>::const_iterator I = + ValueState.find(V); + assert(I != ValueState.end() && + "V not found in ValueState nor Paramstate map!"); + return I->second; + } + + const MapVector<Function *, ValueLatticeElement> &getTrackedRetVals() { + return TrackedRetVals; + } + + const DenseMap<GlobalVariable *, ValueLatticeElement> &getTrackedGlobals() { + return TrackedGlobals; + } + + const SmallPtrSet<Function *, 16> getMRVFunctionsTracked() { + return MRVFunctionsTracked; + } + + void markOverdefined(Value *V) { + if (auto *STy = dyn_cast<StructType>(V->getType())) + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) + markOverdefined(getStructValueState(V, i), V); + else + markOverdefined(ValueState[V], V); + } + + bool isStructLatticeConstant(Function *F, StructType *STy); + + Constant *getConstant(const ValueLatticeElement &LV) const; + + SmallPtrSetImpl<Function *> &getArgumentTrackedFunctions() { + return TrackingIncomingArguments; + } + + void markArgInFuncSpecialization(Function *F, Argument *A, Constant *C); + + void markFunctionUnreachable(Function *F) { + for (auto &BB : *F) + BBExecutable.erase(&BB); + } +}; + +} // namespace llvm + +bool SCCPInstVisitor::markBlockExecutable(BasicBlock *BB) { + if (!BBExecutable.insert(BB).second) + return false; + LLVM_DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << '\n'); + BBWorkList.push_back(BB); // Add the block to the work list! 
+ return true; +} + +void SCCPInstVisitor::pushToWorkList(ValueLatticeElement &IV, Value *V) { + if (IV.isOverdefined()) + return OverdefinedInstWorkList.push_back(V); + InstWorkList.push_back(V); +} + +void SCCPInstVisitor::pushToWorkListMsg(ValueLatticeElement &IV, Value *V) { + LLVM_DEBUG(dbgs() << "updated " << IV << ": " << *V << '\n'); + pushToWorkList(IV, V); +} + +bool SCCPInstVisitor::markConstant(ValueLatticeElement &IV, Value *V, + Constant *C, bool MayIncludeUndef) { + if (!IV.markConstant(C, MayIncludeUndef)) + return false; + LLVM_DEBUG(dbgs() << "markConstant: " << *C << ": " << *V << '\n'); + pushToWorkList(IV, V); + return true; +} + +bool SCCPInstVisitor::markOverdefined(ValueLatticeElement &IV, Value *V) { + if (!IV.markOverdefined()) + return false; + + LLVM_DEBUG(dbgs() << "markOverdefined: "; + if (auto *F = dyn_cast<Function>(V)) dbgs() + << "Function '" << F->getName() << "'\n"; + else dbgs() << *V << '\n'); + // Only instructions go on the work list + pushToWorkList(IV, V); + return true; +} + +bool SCCPInstVisitor::isStructLatticeConstant(Function *F, StructType *STy) { + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + const auto &It = TrackedMultipleRetVals.find(std::make_pair(F, i)); + assert(It != TrackedMultipleRetVals.end()); + ValueLatticeElement LV = It->second; + if (!isConstant(LV)) + return false; + } + return true; +} + +Constant *SCCPInstVisitor::getConstant(const ValueLatticeElement &LV) const { + if (LV.isConstant()) + return LV.getConstant(); + + if (LV.isConstantRange()) { + const auto &CR = LV.getConstantRange(); + if (CR.getSingleElement()) + return ConstantInt::get(Ctx, *CR.getSingleElement()); + } + return nullptr; +} + +void SCCPInstVisitor::markArgInFuncSpecialization(Function *F, Argument *A, + Constant *C) { + assert(F->arg_size() == A->getParent()->arg_size() && + "Functions should have the same number of arguments"); + + // Mark the argument constant in the new function. + markConstant(A, C); + + // For the remaining arguments in the new function, copy the lattice state + // over from the old function. + for (auto I = F->arg_begin(), J = A->getParent()->arg_begin(), + E = F->arg_end(); + I != E; ++I, ++J) + if (J != A && ValueState.count(I)) { + ValueState[J] = ValueState[I]; + pushToWorkList(ValueState[J], J); + } +} + +void SCCPInstVisitor::visitInstruction(Instruction &I) { + // All the instructions we don't do any special handling for just + // go to overdefined. + LLVM_DEBUG(dbgs() << "SCCP: Don't know how to handle: " << I << '\n'); + markOverdefined(&I); +} + +bool SCCPInstVisitor::mergeInValue(ValueLatticeElement &IV, Value *V, + ValueLatticeElement MergeWithV, + ValueLatticeElement::MergeOptions Opts) { + if (IV.mergeIn(MergeWithV, Opts)) { + pushToWorkList(IV, V); + LLVM_DEBUG(dbgs() << "Merged " << MergeWithV << " into " << *V << " : " + << IV << "\n"); + return true; + } + return false; +} + +bool SCCPInstVisitor::markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) { + if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second) + return false; // This edge is already known to be executable! + + if (!markBlockExecutable(Dest)) { + // If the destination is already executable, we just made an *edge* + // feasible that wasn't before. Revisit the PHI nodes in the block + // because they have potentially new operands. 
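    // Example (sketch): if Dest was already executable via edge A -> Dest
    // and edge B -> Dest has just become feasible, a phi such as
    //   %x = phi i32 [ 1, %A ], [ 2, %B ]
    // must be re-evaluated: its lattice value can move from constant(1) to
    // a range or to overdefined once the %B operand becomes live.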
+ LLVM_DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName() + << " -> " << Dest->getName() << '\n'); + + for (PHINode &PN : Dest->phis()) + visitPHINode(PN); + } + return true; +} + +// getFeasibleSuccessors - Return a vector of booleans to indicate which +// successors are reachable from a given terminator instruction. +void SCCPInstVisitor::getFeasibleSuccessors(Instruction &TI, + SmallVectorImpl<bool> &Succs) { + Succs.resize(TI.getNumSuccessors()); + if (auto *BI = dyn_cast<BranchInst>(&TI)) { + if (BI->isUnconditional()) { + Succs[0] = true; + return; + } + + ValueLatticeElement BCValue = getValueState(BI->getCondition()); + ConstantInt *CI = getConstantInt(BCValue); + if (!CI) { + // Overdefined condition variables, and branches on unfoldable constant + // conditions, mean the branch could go either way. + if (!BCValue.isUnknownOrUndef()) + Succs[0] = Succs[1] = true; + return; + } + + // Constant condition variables mean the branch can only go a single way. + Succs[CI->isZero()] = true; + return; + } + + // Unwinding instructions successors are always executable. + if (TI.isExceptionalTerminator()) { + Succs.assign(TI.getNumSuccessors(), true); + return; + } + + if (auto *SI = dyn_cast<SwitchInst>(&TI)) { + if (!SI->getNumCases()) { + Succs[0] = true; + return; + } + const ValueLatticeElement &SCValue = getValueState(SI->getCondition()); + if (ConstantInt *CI = getConstantInt(SCValue)) { + Succs[SI->findCaseValue(CI)->getSuccessorIndex()] = true; + return; + } + + // TODO: Switch on undef is UB. Stop passing false once the rest of LLVM + // is ready. + if (SCValue.isConstantRange(/*UndefAllowed=*/false)) { + const ConstantRange &Range = SCValue.getConstantRange(); + for (const auto &Case : SI->cases()) { + const APInt &CaseValue = Case.getCaseValue()->getValue(); + if (Range.contains(CaseValue)) + Succs[Case.getSuccessorIndex()] = true; + } + + // TODO: Determine whether default case is reachable. + Succs[SI->case_default()->getSuccessorIndex()] = true; + return; + } + + // Overdefined or unknown condition? All destinations are executable! + if (!SCValue.isUnknownOrUndef()) + Succs.assign(TI.getNumSuccessors(), true); + return; + } + + // In case of indirect branch and its address is a blockaddress, we mark + // the target as executable. + if (auto *IBR = dyn_cast<IndirectBrInst>(&TI)) { + // Casts are folded by visitCastInst. + ValueLatticeElement IBRValue = getValueState(IBR->getAddress()); + BlockAddress *Addr = dyn_cast_or_null<BlockAddress>(getConstant(IBRValue)); + if (!Addr) { // Overdefined or unknown condition? + // All destinations are executable! + if (!IBRValue.isUnknownOrUndef()) + Succs.assign(TI.getNumSuccessors(), true); + return; + } + + BasicBlock *T = Addr->getBasicBlock(); + assert(Addr->getFunction() == T->getParent() && + "Block address of a different function ?"); + for (unsigned i = 0; i < IBR->getNumSuccessors(); ++i) { + // This is the target. + if (IBR->getDestination(i) == T) { + Succs[i] = true; + return; + } + } + + // If we didn't find our destination in the IBR successor list, then we + // have undefined behavior. Its ok to assume no successor is executable. + return; + } + + // In case of callbr, we pessimistically assume that all successors are + // feasible. 
+ if (isa<CallBrInst>(&TI)) { + Succs.assign(TI.getNumSuccessors(), true); + return; + } + + LLVM_DEBUG(dbgs() << "Unknown terminator instruction: " << TI << '\n'); + llvm_unreachable("SCCP: Don't know how to handle this terminator!"); +} + +// isEdgeFeasible - Return true if the control flow edge from the 'From' basic +// block to the 'To' basic block is currently feasible. +bool SCCPInstVisitor::isEdgeFeasible(BasicBlock *From, BasicBlock *To) const { + // Check if we've called markEdgeExecutable on the edge yet. (We could + // be more aggressive and try to consider edges which haven't been marked + // yet, but there isn't any need.) + return KnownFeasibleEdges.count(Edge(From, To)); +} + +// visit Implementations - Something changed in this instruction, either an +// operand made a transition, or the instruction is newly executable. Change +// the value type of I to reflect these changes if appropriate. This method +// makes sure to do the following actions: +// +// 1. If a phi node merges two constants in, and has conflicting value coming +// from different branches, or if the PHI node merges in an overdefined +// value, then the PHI node becomes overdefined. +// 2. If a phi node merges only constants in, and they all agree on value, the +// PHI node becomes a constant value equal to that. +// 3. If V <- x (op) y && isConstant(x) && isConstant(y) V = Constant +// 4. If V <- x (op) y && (isOverdefined(x) || isOverdefined(y)) V = Overdefined +// 5. If V <- MEM or V <- CALL or V <- (unknown) then V = Overdefined +// 6. If a conditional branch has a value that is constant, make the selected +// destination executable +// 7. If a conditional branch has a value that is overdefined, make all +// successors executable. +void SCCPInstVisitor::visitPHINode(PHINode &PN) { + // If this PN returns a struct, just mark the result overdefined. + // TODO: We could do a lot better than this if code actually uses this. + if (PN.getType()->isStructTy()) + return (void)markOverdefined(&PN); + + if (getValueState(&PN).isOverdefined()) + return; // Quick exit + + // Super-extra-high-degree PHI nodes are unlikely to ever be marked constant, + // and slow us down a lot. Just mark them overdefined. + if (PN.getNumIncomingValues() > 64) + return (void)markOverdefined(&PN); + + unsigned NumActiveIncoming = 0; + + // Look at all of the executable operands of the PHI node. If any of them + // are overdefined, the PHI becomes overdefined as well. If they are all + // constant, and they agree with each other, the PHI becomes the identical + // constant. If they are constant and don't agree, the PHI is a constant + // range. If there are no executable operands, the PHI remains unknown. + ValueLatticeElement PhiState = getValueState(&PN); + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent())) + continue; + + ValueLatticeElement IV = getValueState(PN.getIncomingValue(i)); + PhiState.mergeIn(IV); + NumActiveIncoming++; + if (PhiState.isOverdefined()) + break; + } + + // We allow up to 1 range extension per active incoming value and one + // additional extension. Note that we manually adjust the number of range + // extensions to match the number of active incoming values. This helps to + // limit multiple extensions caused by the same incoming value, if other + // incoming values are equal. 
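+  // For example (an illustrative sketch; the IR names are made up): for
+  //   %p = phi i32 [ 1, %bb0 ], [ 2, %bb1 ]
+  // with both edges feasible, the merged PhiState is the constant range
+  // [1, 3) and NumActiveIncoming is 2, so up to 3 widening steps are
+  // permitted below.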
+ mergeInValue(&PN, PhiState, + ValueLatticeElement::MergeOptions().setMaxWidenSteps( + NumActiveIncoming + 1)); + ValueLatticeElement &PhiStateRef = getValueState(&PN); + PhiStateRef.setNumRangeExtensions( + std::max(NumActiveIncoming, PhiStateRef.getNumRangeExtensions())); +} + +void SCCPInstVisitor::visitReturnInst(ReturnInst &I) { + if (I.getNumOperands() == 0) + return; // ret void + + Function *F = I.getParent()->getParent(); + Value *ResultOp = I.getOperand(0); + + // If we are tracking the return value of this function, merge it in. + if (!TrackedRetVals.empty() && !ResultOp->getType()->isStructTy()) { + auto TFRVI = TrackedRetVals.find(F); + if (TFRVI != TrackedRetVals.end()) { + mergeInValue(TFRVI->second, F, getValueState(ResultOp)); + return; + } + } + + // Handle functions that return multiple values. + if (!TrackedMultipleRetVals.empty()) { + if (auto *STy = dyn_cast<StructType>(ResultOp->getType())) + if (MRVFunctionsTracked.count(F)) + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) + mergeInValue(TrackedMultipleRetVals[std::make_pair(F, i)], F, + getStructValueState(ResultOp, i)); + } +} + +void SCCPInstVisitor::visitTerminator(Instruction &TI) { + SmallVector<bool, 16> SuccFeasible; + getFeasibleSuccessors(TI, SuccFeasible); + + BasicBlock *BB = TI.getParent(); + + // Mark all feasible successors executable. + for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i) + if (SuccFeasible[i]) + markEdgeExecutable(BB, TI.getSuccessor(i)); +} + +void SCCPInstVisitor::visitCastInst(CastInst &I) { + // ResolvedUndefsIn might mark I as overdefined. Bail out, even if we would + // discover a concrete value later. + if (ValueState[&I].isOverdefined()) + return; + + ValueLatticeElement OpSt = getValueState(I.getOperand(0)); + if (Constant *OpC = getConstant(OpSt)) { + // Fold the constant as we build. + Constant *C = ConstantFoldCastOperand(I.getOpcode(), OpC, I.getType(), DL); + if (isa<UndefValue>(C)) + return; + // Propagate constant value + markConstant(&I, C); + } else if (OpSt.isConstantRange() && I.getDestTy()->isIntegerTy()) { + auto &LV = getValueState(&I); + ConstantRange OpRange = OpSt.getConstantRange(); + Type *DestTy = I.getDestTy(); + // Vectors where all elements have the same known constant range are treated + // as a single constant range in the lattice. When bitcasting such vectors, + // there is a mis-match between the width of the lattice value (single + // constant range) and the original operands (vector). Go to overdefined in + // that case. + if (I.getOpcode() == Instruction::BitCast && + I.getOperand(0)->getType()->isVectorTy() && + OpRange.getBitWidth() < DL.getTypeSizeInBits(DestTy)) + return (void)markOverdefined(&I); + + ConstantRange Res = + OpRange.castOp(I.getOpcode(), DL.getTypeSizeInBits(DestTy)); + mergeInValue(LV, &I, ValueLatticeElement::getRange(Res)); + } else if (!OpSt.isUnknownOrUndef()) + markOverdefined(&I); +} + +void SCCPInstVisitor::visitExtractValueInst(ExtractValueInst &EVI) { + // If this returns a struct, mark all elements over defined, we don't track + // structs in structs. + if (EVI.getType()->isStructTy()) + return (void)markOverdefined(&EVI); + + // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would + // discover a concrete value later. + if (ValueState[&EVI].isOverdefined()) + return (void)markOverdefined(&EVI); + + // If this is extracting from more than one level of struct, we don't know. 
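+  // For example: 'extractvalue { i32, { i32, i32 } } %agg, 1, 0' uses two
+  // indices, so it is conservatively sent to overdefined below.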
+ if (EVI.getNumIndices() != 1) + return (void)markOverdefined(&EVI); + + Value *AggVal = EVI.getAggregateOperand(); + if (AggVal->getType()->isStructTy()) { + unsigned i = *EVI.idx_begin(); + ValueLatticeElement EltVal = getStructValueState(AggVal, i); + mergeInValue(getValueState(&EVI), &EVI, EltVal); + } else { + // Otherwise, must be extracting from an array. + return (void)markOverdefined(&EVI); + } +} + +void SCCPInstVisitor::visitInsertValueInst(InsertValueInst &IVI) { + auto *STy = dyn_cast<StructType>(IVI.getType()); + if (!STy) + return (void)markOverdefined(&IVI); + + // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would + // discover a concrete value later. + if (isOverdefined(ValueState[&IVI])) + return (void)markOverdefined(&IVI); + + // If this has more than one index, we can't handle it, drive all results to + // undef. + if (IVI.getNumIndices() != 1) + return (void)markOverdefined(&IVI); + + Value *Aggr = IVI.getAggregateOperand(); + unsigned Idx = *IVI.idx_begin(); + + // Compute the result based on what we're inserting. + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + // This passes through all values that aren't the inserted element. + if (i != Idx) { + ValueLatticeElement EltVal = getStructValueState(Aggr, i); + mergeInValue(getStructValueState(&IVI, i), &IVI, EltVal); + continue; + } + + Value *Val = IVI.getInsertedValueOperand(); + if (Val->getType()->isStructTy()) + // We don't track structs in structs. + markOverdefined(getStructValueState(&IVI, i), &IVI); + else { + ValueLatticeElement InVal = getValueState(Val); + mergeInValue(getStructValueState(&IVI, i), &IVI, InVal); + } + } +} + +void SCCPInstVisitor::visitSelectInst(SelectInst &I) { + // If this select returns a struct, just mark the result overdefined. + // TODO: We could do a lot better than this if code actually uses this. + if (I.getType()->isStructTy()) + return (void)markOverdefined(&I); + + // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would + // discover a concrete value later. + if (ValueState[&I].isOverdefined()) + return (void)markOverdefined(&I); + + ValueLatticeElement CondValue = getValueState(I.getCondition()); + if (CondValue.isUnknownOrUndef()) + return; + + if (ConstantInt *CondCB = getConstantInt(CondValue)) { + Value *OpVal = CondCB->isZero() ? I.getFalseValue() : I.getTrueValue(); + mergeInValue(&I, getValueState(OpVal)); + return; + } + + // Otherwise, the condition is overdefined or a constant we can't evaluate. + // See if we can produce something better than overdefined based on the T/F + // value. + ValueLatticeElement TVal = getValueState(I.getTrueValue()); + ValueLatticeElement FVal = getValueState(I.getFalseValue()); + + bool Changed = ValueState[&I].mergeIn(TVal); + Changed |= ValueState[&I].mergeIn(FVal); + if (Changed) + pushToWorkListMsg(ValueState[&I], &I); +} + +// Handle Unary Operators. +void SCCPInstVisitor::visitUnaryOperator(Instruction &I) { + ValueLatticeElement V0State = getValueState(I.getOperand(0)); + + ValueLatticeElement &IV = ValueState[&I]; + // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would + // discover a concrete value later. + if (isOverdefined(IV)) + return (void)markOverdefined(&I); + + if (isConstant(V0State)) { + Constant *C = ConstantExpr::get(I.getOpcode(), getConstant(V0State)); + + // op Y -> undef. + if (isa<UndefValue>(C)) + return; + return (void)markConstant(IV, &I, C); + } + + // If something is undef, wait for it to resolve. 
+ if (!isOverdefined(V0State)) + return; + + markOverdefined(&I); +} + +// Handle Binary Operators. +void SCCPInstVisitor::visitBinaryOperator(Instruction &I) { + ValueLatticeElement V1State = getValueState(I.getOperand(0)); + ValueLatticeElement V2State = getValueState(I.getOperand(1)); + + ValueLatticeElement &IV = ValueState[&I]; + if (IV.isOverdefined()) + return; + + // If something is undef, wait for it to resolve. + if (V1State.isUnknownOrUndef() || V2State.isUnknownOrUndef()) + return; + + if (V1State.isOverdefined() && V2State.isOverdefined()) + return (void)markOverdefined(&I); + + // If either of the operands is a constant, try to fold it to a constant. + // TODO: Use information from notconstant better. + if ((V1State.isConstant() || V2State.isConstant())) { + Value *V1 = isConstant(V1State) ? getConstant(V1State) : I.getOperand(0); + Value *V2 = isConstant(V2State) ? getConstant(V2State) : I.getOperand(1); + Value *R = SimplifyBinOp(I.getOpcode(), V1, V2, SimplifyQuery(DL)); + auto *C = dyn_cast_or_null<Constant>(R); + if (C) { + // X op Y -> undef. + if (isa<UndefValue>(C)) + return; + // Conservatively assume that the result may be based on operands that may + // be undef. Note that we use mergeInValue to combine the constant with + // the existing lattice value for I, as different constants might be found + // after one of the operands go to overdefined, e.g. due to one operand + // being a special floating value. + ValueLatticeElement NewV; + NewV.markConstant(C, /*MayIncludeUndef=*/true); + return (void)mergeInValue(&I, NewV); + } + } + + // Only use ranges for binary operators on integers. + if (!I.getType()->isIntegerTy()) + return markOverdefined(&I); + + // Try to simplify to a constant range. + ConstantRange A = ConstantRange::getFull(I.getType()->getScalarSizeInBits()); + ConstantRange B = ConstantRange::getFull(I.getType()->getScalarSizeInBits()); + if (V1State.isConstantRange()) + A = V1State.getConstantRange(); + if (V2State.isConstantRange()) + B = V2State.getConstantRange(); + + ConstantRange R = A.binaryOp(cast<BinaryOperator>(&I)->getOpcode(), B); + mergeInValue(&I, ValueLatticeElement::getRange(R)); + + // TODO: Currently we do not exploit special values that produce something + // better than overdefined with an overdefined operand for vector or floating + // point types, like and <4 x i32> overdefined, zeroinitializer. +} + +// Handle ICmpInst instruction. +void SCCPInstVisitor::visitCmpInst(CmpInst &I) { + // Do not cache this lookup, getValueState calls later in the function might + // invalidate the reference. + if (isOverdefined(ValueState[&I])) + return (void)markOverdefined(&I); + + Value *Op1 = I.getOperand(0); + Value *Op2 = I.getOperand(1); + + // For parameters, use ParamState which includes constant range info if + // available. + auto V1State = getValueState(Op1); + auto V2State = getValueState(Op2); + + Constant *C = V1State.getCompare(I.getPredicate(), I.getType(), V2State); + if (C) { + if (isa<UndefValue>(C)) + return; + ValueLatticeElement CV; + CV.markConstant(C); + mergeInValue(&I, CV); + return; + } + + // If operands are still unknown, wait for it to resolve. + if ((V1State.isUnknownOrUndef() || V2State.isUnknownOrUndef()) && + !isConstant(ValueState[&I])) + return; + + markOverdefined(&I); +} + +// Handle getelementptr instructions. If all operands are constants then we +// can turn this into a getelementptr ConstantExpr. 
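+// For example (an illustrative sketch; the IR names are made up): for
+//   %p = getelementptr i32, i32* @arr, i64 %i
+// where %i has been proven to be the lattice constant 2, the visit below
+// folds %p to the ConstantExpr 'getelementptr (i32, i32* @arr, i64 2)'.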
+void SCCPInstVisitor::visitGetElementPtrInst(GetElementPtrInst &I) { + if (isOverdefined(ValueState[&I])) + return (void)markOverdefined(&I); + + SmallVector<Constant *, 8> Operands; + Operands.reserve(I.getNumOperands()); + + for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { + ValueLatticeElement State = getValueState(I.getOperand(i)); + if (State.isUnknownOrUndef()) + return; // Operands are not resolved yet. + + if (isOverdefined(State)) + return (void)markOverdefined(&I); + + if (Constant *C = getConstant(State)) { + Operands.push_back(C); + continue; + } + + return (void)markOverdefined(&I); + } + + Constant *Ptr = Operands[0]; + auto Indices = makeArrayRef(Operands.begin() + 1, Operands.end()); + Constant *C = + ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr, Indices); + if (isa<UndefValue>(C)) + return; + markConstant(&I, C); +} + +void SCCPInstVisitor::visitStoreInst(StoreInst &SI) { + // If this store is of a struct, ignore it. + if (SI.getOperand(0)->getType()->isStructTy()) + return; + + if (TrackedGlobals.empty() || !isa<GlobalVariable>(SI.getOperand(1))) + return; + + GlobalVariable *GV = cast<GlobalVariable>(SI.getOperand(1)); + auto I = TrackedGlobals.find(GV); + if (I == TrackedGlobals.end()) + return; + + // Get the value we are storing into the global, then merge it. + mergeInValue(I->second, GV, getValueState(SI.getOperand(0)), + ValueLatticeElement::MergeOptions().setCheckWiden(false)); + if (I->second.isOverdefined()) + TrackedGlobals.erase(I); // No need to keep tracking this! +} + +static ValueLatticeElement getValueFromMetadata(const Instruction *I) { + if (MDNode *Ranges = I->getMetadata(LLVMContext::MD_range)) + if (I->getType()->isIntegerTy()) + return ValueLatticeElement::getRange( + getConstantRangeFromMetadata(*Ranges)); + if (I->hasMetadata(LLVMContext::MD_nonnull)) + return ValueLatticeElement::getNot( + ConstantPointerNull::get(cast<PointerType>(I->getType()))); + return ValueLatticeElement::getOverdefined(); +} + +// Handle load instructions. If the operand is a constant pointer to a constant +// global, we can replace the load with the loaded constant value! +void SCCPInstVisitor::visitLoadInst(LoadInst &I) { + // If this load is of a struct or the load is volatile, just mark the result + // as overdefined. + if (I.getType()->isStructTy() || I.isVolatile()) + return (void)markOverdefined(&I); + + // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would + // discover a concrete value later. + if (ValueState[&I].isOverdefined()) + return (void)markOverdefined(&I); + + ValueLatticeElement PtrVal = getValueState(I.getOperand(0)); + if (PtrVal.isUnknownOrUndef()) + return; // The pointer is not resolved yet! + + ValueLatticeElement &IV = ValueState[&I]; + + if (isConstant(PtrVal)) { + Constant *Ptr = getConstant(PtrVal); + + // load null is undefined. + if (isa<ConstantPointerNull>(Ptr)) { + if (NullPointerIsDefined(I.getFunction(), I.getPointerAddressSpace())) + return (void)markOverdefined(IV, &I); + else + return; + } + + // Transform load (constant global) into the value loaded. + if (auto *GV = dyn_cast<GlobalVariable>(Ptr)) { + if (!TrackedGlobals.empty()) { + // If we are tracking this global, merge in the known value for it. + auto It = TrackedGlobals.find(GV); + if (It != TrackedGlobals.end()) { + mergeInValue(IV, &I, It->second, getMaxWidenStepsOpts()); + return; + } + } + } + + // Transform load from a constant into a constant if possible. 
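+    // For example (illustrative; '@g' is a made-up name): with
+    //   @g = constant i32 42
+    // a load of @g folds to the lattice constant 42 via
+    // ConstantFoldLoadFromConstPtr.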
+ if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, I.getType(), DL)) { + if (isa<UndefValue>(C)) + return; + return (void)markConstant(IV, &I, C); + } + } + + // Fall back to metadata. + mergeInValue(&I, getValueFromMetadata(&I)); +} + +void SCCPInstVisitor::visitCallBase(CallBase &CB) { + handleCallResult(CB); + handleCallArguments(CB); +} + +void SCCPInstVisitor::handleCallOverdefined(CallBase &CB) { + Function *F = CB.getCalledFunction(); + + // Void return and not tracking callee, just bail. + if (CB.getType()->isVoidTy()) + return; + + // Always mark struct return as overdefined. + if (CB.getType()->isStructTy()) + return (void)markOverdefined(&CB); + + // Otherwise, if we have a single return value case, and if the function is + // a declaration, maybe we can constant fold it. + if (F && F->isDeclaration() && canConstantFoldCallTo(&CB, F)) { + SmallVector<Constant *, 8> Operands; + for (auto AI = CB.arg_begin(), E = CB.arg_end(); AI != E; ++AI) { + if (AI->get()->getType()->isStructTy()) + return markOverdefined(&CB); // Can't handle struct args. + ValueLatticeElement State = getValueState(*AI); + + if (State.isUnknownOrUndef()) + return; // Operands are not resolved yet. + if (isOverdefined(State)) + return (void)markOverdefined(&CB); + assert(isConstant(State) && "Unknown state!"); + Operands.push_back(getConstant(State)); + } + + if (isOverdefined(getValueState(&CB))) + return (void)markOverdefined(&CB); + + // If we can constant fold this, mark the result of the call as a + // constant. + if (Constant *C = ConstantFoldCall(&CB, F, Operands, &GetTLI(*F))) { + // call -> undef. + if (isa<UndefValue>(C)) + return; + return (void)markConstant(&CB, C); + } + } + + // Fall back to metadata. + mergeInValue(&CB, getValueFromMetadata(&CB)); +} + +void SCCPInstVisitor::handleCallArguments(CallBase &CB) { + Function *F = CB.getCalledFunction(); + // If this is a local function that doesn't have its address taken, mark its + // entry block executable and merge in the actual arguments to the call into + // the formal arguments of the function. + if (!TrackingIncomingArguments.empty() && + TrackingIncomingArguments.count(F)) { + markBlockExecutable(&F->front()); + + // Propagate information from this call site into the callee. + auto CAI = CB.arg_begin(); + for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; + ++AI, ++CAI) { + // If this argument is byval, and if the function is not readonly, there + // will be an implicit copy formed of the input aggregate. 
+      if (AI->hasByValAttr() && !F->onlyReadsMemory()) {
+        markOverdefined(&*AI);
+        continue;
+      }
+
+      if (auto *STy = dyn_cast<StructType>(AI->getType())) {
+        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+          ValueLatticeElement CallArg = getStructValueState(*CAI, i);
+          mergeInValue(getStructValueState(&*AI, i), &*AI, CallArg,
+                       getMaxWidenStepsOpts());
+        }
+      } else
+        mergeInValue(&*AI, getValueState(*CAI), getMaxWidenStepsOpts());
+    }
+  }
+}
+
+void SCCPInstVisitor::handleCallResult(CallBase &CB) {
+  Function *F = CB.getCalledFunction();
+
+  if (auto *II = dyn_cast<IntrinsicInst>(&CB)) {
+    if (II->getIntrinsicID() == Intrinsic::ssa_copy) {
+      if (ValueState[&CB].isOverdefined())
+        return;
+
+      Value *CopyOf = CB.getOperand(0);
+      ValueLatticeElement CopyOfVal = getValueState(CopyOf);
+      const auto *PI = getPredicateInfoFor(&CB);
+      assert(PI && "Missing predicate info for ssa.copy");
+
+      const Optional<PredicateConstraint> &Constraint = PI->getConstraint();
+      if (!Constraint) {
+        mergeInValue(ValueState[&CB], &CB, CopyOfVal);
+        return;
+      }
+
+      CmpInst::Predicate Pred = Constraint->Predicate;
+      Value *OtherOp = Constraint->OtherOp;
+
+      // Wait until OtherOp is resolved.
+      if (getValueState(OtherOp).isUnknown()) {
+        addAdditionalUser(OtherOp, &CB);
+        return;
+      }
+
+      // TODO: Actually flip MayIncludeUndef for the created range to false,
+      // once most places in the optimizer respect the "branches on
+      // undef/poison are UB" rule. The reason why the new range cannot be
+      // undef is as follows:
+      // The new range is based on a branch condition. That guarantees that
+      // neither of the compare operands can be undef in the branch targets,
+      // unless we have conditions that are always true/false (e.g. icmp ule
+      // i32 %a, i32_max). For the latter, an overdefined/empty range will be
+      // inferred, but the branch will get folded accordingly anyway.
+      bool MayIncludeUndef = !isa<PredicateAssume>(PI);
+
+      ValueLatticeElement CondVal = getValueState(OtherOp);
+      ValueLatticeElement &IV = ValueState[&CB];
+      if (CondVal.isConstantRange() || CopyOfVal.isConstantRange()) {
+        auto ImposedCR =
+            ConstantRange::getFull(DL.getTypeSizeInBits(CopyOf->getType()));
+
+        // Get the range imposed by the condition.
+        if (CondVal.isConstantRange())
+          ImposedCR = ConstantRange::makeAllowedICmpRegion(
+              Pred, CondVal.getConstantRange());
+
+        // Combine range info for the original value with the new range from
+        // the condition.
+        auto CopyOfCR = CopyOfVal.isConstantRange()
+                            ? CopyOfVal.getConstantRange()
+                            : ConstantRange::getFull(
+                                  DL.getTypeSizeInBits(CopyOf->getType()));
+        auto NewCR = ImposedCR.intersectWith(CopyOfCR);
+        // If the existing information is != x, do not use the information
+        // from a chained predicate, as the != x information is more likely
+        // to be helpful in practice.
+        if (!CopyOfCR.contains(NewCR) && CopyOfCR.getSingleMissingElement())
+          NewCR = CopyOfCR;
+
+        addAdditionalUser(OtherOp, &CB);
+        mergeInValue(IV, &CB,
+                     ValueLatticeElement::getRange(NewCR, MayIncludeUndef));
+        return;
+      } else if (Pred == CmpInst::ICMP_EQ && CondVal.isConstant()) {
+        // For non-integer values or integer constant expressions, only
+        // propagate equal constants.
+        addAdditionalUser(OtherOp, &CB);
+        mergeInValue(IV, &CB, CondVal);
+        return;
+      } else if (Pred == CmpInst::ICMP_NE && CondVal.isConstant() &&
+                 !MayIncludeUndef) {
+        // Propagate inequalities.
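+        // For example (a sketch): an assume of '%x != null' yields an
+        // ssa.copy with Pred == ICMP_NE and OtherOp == null, so the copy's
+        // lattice value becomes 'notconstant null' via getNot() below.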
+ addAdditionalUser(OtherOp, &CB); + mergeInValue(IV, &CB, + ValueLatticeElement::getNot(CondVal.getConstant())); + return; + } + + return (void)mergeInValue(IV, &CB, CopyOfVal); + } + + if (ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) { + // Compute result range for intrinsics supported by ConstantRange. + // Do this even if we don't know a range for all operands, as we may + // still know something about the result range, e.g. of abs(x). + SmallVector<ConstantRange, 2> OpRanges; + for (Value *Op : II->args()) { + const ValueLatticeElement &State = getValueState(Op); + if (State.isConstantRange()) + OpRanges.push_back(State.getConstantRange()); + else + OpRanges.push_back( + ConstantRange::getFull(Op->getType()->getScalarSizeInBits())); + } + + ConstantRange Result = + ConstantRange::intrinsic(II->getIntrinsicID(), OpRanges); + return (void)mergeInValue(II, ValueLatticeElement::getRange(Result)); + } + } + + // The common case is that we aren't tracking the callee, either because we + // are not doing interprocedural analysis or the callee is indirect, or is + // external. Handle these cases first. + if (!F || F->isDeclaration()) + return handleCallOverdefined(CB); + + // If this is a single/zero retval case, see if we're tracking the function. + if (auto *STy = dyn_cast<StructType>(F->getReturnType())) { + if (!MRVFunctionsTracked.count(F)) + return handleCallOverdefined(CB); // Not tracking this callee. + + // If we are tracking this callee, propagate the result of the function + // into this call site. + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) + mergeInValue(getStructValueState(&CB, i), &CB, + TrackedMultipleRetVals[std::make_pair(F, i)], + getMaxWidenStepsOpts()); + } else { + auto TFRVI = TrackedRetVals.find(F); + if (TFRVI == TrackedRetVals.end()) + return handleCallOverdefined(CB); // Not tracking this callee. + + // If so, propagate the return value of the callee into this call result. + mergeInValue(&CB, TFRVI->second, getMaxWidenStepsOpts()); + } +} + +void SCCPInstVisitor::solve() { + // Process the work lists until they are empty! + while (!BBWorkList.empty() || !InstWorkList.empty() || + !OverdefinedInstWorkList.empty()) { + // Process the overdefined instruction's work list first, which drives other + // things to overdefined more quickly. + while (!OverdefinedInstWorkList.empty()) { + Value *I = OverdefinedInstWorkList.pop_back_val(); + + LLVM_DEBUG(dbgs() << "\nPopped off OI-WL: " << *I << '\n'); + + // "I" got into the work list because it either made the transition from + // bottom to constant, or to overdefined. + // + // Anything on this worklist that is overdefined need not be visited + // since all of its users will have already been marked as overdefined + // Update all of the users of this instruction's value. + // + markUsersAsChanged(I); + } + + // Process the instruction work list. + while (!InstWorkList.empty()) { + Value *I = InstWorkList.pop_back_val(); + + LLVM_DEBUG(dbgs() << "\nPopped off I-WL: " << *I << '\n'); + + // "I" got into the work list because it made the transition from undef to + // constant. + // + // Anything on this worklist that is overdefined need not be visited + // since all of its users will have already been marked as overdefined. + // Update all of the users of this instruction's value. + // + if (I->getType()->isStructTy() || !getValueState(I).isOverdefined()) + markUsersAsChanged(I); + } + + // Process the basic block work list. 
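+    // Each block enters this list at most once: markBlockExecutable() only
+    // pushes a block the first time it becomes executable, and later
+    // re-evaluation of individual instructions happens through the two
+    // instruction work lists above.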
+    while (!BBWorkList.empty()) {
+      BasicBlock *BB = BBWorkList.pop_back_val();
+
+      LLVM_DEBUG(dbgs() << "\nPopped off BBWL: " << *BB << '\n');
+
+      // Notify all instructions in this basic block that they are newly
+      // executable.
+      visit(BB);
+    }
+  }
+}
+
+/// resolvedUndefsIn - While solving the dataflow for a function, we assume
+/// that branches on undef values cannot reach any of their successors.
+/// However, this is not a safe assumption. After we solve dataflow, this
+/// method should be used to handle this. If this returns true, the solver
+/// should be rerun.
+///
+/// This method handles this by finding an unresolved branch and marking one
+/// of the edges from the block as feasible, even though the condition
+/// doesn't say it would otherwise be. This allows SCCP to find the rest of
+/// the CFG and only slightly pessimizes the analysis results (by marking
+/// one, potentially infeasible, edge feasible). This cannot usefully modify
+/// the constraints on the condition of the branch, as that would impact
+/// other users of the value.
+///
+/// This scan also checks for values that use undefs. It conservatively
+/// marks them as overdefined.
+bool SCCPInstVisitor::resolvedUndefsIn(Function &F) {
+  bool MadeChange = false;
+  for (BasicBlock &BB : F) {
+    if (!BBExecutable.count(&BB))
+      continue;
+
+    for (Instruction &I : BB) {
+      // Look for instructions which produce undef values.
+      if (I.getType()->isVoidTy())
+        continue;
+
+      if (auto *STy = dyn_cast<StructType>(I.getType())) {
+        // Only a few things that can be structs matter for undef.
+
+        // Tracked calls must never be marked overdefined in
+        // resolvedUndefsIn.
+        if (auto *CB = dyn_cast<CallBase>(&I))
+          if (Function *F = CB->getCalledFunction())
+            if (MRVFunctionsTracked.count(F))
+              continue;
+
+        // extractvalue and insertvalue don't need to be marked; they are
+        // tracked as precisely as their operands.
+        if (isa<ExtractValueInst>(I) || isa<InsertValueInst>(I))
+          continue;
+        // Send the results of everything else to overdefined. We could be
+        // more precise than this but it isn't worth bothering.
+        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+          ValueLatticeElement &LV = getStructValueState(&I, i);
+          if (LV.isUnknownOrUndef()) {
+            markOverdefined(LV, &I);
+            MadeChange = true;
+          }
+        }
+        continue;
+      }
+
+      ValueLatticeElement &LV = getValueState(&I);
+      if (!LV.isUnknownOrUndef())
+        continue;
+
+      // There are two reasons a call can have an undef result:
+      // 1. It could be tracked.
+      // 2. It could be constant-foldable.
+      // Because of the way we solve return values, tracked calls must
+      // never be marked overdefined in resolvedUndefsIn.
+      if (auto *CB = dyn_cast<CallBase>(&I))
+        if (Function *F = CB->getCalledFunction())
+          if (TrackedRetVals.count(F))
+            continue;
+
+      if (isa<LoadInst>(I)) {
+        // A load here means one of two things: a load of undef from a
+        // global, or a load from an unknown pointer. Either way, having it
+        // return undef is okay.
+        continue;
+      }
+
+      markOverdefined(&I);
+      MadeChange = true;
+    }
+
+    // Check to see if we have a branch or switch on an undefined value. If
+    // so, we force the branch to go one way or the other to make the
+    // successor values live. It doesn't really matter which way we force it.
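+    // For example (hypothetical): given 'br i1 undef, label %t, label %f',
+    // the condition is rewritten to 'false' below and only the edge to %f
+    // is marked executable, so the solve/resolve loop can make progress.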
+    Instruction *TI = BB.getTerminator();
+    if (auto *BI = dyn_cast<BranchInst>(TI)) {
+      if (!BI->isConditional())
+        continue;
+      if (!getValueState(BI->getCondition()).isUnknownOrUndef())
+        continue;
+
+      // If the input to SCCP is actually a branch on undef, fix the undef
+      // to false.
+      if (isa<UndefValue>(BI->getCondition())) {
+        BI->setCondition(ConstantInt::getFalse(BI->getContext()));
+        markEdgeExecutable(&BB, TI->getSuccessor(1));
+        MadeChange = true;
+        continue;
+      }
+
+      // Otherwise, it is a branch on a symbolic value which is currently
+      // considered to be undef. Make sure some edge is executable, so a
+      // branch on "undef" always flows somewhere.
+      // FIXME: Distinguish between dead code and an LLVM "undef" value.
+      BasicBlock *DefaultSuccessor = TI->getSuccessor(1);
+      if (markEdgeExecutable(&BB, DefaultSuccessor))
+        MadeChange = true;
+
+      continue;
+    }
+
+    if (auto *IBR = dyn_cast<IndirectBrInst>(TI)) {
+      // Indirect branch with no successors? It's OK to assume it branches
+      // to no target.
+      if (IBR->getNumSuccessors() < 1)
+        continue;
+
+      if (!getValueState(IBR->getAddress()).isUnknownOrUndef())
+        continue;
+
+      // If the input to SCCP is actually a branch on undef, fix the undef
+      // to the first successor of the indirect branch.
+      if (isa<UndefValue>(IBR->getAddress())) {
+        IBR->setAddress(BlockAddress::get(IBR->getSuccessor(0)));
+        markEdgeExecutable(&BB, IBR->getSuccessor(0));
+        MadeChange = true;
+        continue;
+      }
+
+      // Otherwise, it is a branch on a symbolic value which is currently
+      // considered to be undef. Make sure some edge is executable, so a
+      // branch on "undef" always flows somewhere.
+      // FIXME: IndirectBr on "undef" doesn't actually need to go anywhere:
+      // we can assume the branch has undefined behavior instead.
+      BasicBlock *DefaultSuccessor = IBR->getSuccessor(0);
+      if (markEdgeExecutable(&BB, DefaultSuccessor))
+        MadeChange = true;
+
+      continue;
+    }
+
+    if (auto *SI = dyn_cast<SwitchInst>(TI)) {
+      if (!SI->getNumCases() ||
+          !getValueState(SI->getCondition()).isUnknownOrUndef())
+        continue;
+
+      // If the input to SCCP is actually a switch on undef, fix the undef
+      // to the first constant.
+      if (isa<UndefValue>(SI->getCondition())) {
+        SI->setCondition(SI->case_begin()->getCaseValue());
+        markEdgeExecutable(&BB, SI->case_begin()->getCaseSuccessor());
+        MadeChange = true;
+        continue;
+      }
+
+      // Otherwise, it is a branch on a symbolic value which is currently
+      // considered to be undef. Make sure some edge is executable, so a
+      // branch on "undef" always flows somewhere.
+      // FIXME: Distinguish between dead code and an LLVM "undef" value.
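+      // For example (a sketch): for 'switch i32 %x' where %x is still
+      // unknown, the edge to the first case's successor is forced
+      // executable below.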
+ BasicBlock *DefaultSuccessor = SI->case_begin()->getCaseSuccessor(); + if (markEdgeExecutable(&BB, DefaultSuccessor)) + MadeChange = true; + + continue; + } + } + + return MadeChange; +} + +//===----------------------------------------------------------------------===// +// +// SCCPSolver implementations +// +SCCPSolver::SCCPSolver( + const DataLayout &DL, + std::function<const TargetLibraryInfo &(Function &)> GetTLI, + LLVMContext &Ctx) + : Visitor(new SCCPInstVisitor(DL, std::move(GetTLI), Ctx)) {} + +SCCPSolver::~SCCPSolver() {} + +void SCCPSolver::addAnalysis(Function &F, AnalysisResultsForFn A) { + return Visitor->addAnalysis(F, std::move(A)); +} + +bool SCCPSolver::markBlockExecutable(BasicBlock *BB) { + return Visitor->markBlockExecutable(BB); +} + +const PredicateBase *SCCPSolver::getPredicateInfoFor(Instruction *I) { + return Visitor->getPredicateInfoFor(I); +} + +DomTreeUpdater SCCPSolver::getDTU(Function &F) { return Visitor->getDTU(F); } + +void SCCPSolver::trackValueOfGlobalVariable(GlobalVariable *GV) { + Visitor->trackValueOfGlobalVariable(GV); +} + +void SCCPSolver::addTrackedFunction(Function *F) { + Visitor->addTrackedFunction(F); +} + +void SCCPSolver::addToMustPreserveReturnsInFunctions(Function *F) { + Visitor->addToMustPreserveReturnsInFunctions(F); +} + +bool SCCPSolver::mustPreserveReturn(Function *F) { + return Visitor->mustPreserveReturn(F); +} + +void SCCPSolver::addArgumentTrackedFunction(Function *F) { + Visitor->addArgumentTrackedFunction(F); +} + +bool SCCPSolver::isArgumentTrackedFunction(Function *F) { + return Visitor->isArgumentTrackedFunction(F); +} + +void SCCPSolver::solve() { Visitor->solve(); } + +bool SCCPSolver::resolvedUndefsIn(Function &F) { + return Visitor->resolvedUndefsIn(F); +} + +bool SCCPSolver::isBlockExecutable(BasicBlock *BB) const { + return Visitor->isBlockExecutable(BB); +} + +bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) const { + return Visitor->isEdgeFeasible(From, To); +} + +std::vector<ValueLatticeElement> +SCCPSolver::getStructLatticeValueFor(Value *V) const { + return Visitor->getStructLatticeValueFor(V); +} + +void SCCPSolver::removeLatticeValueFor(Value *V) { + return Visitor->removeLatticeValueFor(V); +} + +const ValueLatticeElement &SCCPSolver::getLatticeValueFor(Value *V) const { + return Visitor->getLatticeValueFor(V); +} + +const MapVector<Function *, ValueLatticeElement> & +SCCPSolver::getTrackedRetVals() { + return Visitor->getTrackedRetVals(); +} + +const DenseMap<GlobalVariable *, ValueLatticeElement> & +SCCPSolver::getTrackedGlobals() { + return Visitor->getTrackedGlobals(); +} + +const SmallPtrSet<Function *, 16> SCCPSolver::getMRVFunctionsTracked() { + return Visitor->getMRVFunctionsTracked(); +} + +void SCCPSolver::markOverdefined(Value *V) { Visitor->markOverdefined(V); } + +bool SCCPSolver::isStructLatticeConstant(Function *F, StructType *STy) { + return Visitor->isStructLatticeConstant(F, STy); +} + +Constant *SCCPSolver::getConstant(const ValueLatticeElement &LV) const { + return Visitor->getConstant(LV); +} + +SmallPtrSetImpl<Function *> &SCCPSolver::getArgumentTrackedFunctions() { + return Visitor->getArgumentTrackedFunctions(); +} + +void SCCPSolver::markArgInFuncSpecialization(Function *F, Argument *A, + Constant *C) { + Visitor->markArgInFuncSpecialization(F, A, C); +} + +void SCCPSolver::markFunctionUnreachable(Function *F) { + Visitor->markFunctionUnreachable(F); +} + +void SCCPSolver::visit(Instruction *I) { Visitor->visit(I); } + +void SCCPSolver::visitCall(CallInst &I) { 
Visitor->visitCall(I); }
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index c210d1c46077..5893ce15b129 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -123,8 +123,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
     }
   } else {
     bool isFirstPred = true;
-    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
-      BasicBlock *PredBB = *PI;
+    for (BasicBlock *PredBB : predecessors(BB)) {
       Value *PredVal = GetValueAtEndOfBlock(PredBB);
       PredValues.push_back(std::make_pair(PredBB, PredVal));
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
new file mode 100644
index 000000000000..6d995cf4c048
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
@@ -0,0 +1,177 @@
+//===- SampleProfileLoaderBaseUtil.cpp - Profile loader Util func ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SampleProfileLoader base utility functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
+
+namespace llvm {
+
+cl::opt<unsigned> SampleProfileMaxPropagateIterations(
+    "sample-profile-max-propagate-iterations", cl::init(100),
+    cl::desc("Maximum number of iterations to go through when propagating "
+             "sample block/edge weights through the CFG."));
+
+cl::opt<unsigned> SampleProfileRecordCoverage(
+    "sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"),
+    cl::desc("Emit a warning if less than N% of records in the input profile "
+             "are matched to the IR."));
+
+cl::opt<unsigned> SampleProfileSampleCoverage(
+    "sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"),
+    cl::desc("Emit a warning if less than N% of samples in the input profile "
+             "are matched to the IR."));
+
+cl::opt<bool> NoWarnSampleUnused(
+    "no-warn-sample-unused", cl::init(false), cl::Hidden,
+    cl::desc("Use this option to turn off/on warnings about functions with "
+             "samples but without the debug information needed to use those "
+             "samples."));
+
+namespace sampleprofutil {
+
+/// Return true if the given callsite is hot with respect to the hot cutoff
+/// threshold.
+///
+/// Functions that were inlined in the original binary will be represented
+/// in the inline stack in the sample profile. If the profile shows that
+/// the original inline decision was "good" (i.e., the callsite is executed
+/// frequently), then we will recreate the inline decision and apply the
+/// profile from the inlined callsite.
+///
+/// To decide whether an inlined callsite is hot, we compare the callsite
+/// sample count with the hot cutoff computed by ProfileSummaryInfo; it is
+/// regarded as hot if the count is above the cutoff value.
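+///
+/// For example (illustrative numbers only): with a hot-count cutoff of
+/// 1000, an inlined callsite whose FunctionSamples total 1500 would be
+/// treated as hot and considered for re-inlining.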
+/// +/// When ProfileAccurateForSymsInList is enabled and profile symbol list +/// is present, functions in the profile symbol list but without profile will +/// be regarded as cold and much less inlining will happen in CGSCC inlining +/// pass, so we tend to lower the hot criteria here to allow more early +/// inlining to happen for warm callsites and it is helpful for performance. +bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, + bool ProfAccForSymsInList) { + if (!CallsiteFS) + return false; // The callsite was not inlined in the original binary. + + assert(PSI && "PSI is expected to be non null"); + uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples(); + if (ProfAccForSymsInList) + return !PSI->isColdCount(CallsiteTotalSamples); + else + return PSI->isHotCount(CallsiteTotalSamples); +} + +/// Mark as used the sample record for the given function samples at +/// (LineOffset, Discriminator). +/// +/// \returns true if this is the first time we mark the given record. +bool SampleCoverageTracker::markSamplesUsed(const FunctionSamples *FS, + uint32_t LineOffset, + uint32_t Discriminator, + uint64_t Samples) { + LineLocation Loc(LineOffset, Discriminator); + unsigned &Count = SampleCoverage[FS][Loc]; + bool FirstTime = (++Count == 1); + if (FirstTime) + TotalUsedSamples += Samples; + return FirstTime; +} + +/// Return the number of sample records that were applied from this profile. +/// +/// This count does not include records from cold inlined callsites. +unsigned +SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS, + ProfileSummaryInfo *PSI) const { + auto I = SampleCoverage.find(FS); + + // The size of the coverage map for FS represents the number of records + // that were marked used at least once. + unsigned Count = (I != SampleCoverage.end()) ? I->second.size() : 0; + + // If there are inlined callsites in this function, count the samples found + // in the respective bodies. However, do not bother counting callees with 0 + // total samples, these are callees that were never invoked at runtime. + for (const auto &I : FS->getCallsiteSamples()) + for (const auto &J : I.second) { + const FunctionSamples *CalleeSamples = &J.second; + if (callsiteIsHot(CalleeSamples, PSI, ProfAccForSymsInList)) + Count += countUsedRecords(CalleeSamples, PSI); + } + + return Count; +} + +/// Return the number of sample records in the body of this profile. +/// +/// This count does not include records from cold inlined callsites. +unsigned +SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS, + ProfileSummaryInfo *PSI) const { + unsigned Count = FS->getBodySamples().size(); + + // Only count records in hot callsites. + for (const auto &I : FS->getCallsiteSamples()) + for (const auto &J : I.second) { + const FunctionSamples *CalleeSamples = &J.second; + if (callsiteIsHot(CalleeSamples, PSI, ProfAccForSymsInList)) + Count += countBodyRecords(CalleeSamples, PSI); + } + + return Count; +} + +/// Return the number of samples collected in the body of this profile. +/// +/// This count does not include samples from cold inlined callsites. +uint64_t +SampleCoverageTracker::countBodySamples(const FunctionSamples *FS, + ProfileSummaryInfo *PSI) const { + uint64_t Total = 0; + for (const auto &I : FS->getBodySamples()) + Total += I.second.getSamples(); + + // Only count samples in hot callsites. 
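+  // For example (a sketch with made-up numbers): a profile whose body
+  // records hold 10, 20 and 30 samples, with one hot inlined callee
+  // contributing 40 more, makes this function return 100.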
+ for (const auto &I : FS->getCallsiteSamples()) + for (const auto &J : I.second) { + const FunctionSamples *CalleeSamples = &J.second; + if (callsiteIsHot(CalleeSamples, PSI, ProfAccForSymsInList)) + Total += countBodySamples(CalleeSamples, PSI); + } + + return Total; +} + +/// Return the fraction of sample records used in this profile. +/// +/// The returned value is an unsigned integer in the range 0-100 indicating +/// the percentage of sample records that were used while applying this +/// profile to the associated function. +unsigned SampleCoverageTracker::computeCoverage(unsigned Used, + unsigned Total) const { + assert(Used <= Total && + "number of used records cannot exceed the total number of records"); + return Total > 0 ? Used * 100 / Total : 100; +} + +/// Create a global variable to flag FSDiscriminators are used. +void createFSDiscriminatorVariable(Module *M) { + const char *FSDiscriminatorVar = "__llvm_fs_discriminator__"; + if (M->getGlobalVariable(FSDiscriminatorVar)) + return; + + auto &Context = M->getContext(); + // Place this variable to llvm.used so it won't be GC'ed. + appendToUsed(*M, {new GlobalVariable(*M, Type::getInt1Ty(Context), true, + GlobalValue::WeakODRLinkage, + ConstantInt::getTrue(Context), + FSDiscriminatorVar)}); +} + +} // end of namespace sampleprofutil +} // end of namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 6dbfb0b61fea..3978e1e29825 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -29,6 +29,12 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X) +#else +#define SCEV_DEBUG_WITH_TYPE(TYPE, X) +#endif + using namespace llvm; cl::opt<unsigned> llvm::SCEVCheapExpansionBudget( @@ -55,7 +61,7 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, // not allowed to move it. BasicBlock::iterator BIP = Builder.GetInsertPoint(); - Instruction *Ret = nullptr; + Value *Ret = nullptr; // Check to see if there is already a cast! for (User *U : V->users()) { @@ -76,20 +82,23 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, // Create a new cast. if (!Ret) { - Ret = CastInst::Create(Op, V, Ty, V->getName(), &*IP); - rememberInstruction(Ret); + SCEVInsertPointGuard Guard(Builder, this); + Builder.SetInsertPoint(&*IP); + Ret = Builder.CreateCast(Op, V, Ty, V->getName()); } // We assert at the end of the function since IP might point to an // instruction with different dominance properties than a cast // (an invoke for example) and not dominate BIP (but the cast does). 
- assert(SE.DT.dominates(Ret, &*BIP)); + assert(!isa<Instruction>(Ret) || + SE.DT.dominates(cast<Instruction>(Ret), &*BIP)); return Ret; } BasicBlock::iterator -SCEVExpander::findInsertPointAfter(Instruction *I, Instruction *MustDominate) { +SCEVExpander::findInsertPointAfter(Instruction *I, + Instruction *MustDominate) const { BasicBlock::iterator IP = ++I->getIterator(); if (auto *II = dyn_cast<InvokeInst>(I)) IP = II->getNormalDest()->begin(); @@ -114,6 +123,34 @@ SCEVExpander::findInsertPointAfter(Instruction *I, Instruction *MustDominate) { return IP; } +BasicBlock::iterator +SCEVExpander::GetOptimalInsertionPointForCastOf(Value *V) const { + // Cast the argument at the beginning of the entry block, after + // any bitcasts of other arguments. + if (Argument *A = dyn_cast<Argument>(V)) { + BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin(); + while ((isa<BitCastInst>(IP) && + isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) && + cast<BitCastInst>(IP)->getOperand(0) != A) || + isa<DbgInfoIntrinsic>(IP)) + ++IP; + return IP; + } + + // Cast the instruction immediately after the instruction. + if (Instruction *I = dyn_cast<Instruction>(V)) + return findInsertPointAfter(I, &*Builder.GetInsertPoint()); + + // Otherwise, this must be some kind of a constant, + // so let's plop this cast into the function's entry block. + assert(isa<Constant>(V) && + "Expected the cast argument to be a global/constant"); + return Builder.GetInsertBlock() + ->getParent() + ->getEntryBlock() + .getFirstInsertionPt(); +} + /// InsertNoopCastOfTo - Insert a cast of V to the specified type, /// which must be possible with a noop cast, doing what we can to share /// the casts. @@ -172,22 +209,8 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { if (Constant *C = dyn_cast<Constant>(V)) return ConstantExpr::getCast(Op, C, Ty); - // Cast the argument at the beginning of the entry block, after - // any bitcasts of other arguments. - if (Argument *A = dyn_cast<Argument>(V)) { - BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin(); - while ((isa<BitCastInst>(IP) && - isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) && - cast<BitCastInst>(IP)->getOperand(0) != A) || - isa<DbgInfoIntrinsic>(IP)) - ++IP; - return ReuseOrCreateCast(A, Ty, Op, IP); - } - - // Cast the instruction immediately after the instruction. - Instruction *I = cast<Instruction>(V); - BasicBlock::iterator IP = findInsertPointAfter(I, &*Builder.GetInsertPoint()); - return ReuseOrCreateCast(I, Ty, Op, IP); + // Try to reuse existing cast, or insert one. + return ReuseOrCreateCast(V, Ty, Op, GetOptimalInsertionPointForCastOf(V)); } /// InsertBinop - Insert the specified binary operator, doing a small amount @@ -430,8 +453,6 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, PointerType *PTy, Type *Ty, Value *V) { - Type *OriginalElTy = PTy->getElementType(); - Type *ElTy = OriginalElTy; SmallVector<Value *, 4> GepIndices; SmallVector<const SCEV *, 8> Ops(op_begin, op_end); bool AnyNonZeroIndices = false; @@ -442,93 +463,97 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, Type *IntIdxTy = DL.getIndexType(PTy); - // Descend down the pointer's type and attempt to convert the other - // operands into GEP indices, at each level. The first index in a GEP - // indexes into the array implied by the pointer operand; the rest of - // the indices index into the element or field type selected by the - // preceding index. 
- for (;;) { - // If the scale size is not 0, attempt to factor out a scale for - // array indexing. - SmallVector<const SCEV *, 8> ScaledOps; - if (ElTy->isSized()) { - const SCEV *ElSize = SE.getSizeOfExpr(IntIdxTy, ElTy); - if (!ElSize->isZero()) { - SmallVector<const SCEV *, 8> NewOps; - for (const SCEV *Op : Ops) { - const SCEV *Remainder = SE.getConstant(Ty, 0); - if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) { - // Op now has ElSize factored out. - ScaledOps.push_back(Op); - if (!Remainder->isZero()) - NewOps.push_back(Remainder); - AnyNonZeroIndices = true; - } else { - // The operand was not divisible, so add it to the list of operands - // we'll scan next iteration. - NewOps.push_back(Op); + // For opaque pointers, always generate i8 GEP. + if (!PTy->isOpaque()) { + // Descend down the pointer's type and attempt to convert the other + // operands into GEP indices, at each level. The first index in a GEP + // indexes into the array implied by the pointer operand; the rest of + // the indices index into the element or field type selected by the + // preceding index. + Type *ElTy = PTy->getElementType(); + for (;;) { + // If the scale size is not 0, attempt to factor out a scale for + // array indexing. + SmallVector<const SCEV *, 8> ScaledOps; + if (ElTy->isSized()) { + const SCEV *ElSize = SE.getSizeOfExpr(IntIdxTy, ElTy); + if (!ElSize->isZero()) { + SmallVector<const SCEV *, 8> NewOps; + for (const SCEV *Op : Ops) { + const SCEV *Remainder = SE.getConstant(Ty, 0); + if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) { + // Op now has ElSize factored out. + ScaledOps.push_back(Op); + if (!Remainder->isZero()) + NewOps.push_back(Remainder); + AnyNonZeroIndices = true; + } else { + // The operand was not divisible, so add it to the list of + // operands we'll scan next iteration. + NewOps.push_back(Op); + } + } + // If we made any changes, update Ops. + if (!ScaledOps.empty()) { + Ops = NewOps; + SimplifyAddOperands(Ops, Ty, SE); } - } - // If we made any changes, update Ops. - if (!ScaledOps.empty()) { - Ops = NewOps; - SimplifyAddOperands(Ops, Ty, SE); } } - } - // Record the scaled array index for this level of the type. If - // we didn't find any operands that could be factored, tentatively - // assume that element zero was selected (since the zero offset - // would obviously be folded away). - Value *Scaled = - ScaledOps.empty() - ? Constant::getNullValue(Ty) - : expandCodeForImpl(SE.getAddExpr(ScaledOps), Ty, false); - GepIndices.push_back(Scaled); - - // Collect struct field index operands. - while (StructType *STy = dyn_cast<StructType>(ElTy)) { - bool FoundFieldNo = false; - // An empty struct has no fields. - if (STy->getNumElements() == 0) break; - // Field offsets are known. See if a constant offset falls within any of - // the struct fields. - if (Ops.empty()) - break; - if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0])) - if (SE.getTypeSizeInBits(C->getType()) <= 64) { - const StructLayout &SL = *DL.getStructLayout(STy); - uint64_t FullOffset = C->getValue()->getZExtValue(); - if (FullOffset < SL.getSizeInBytes()) { - unsigned ElIdx = SL.getElementContainingOffset(FullOffset); - GepIndices.push_back( - ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx)); - ElTy = STy->getTypeAtIndex(ElIdx); - Ops[0] = - SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx)); - AnyNonZeroIndices = true; - FoundFieldNo = true; + // Record the scaled array index for this level of the type. 
If + // we didn't find any operands that could be factored, tentatively + // assume that element zero was selected (since the zero offset + // would obviously be folded away). + Value *Scaled = + ScaledOps.empty() + ? Constant::getNullValue(Ty) + : expandCodeForImpl(SE.getAddExpr(ScaledOps), Ty, false); + GepIndices.push_back(Scaled); + + // Collect struct field index operands. + while (StructType *STy = dyn_cast<StructType>(ElTy)) { + bool FoundFieldNo = false; + // An empty struct has no fields. + if (STy->getNumElements() == 0) break; + // Field offsets are known. See if a constant offset falls within any of + // the struct fields. + if (Ops.empty()) + break; + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0])) + if (SE.getTypeSizeInBits(C->getType()) <= 64) { + const StructLayout &SL = *DL.getStructLayout(STy); + uint64_t FullOffset = C->getValue()->getZExtValue(); + if (FullOffset < SL.getSizeInBytes()) { + unsigned ElIdx = SL.getElementContainingOffset(FullOffset); + GepIndices.push_back( + ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx)); + ElTy = STy->getTypeAtIndex(ElIdx); + Ops[0] = + SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx)); + AnyNonZeroIndices = true; + FoundFieldNo = true; + } } + // If no struct field offsets were found, tentatively assume that + // field zero was selected (since the zero offset would obviously + // be folded away). + if (!FoundFieldNo) { + ElTy = STy->getTypeAtIndex(0u); + GepIndices.push_back( + Constant::getNullValue(Type::getInt32Ty(Ty->getContext()))); } - // If no struct field offsets were found, tentatively assume that - // field zero was selected (since the zero offset would obviously - // be folded away). - if (!FoundFieldNo) { - ElTy = STy->getTypeAtIndex(0u); - GepIndices.push_back( - Constant::getNullValue(Type::getInt32Ty(Ty->getContext()))); } - } - if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy)) - ElTy = ATy->getElementType(); - else - // FIXME: Handle VectorType. - // E.g., If ElTy is scalable vector, then ElSize is not a compile-time - // constant, therefore can not be factored out. The generated IR is less - // ideal with base 'V' cast to i8* and do ugly getelementptr over that. - break; + if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy)) + ElTy = ATy->getElementType(); + else + // FIXME: Handle VectorType. + // E.g., If ElTy is scalable vector, then ElSize is not a compile-time + // constant, therefore can not be factored out. The generated IR is less + // ideal with base 'V' cast to i8* and do ugly getelementptr over that. + break; + } } // If none of the operands were convertible to proper GEP indices, cast @@ -536,8 +561,9 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // better than ptrtoint+arithmetic+inttoptr at least. if (!AnyNonZeroIndices) { // Cast the base to i8*. 
- V = InsertNoopCastOfTo(V, - Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace())); + if (!PTy->isOpaque()) + V = InsertNoopCastOfTo(V, + Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace())); assert(!isa<Instruction>(V) || SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint())); @@ -613,7 +639,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, Value *Casted = V; if (V->getType() != PTy) Casted = InsertNoopCastOfTo(Casted, PTy); - Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, GepIndices, "scevgep"); + Value *GEP = Builder.CreateGEP(PTy->getElementType(), Casted, GepIndices, + "scevgep"); Ops.push_back(SE.getUnknown(GEP)); } @@ -929,9 +956,8 @@ bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, // Addrec operands are always loop-invariant, so this can only happen // if there are instructions which haven't been hoisted. if (L == IVIncInsertLoop) { - for (User::op_iterator OI = IncV->op_begin()+1, - OE = IncV->op_end(); OI != OE; ++OI) - if (Instruction *OInst = dyn_cast<Instruction>(OI)) + for (Use &Op : llvm::drop_begin(IncV->operands())) + if (Instruction *OInst = dyn_cast<Instruction>(Op)) if (!SE.DT.dominates(OInst, IVIncInsertPos)) return false; } @@ -978,10 +1004,10 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, case Instruction::BitCast: return dyn_cast<Instruction>(IncV->getOperand(0)); case Instruction::GetElementPtr: - for (auto I = IncV->op_begin() + 1, E = IncV->op_end(); I != E; ++I) { - if (isa<Constant>(*I)) + for (Use &U : llvm::drop_begin(IncV->operands())) { + if (isa<Constant>(U)) continue; - if (Instruction *OInst = dyn_cast<Instruction>(*I)) { + if (Instruction *OInst = dyn_cast<Instruction>(U)) { if (!SE.DT.dominates(OInst, InsertPos)) return nullptr; } @@ -1121,6 +1147,10 @@ static bool canBeCheaplyTransformed(ScalarEvolution &SE, const SCEVAddRecExpr *Phi, const SCEVAddRecExpr *Requested, bool &InvertStep) { + // We can't transform to match a pointer PHI. + if (Phi->getType()->isPointerTy()) + return false; + Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType()); Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType()); @@ -1139,8 +1169,7 @@ static bool canBeCheaplyTransformed(ScalarEvolution &SE, } // Check whether inverting will help: {R,+,-1} == R - {0,+,1}. - if (SE.getAddExpr(Requested->getStart(), - SE.getNegativeSCEV(Requested)) == Phi) { + if (SE.getMinusSCEV(Requested->getStart(), Requested) == Phi) { InvertStep = true; return true; } @@ -1209,7 +1238,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // We should not look for a incomplete PHI. Getting SCEV for a incomplete // PHI has no meaning at all. if (!PN.isComplete()) { - DEBUG_WITH_TYPE( + SCEV_DEBUG_WITH_TYPE( DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n"); continue; } @@ -1364,9 +1393,10 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // can ensure that IVIncrement dominates the current uses. PostIncLoops = SavedPostIncLoops; - // Remember this PHI, even in post-inc mode. + // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most + // effective when we are able to use an IV inserted here, so record it. InsertedValues.insert(PN); - + InsertedIVs.push_back(PN); return PN; } @@ -1551,8 +1581,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // Rewrite an AddRec in terms of the canonical induction variable, if // its type is more narrow. 
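The "scevgep" change above belongs to the same migration seen throughout this import: IRBuilder::CreateGEP is handed the source element type explicitly rather than recovering it from the pointer's pointee type, which opaque pointers no longer carry. A hedged sketch of the pattern; the helper name is illustrative, not part of the pass:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Emit a byte-addressed GEP. With opaque pointers the element type cannot
// be dug out of BasePtr's type, so i8 is spelled out explicitly.
static Value *emitByteGEP(IRBuilder<> &Builder, Value *BasePtr,
                          Value *ByteOffset) {
  return Builder.CreateGEP(Builder.getInt8Ty(), BasePtr, ByteOffset,
                           "scevgep");
}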
if (CanonicalIV && - SE.getTypeSizeInBits(CanonicalIV->getType()) > - SE.getTypeSizeInBits(Ty)) { + SE.getTypeSizeInBits(CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty) && + !S->getType()->isPointerTy()) { SmallVector<const SCEV *, 4> NewOps(S->getNumOperands()); for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i) NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType()); @@ -1677,7 +1707,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) { Value *V = expandCodeForImpl(S->getOperand(), S->getOperand()->getType(), false); - return Builder.CreatePtrToInt(V, S->getType()); + return ReuseOrCreateCast(V, S->getType(), CastInst::PtrToInt, + GetOptimalInsertionPointForCastOf(V)); } Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) { @@ -1716,8 +1747,14 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { LHS = InsertNoopCastOfTo(LHS, Ty); } Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); - Value *ICmp = Builder.CreateICmpSGT(LHS, RHS); - Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); + Value *Sel; + if (Ty->isIntegerTy()) + Sel = Builder.CreateIntrinsic(Intrinsic::smax, {Ty}, {LHS, RHS}, + /*FMFSource=*/nullptr, "smax"); + else { + Value *ICmp = Builder.CreateICmpSGT(LHS, RHS); + Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); + } LHS = Sel; } // In the case of mixed integer and pointer types, cast the @@ -1739,8 +1776,14 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { LHS = InsertNoopCastOfTo(LHS, Ty); } Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); - Value *ICmp = Builder.CreateICmpUGT(LHS, RHS); - Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); + Value *Sel; + if (Ty->isIntegerTy()) + Sel = Builder.CreateIntrinsic(Intrinsic::umax, {Ty}, {LHS, RHS}, + /*FMFSource=*/nullptr, "umax"); + else { + Value *ICmp = Builder.CreateICmpUGT(LHS, RHS); + Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); + } LHS = Sel; } // In the case of mixed integer and pointer types, cast the @@ -1762,8 +1805,14 @@ Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) { LHS = InsertNoopCastOfTo(LHS, Ty); } Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); - Value *ICmp = Builder.CreateICmpSLT(LHS, RHS); - Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin"); + Value *Sel; + if (Ty->isIntegerTy()) + Sel = Builder.CreateIntrinsic(Intrinsic::smin, {Ty}, {LHS, RHS}, + /*FMFSource=*/nullptr, "smin"); + else { + Value *ICmp = Builder.CreateICmpSLT(LHS, RHS); + Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin"); + } LHS = Sel; } // In the case of mixed integer and pointer types, cast the @@ -1785,8 +1834,14 @@ Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) { LHS = InsertNoopCastOfTo(LHS, Ty); } Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); - Value *ICmp = Builder.CreateICmpULT(LHS, RHS); - Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin"); + Value *Sel; + if (Ty->isIntegerTy()) + Sel = Builder.CreateIntrinsic(Intrinsic::umin, {Ty}, {LHS, RHS}, + /*FMFSource=*/nullptr, "umin"); + else { + Value *ICmp = Builder.CreateICmpULT(LHS, RHS); + Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin"); + } LHS = Sel; } // In the case of mixed integer and pointer types, cast the @@ -1822,8 +1877,8 @@ Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) { cast<Instruction>(Builder.CreateAdd(Inst, Inst, "tmp.lcssa.user")); else { 
assert(Inst->getType()->isPointerTy()); - Tmp = cast<Instruction>( - Builder.CreateGEP(Inst, Builder.getInt32(1), "tmp.lcssa.user")); + Tmp = cast<Instruction>(Builder.CreatePtrToInt( + Inst, Type::getInt32Ty(Inst->getContext()), "tmp.lcssa.user")); } V = fixupLCSSAFormFor(Tmp, 0); @@ -1846,7 +1901,7 @@ Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) { ScalarEvolution::ValueOffsetPair SCEVExpander::FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt) { - SetVector<ScalarEvolution::ValueOffsetPair> *Set = SE.getSCEVValues(S); + auto *Set = SE.getSCEVValues(S); // If the expansion is not in CanonicalMode, and the SCEV contains any // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally. if (CanonicalMode || !SE.containsAddRecurrence(S)) { @@ -2045,8 +2100,9 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, Phi->replaceAllUsesWith(V); DeadInsts.emplace_back(Phi); ++NumElim; - DEBUG_WITH_TYPE(DebugType, dbgs() - << "INDVARS: Eliminated constant iv: " << *Phi << '\n'); + SCEV_DEBUG_WITH_TYPE(DebugType, + dbgs() << "INDVARS: Eliminated constant iv: " << *Phi + << '\n'); continue; } @@ -2103,9 +2159,9 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, TruncExpr == SE.getSCEV(IsomorphicInc) && SE.LI.replacementPreservesLCSSAForm(IsomorphicInc, OrigInc) && hoistIVInc(OrigInc, IsomorphicInc)) { - DEBUG_WITH_TYPE(DebugType, - dbgs() << "INDVARS: Eliminated congruent iv.inc: " - << *IsomorphicInc << '\n'); + SCEV_DEBUG_WITH_TYPE( + DebugType, dbgs() << "INDVARS: Eliminated congruent iv.inc: " + << *IsomorphicInc << '\n'); Value *NewInc = OrigInc; if (OrigInc->getType() != IsomorphicInc->getType()) { Instruction *IP = nullptr; @@ -2124,10 +2180,11 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, } } } - DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv: " - << *Phi << '\n'); - DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Original iv: " - << *OrigPhiRef << '\n'); + SCEV_DEBUG_WITH_TYPE(DebugType, + dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi + << '\n'); + SCEV_DEBUG_WITH_TYPE( + DebugType, dbgs() << "INDVARS: Original iv: " << *OrigPhiRef << '\n'); ++NumElim; Value *NewIV = OrigPhiRef; if (OrigPhiRef->getType() != Phi->getType()) { @@ -2179,13 +2236,13 @@ SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At, return None; } -template<typename T> static int costAndCollectOperands( +template<typename T> static InstructionCost costAndCollectOperands( const SCEVOperand &WorkItem, const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, SmallVectorImpl<SCEVOperand> &Worklist) { const T *S = cast<T>(WorkItem.S); - int Cost = 0; + InstructionCost Cost = 0; // Object to help map SCEV operands to expanded IR instructions. struct OperationIndices { OperationIndices(unsigned Opc, size_t min, size_t max) : @@ -2200,7 +2257,7 @@ template<typename T> static int costAndCollectOperands( // we know what the generated user(s) will be. 
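The visitSMaxExpr/visitUMaxExpr/visitSMinExpr/visitUMinExpr hunks above switch integer expansions from an icmp+select pair to the dedicated llvm.smax/umax/smin/umin intrinsics, keeping the select form only as a fallback. A minimal sketch of that emission pattern for the smax case (helper name illustrative):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
using namespace llvm;

static Value *emitSMax(IRBuilder<> &Builder, Value *LHS, Value *RHS) {
  Type *Ty = LHS->getType();
  if (Ty->isIntegerTy())
    // One call to @llvm.smax.iN instead of icmp sgt + select.
    return Builder.CreateIntrinsic(Intrinsic::smax, {Ty}, {LHS, RHS},
                                   /*FMFSource=*/nullptr, "smax");
  // Non-integer (e.g. pointer-typed) operands keep the old lowering.
  Value *ICmp = Builder.CreateICmpSGT(LHS, RHS);
  return Builder.CreateSelect(ICmp, LHS, RHS, "smax");
}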
SmallVector<OperationIndices, 2> Operations; - auto CastCost = [&](unsigned Opcode) { + auto CastCost = [&](unsigned Opcode) -> InstructionCost { Operations.emplace_back(Opcode, 0, 0); return TTI.getCastInstrCost(Opcode, S->getType(), S->getOperand(0)->getType(), @@ -2208,14 +2265,15 @@ }; auto ArithCost = [&](unsigned Opcode, unsigned NumRequired, - unsigned MinIdx = 0, unsigned MaxIdx = 1) { + unsigned MinIdx = 0, + unsigned MaxIdx = 1) -> InstructionCost { Operations.emplace_back(Opcode, MinIdx, MaxIdx); return NumRequired * TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind); }; - auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, - unsigned MinIdx, unsigned MaxIdx) { + auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx, + unsigned MaxIdx) -> InstructionCost { Operations.emplace_back(Opcode, MinIdx, MaxIdx); Type *OpType = S->getOperand(0)->getType(); return NumRequired * TTI.getCmpSelInstrCost( @@ -2262,6 +2320,7 @@ case scUMaxExpr: case scSMinExpr: case scUMinExpr: { + // FIXME: should this ask the cost for Intrinsics? Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1); Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2); break; } @@ -2286,10 +2345,11 @@ // Much like with normal add expr, the polynomial will require // one less addition than the number of its terms. - int AddCost = ArithCost(Instruction::Add, NumTerms - 1, - /*MinIdx*/1, /*MaxIdx*/1); + InstructionCost AddCost = ArithCost(Instruction::Add, NumTerms - 1, + /*MinIdx*/ 1, /*MaxIdx*/ 1); // Here, *each* one of those will require a multiplication. - int MulCost = ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms); + InstructionCost MulCost = + ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms); Cost = AddCost + MulCost; // What is the degree of this polynomial? @@ -2320,10 +2380,10 @@ bool SCEVExpander::isHighCostExpansionHelper( const SCEVOperand &WorkItem, Loop *L, const Instruction &At, - int &BudgetRemaining, const TargetTransformInfo &TTI, + InstructionCost &Cost, unsigned Budget, const TargetTransformInfo &TTI, SmallPtrSetImpl<const SCEV *> &Processed, SmallVectorImpl<SCEVOperand> &Worklist) { - if (BudgetRemaining < 0) + if (Cost > Budget) return true; // Already run out of budget, give up. const SCEV *S = WorkItem.S; @@ -2353,17 +2413,16 @@ return 0; const APInt &Imm = cast<SCEVConstant>(S)->getAPInt(); Type *Ty = S->getType(); - BudgetRemaining -= TTI.getIntImmCostInst( + Cost += TTI.getIntImmCostInst( WorkItem.ParentOpcode, WorkItem.OperandIdx, Imm, Ty, CostKind); - return BudgetRemaining < 0; + return Cost > Budget; } case scTruncate: case scPtrToInt: case scZeroExtend: case scSignExtend: { - int Cost = + Cost += costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist); - BudgetRemaining -= Cost; return false; // Will answer upon next entry into this function. } case scUDivExpr: { @@ -2379,10 +2438,8 @@ if (SE.isKnownPredicate(ICmpInst::ICMP_SGE, S, SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L)) return false; // Consider it to be free. - int Cost = + Cost += costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist); - // Need to count the cost of this UDiv.
- BudgetRemaining -= Cost; return false; // Will answer upon next entry into this function. } case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: case scUMinExpr: case scSMinExpr: { assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 && "Nary expr should have more than 1 operand."); // The simple nary expr will require one less op (or pair of ops) // than the number of its terms. - int Cost = + Cost += costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist); - BudgetRemaining -= Cost; - return BudgetRemaining < 0; + return Cost > Budget; } case scAddRecExpr: { assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 && "Polynomial should be at least linear"); - BudgetRemaining -= costAndCollectOperands<SCEVAddRecExpr>( + Cost += costAndCollectOperands<SCEVAddRecExpr>( WorkItem, TTI, CostKind, Worklist); - return BudgetRemaining < 0; + return Cost > Budget; } } llvm_unreachable("Unknown SCEV kind!"); @@ -2473,7 +2529,10 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, Value *StepValue = expandCodeForImpl(Step, Ty, Loc, false); Value *NegStepValue = expandCodeForImpl(SE.getNegativeSCEV(Step), Ty, Loc, false); - Value *StartValue = expandCodeForImpl(Start, ARExpandTy, Loc, false); + Value *StartValue = expandCodeForImpl( + isa<PointerType>(ARExpandTy) ? Start + : SE.getPtrToIntExpr(Start, ARExpandTy), + ARExpandTy, Loc, false); ConstantInt *Zero = ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits)); @@ -2675,14 +2734,13 @@ bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, if (InsertionPoint->getParent()->getTerminator() == InsertionPoint) return true; if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) - for (const Value *V : InsertionPoint->operand_values()) - if (V == U->getValue()) - return true; + if (llvm::is_contained(InsertionPoint->operand_values(), U->getValue())) + return true; } return false; } -SCEVExpanderCleaner::~SCEVExpanderCleaner() { +void SCEVExpanderCleaner::cleanup() { // Result is used, nothing to remove.
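The isHighCostExpansionHelper hunks above replace the decrement-an-int budget with an InstructionCost accumulator: the running cost only grows, the comparison against a fixed budget happens in one place, and an Invalid cost can be detected instead of silently underflowing a plain int. A sketch of that accounting discipline, under hypothetical names:

#include "llvm/Support/InstructionCost.h"
using namespace llvm;

// Returns true when expansion should be abandoned. Invalid is sticky
// across +=, so it is treated the same as exceeding the budget.
static bool addCostAndCheck(InstructionCost &Cost, unsigned Budget,
                            InstructionCost StepCost) {
  Cost += StepCost;
  return !Cost.isValid() || Cost > Budget;
}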
if (ResultUsed) return; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index de9560df9785..583bb379488e 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -57,11 +57,13 @@ #include "llvm/IR/NoFolder.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/PseudoProbe.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -111,10 +113,6 @@ static cl::opt<unsigned> TwoEntryPHINodeFoldingThreshold( "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)")); -static cl::opt<bool> DupRet( - "simplifycfg-dup-ret", cl::Hidden, cl::init(false), - cl::desc("Duplicate return instructions into unconditional branches")); - static cl::opt<bool> HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block")); @@ -149,9 +147,10 @@ static cl::opt<unsigned> MaxSpeculationDepth( "speculatively executed instructions")); static cl::opt<int> -MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), - cl::desc("Max size of a block which is still considered " - "small enough to thread through")); + MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, + cl::init(10), + cl::desc("Max size of a block which is still considered " + "small enough to thread through")); // Two is chosen to allow one negation and a logical combine. static cl::opt<unsigned> @@ -235,7 +234,6 @@ class SimplifyCFGOpt { bool FoldValueComparisonIntoPredecessors(Instruction *TI, IRBuilder<> &Builder); - bool simplifyReturn(ReturnInst *RI, IRBuilder<> &Builder); bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder); bool simplifySingleResume(ResumeInst *RI); bool simplifyCommonResume(ResumeInst *RI); @@ -246,12 +244,12 @@ class SimplifyCFGOpt { bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder); bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder); bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder); - bool SimplifyCondBranchToTwoReturns(BranchInst *BI, IRBuilder<> &Builder); bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, IRBuilder<> &Builder); - bool HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI); + bool HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI, + bool EqTermsOnly); bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, const TargetTransformInfo &TTI); bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond, @@ -335,8 +333,8 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, /// which is assumed to be safe to speculate. TCC_Free means cheap, /// TCC_Basic means less cheap, and TCC_Expensive means prohibitively /// expensive. 
-static unsigned ComputeSpeculationCost(const User *I, - const TargetTransformInfo &TTI) { +static InstructionCost computeSpeculationCost(const User *I, + const TargetTransformInfo &TTI) { assert(isSafeToSpeculativelyExecute(I) && "Instruction is not safe to speculatively execute!"); return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency); @@ -349,19 +347,20 @@ static unsigned ComputeSpeculationCost(const User *I, /// /// If AggressiveInsts is non-null, and if V does not dominate BB, we check to /// see if V (which must be an instruction) and its recursive operands -/// that do not dominate BB have a combined cost lower than CostRemaining and +/// that do not dominate BB have a combined cost lower than Budget and /// are non-trapping. If both are true, the instruction is inserted into the /// set and true is returned. /// /// The cost for most non-trapping instructions is defined as 1 except for /// Select whose cost is 2. /// -/// After this function returns, CostRemaining is decreased by the cost of +/// After this function returns, Cost is increased by the cost of /// V plus its non-dominating operands. If that cost is greater than -/// CostRemaining, false is returned and CostRemaining is undefined. -static bool DominatesMergePoint(Value *V, BasicBlock *BB, +/// Budget, false is returned and Cost is undefined. +static bool dominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSetImpl<Instruction *> &AggressiveInsts, - int &BudgetRemaining, + InstructionCost &Cost, + InstructionCost Budget, const TargetTransformInfo &TTI, unsigned Depth = 0) { // It is possible to hit a zero-cost cycle (phi/gep instructions for example), @@ -404,7 +403,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, if (!isSafeToSpeculativelyExecute(I)) return false; - BudgetRemaining -= ComputeSpeculationCost(I, TTI); + Cost += computeSpeculationCost(I, TTI); // Allow exactly one instruction to be speculated regardless of its cost // (as long as it is safe to do so). @@ -412,14 +411,15 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // or other expensive operation. The speculation of an expensive instruction // is expected to be undone in CodeGenPrepare if the speculation has not // enabled further IR optimizations. - if (BudgetRemaining < 0 && - (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0)) + if (Cost > Budget && + (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 || + !Cost.isValid())) return false; // Okay, we can only really hoist these out if their operands do // not take us over the cost threshold. - for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (!DominatesMergePoint(*i, BB, AggressiveInsts, BudgetRemaining, TTI, + for (Use &Op : I->operands()) + if (!dominatesMergePoint(Op, BB, AggressiveInsts, Cost, Budget, TTI, Depth + 1)) return false; // Okay, it's safe to do this! Remember this instruction. @@ -615,8 +615,8 @@ private: } // If we have "x ult 3", for example, then we can add 0,1,2 to the set. - ConstantRange Span = ConstantRange::makeAllowedICmpRegion( - ICI->getPredicate(), C->getValue()); + ConstantRange Span = + ConstantRange::makeExactICmpRegion(ICI->getPredicate(), C->getValue()); // Shift the range if the compare is fed by an add. This is the range // compare idiom as emitted by instcombine. 
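The hunk just above tightens makeAllowedICmpRegion to makeExactICmpRegion, which for a compare against a single constant yields exactly the set of values satisfying the predicate. For the "x ult 3" case named in the comment, a standalone check (self-contained, illustrative only):

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  ConstantRange Span =
      ConstantRange::makeExactICmpRegion(CmpInst::ICMP_ULT, APInt(32, 3));
  Span.print(outs()); // prints [0,3): exactly the values 0, 1 and 2
  outs() << "\n";
}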
@@ -906,24 +906,27 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI); - SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases; + SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases; for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) { --i; auto *Successor = i->getCaseSuccessor(); - ++NumPerSuccessorCases[Successor]; + if (DTU) + ++NumPerSuccessorCases[Successor]; if (DeadCases.count(i->getCaseValue())) { Successor->removePredecessor(PredDef); SI.removeCase(i); - --NumPerSuccessorCases[Successor]; + if (DTU) + --NumPerSuccessorCases[Successor]; } } - std::vector<DominatorTree::UpdateType> Updates; - for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases) - if (I.second == 0) - Updates.push_back({DominatorTree::Delete, PredDef, I.first}); - if (DTU) + if (DTU) { + std::vector<DominatorTree::UpdateType> Updates; + for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases) + if (I.second == 0) + Updates.push_back({DominatorTree::Delete, PredDef, I.first}); DTU->applyUpdates(Updates); + } LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; @@ -954,7 +957,7 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( if (!TheRealDest) TheRealDest = ThisDef; - SmallSetVector<BasicBlock *, 2> RemovedSuccs; + SmallPtrSet<BasicBlock *, 2> RemovedSuccs; // Remove PHI node entries for dead edges. BasicBlock *CheckEdge = TheRealDest; @@ -1080,7 +1083,10 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses( // For an analogous reason, we must also drop all the metadata whose // semantics we don't understand. We *can* preserve !annotation, because // it is tied to the instruction itself, not the value or position. - NewBonusInst->dropUnknownNonDebugMetadata(LLVMContext::MD_annotation); + // Similarly strip attributes on call parameters that may cause UB in + // location the call is moved to. + NewBonusInst->dropUndefImplyingAttrsAndUnknownMetadata( + LLVMContext::MD_annotation); PredBlock->getInstList().insert(PTI->getIterator(), NewBonusInst); NewBonusInst->takeName(&BonusInst); @@ -1093,8 +1099,13 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses( (NewBonusInst->getName() + ".merge").str()); SSAUpdate.AddAvailableValue(BB, &BonusInst); SSAUpdate.AddAvailableValue(PredBlock, NewBonusInst); - for (Use &U : make_early_inc_range(BonusInst.uses())) - SSAUpdate.RewriteUseAfterInsertions(U); + for (Use &U : make_early_inc_range(BonusInst.uses())) { + auto *UI = cast<Instruction>(U.getUser()); + if (UI->getParent() != PredBlock) + SSAUpdate.RewriteUseAfterInsertions(U); + else // Use is in the same block as, and comes before, NewBonusInst. + SSAUpdate.RewriteUse(U); + } } } @@ -1103,7 +1114,7 @@ bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding( BasicBlock *BB = TI->getParent(); BasicBlock *Pred = PTI->getParent(); - std::vector<DominatorTree::UpdateType> Updates; + SmallVector<DominatorTree::UpdateType, 32> Updates; // Figure out which 'cases' to copy from SI to PSI. std::vector<ValueEqualityComparisonCase> BBCases; @@ -1168,7 +1179,7 @@ bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding( // Reconstruct the new switch statement we will be building. 
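A recurring shape in the hunks above: dominator-tree bookkeeping (including the per-successor case counting) now happens only when a DomTreeUpdater is actually in use, instead of building update vectors that may be thrown away. Sketched with an illustrative helper, not code from the pass:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

static void recordDeletedEdge(DomTreeUpdater *DTU, BasicBlock *From,
                              BasicBlock *To) {
  if (!DTU)
    return; // no updater in use: skip the bookkeeping entirely
  SmallVector<DominatorTree::UpdateType, 1> Updates;
  Updates.push_back({DominatorTree::Delete, From, To});
  DTU->applyUpdates(Updates);
}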
if (PredDefault != BBDefault) { PredDefault->removePredecessor(Pred); - if (PredDefault != BB) + if (DTU && PredDefault != BB) Updates.push_back({DominatorTree::Delete, Pred, PredDefault}); PredDefault = BBDefault; ++NewSuccessors[BBDefault]; @@ -1244,13 +1255,18 @@ bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding( // Okay, at this point, we know which new successor Pred will get. Make // sure we update the number of entries in the PHI nodes for these // successors. + SmallPtrSet<BasicBlock *, 2> SuccsOfPred; + if (DTU) { + SuccsOfPred = {succ_begin(Pred), succ_end(Pred)}; + Updates.reserve(Updates.size() + NewSuccessors.size()); + } for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor : NewSuccessors) { for (auto I : seq(0, NewSuccessor.second)) { (void)I; AddPredecessorToBlock(NewSuccessor.first, Pred, BB); } - if (!is_contained(successors(Pred), NewSuccessor.first)) + if (DTU && !SuccsOfPred.contains(NewSuccessor.first)) Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first}); } @@ -1290,18 +1306,21 @@ bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding( InfLoopBlock = BasicBlock::Create(BB->getContext(), "infloop", BB->getParent()); BranchInst::Create(InfLoopBlock, InfLoopBlock); - Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock}); + if (DTU) + Updates.push_back( + {DominatorTree::Insert, InfLoopBlock, InfLoopBlock}); } NewSI->setSuccessor(i, InfLoopBlock); } - if (InfLoopBlock) - Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock}); + if (DTU) { + if (InfLoopBlock) + Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock}); - Updates.push_back({DominatorTree::Delete, Pred, BB}); + Updates.push_back({DominatorTree::Delete, Pred, BB}); - if (DTU) DTU->applyUpdates(Updates); + } ++NumFoldValueComparisonIntoPredecessors; return true; @@ -1368,9 +1387,12 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu /// Given a conditional branch that goes to BB1 and BB2, hoist any common code /// in the two blocks up into the branch block. The caller of this function -/// guarantees that BI's block dominates BB1 and BB2. +/// guarantees that BI's block dominates BB1 and BB2. If EqTermsOnly is given, +/// only perform hoisting in case both blocks only contain a terminator. In that +/// case, only the original BI will be replaced and selects for PHIs are added. bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, - const TargetTransformInfo &TTI) { + const TargetTransformInfo &TTI, + bool EqTermsOnly) { // This does very trivial matching, with limited scanning, to find identical // instructions in the two blocks. In particular, we don't want to get into // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As @@ -1379,6 +1401,12 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, BasicBlock *BB1 = BI->getSuccessor(0); // The true destination. BasicBlock *BB2 = BI->getSuccessor(1); // The false destination + // If either of the blocks has it's address taken, then we can't do this fold, + // because the code we'd hoist would no longer run when we jump into the block + // by it's address. + if (BB1->hasAddressTaken() || BB2->hasAddressTaken()) + return false; + BasicBlock::iterator BB1_Itr = BB1->begin(); BasicBlock::iterator BB2_Itr = BB2->begin(); @@ -1407,6 +1435,20 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, ++NumHoistCommonCode; }); + // Check if only hoisting terminators is allowed. 
This does not add new + // instructions to the hoist location. + if (EqTermsOnly) { + // Skip any debug intrinsics, as they are free to hoist. + auto *I1NonDbg = &*skipDebugIntrinsics(I1->getIterator()); + auto *I2NonDbg = &*skipDebugIntrinsics(I2->getIterator()); + if (!I1NonDbg->isIdenticalToWhenDefined(I2NonDbg)) + return false; + if (!I1NonDbg->isTerminator()) + return false; + // Now we know that we only need to hoist debug instrinsics and the + // terminator. Let the loop below handle those 2 cases. + } + do { // If we are hoisting the terminator instruction, don't move one (making a // broken BB), instead clone it, and remove BI. @@ -1578,10 +1620,13 @@ HoistTerminator: // Update any PHI nodes in our new successors. for (BasicBlock *Succ : successors(BB1)) { AddPredecessorToBlock(Succ, BIParent, BB1); - Updates.push_back({DominatorTree::Insert, BIParent, Succ}); + if (DTU) + Updates.push_back({DominatorTree::Insert, BIParent, Succ}); } - for (BasicBlock *Succ : successors(BI)) - Updates.push_back({DominatorTree::Delete, BIParent, Succ}); + + if (DTU) + for (BasicBlock *Succ : successors(BI)) + Updates.push_back({DominatorTree::Delete, BIParent, Succ}); EraseTerminatorAndDCECond(BI); if (DTU) @@ -1692,6 +1737,32 @@ static bool canSinkInstructions( })) return false; + // For calls to be sinkable, they must all be indirect, or have same callee. + // I.e. if we have two direct calls to different callees, we don't want to + // turn that into an indirect call. Likewise, if we have an indirect call, + // and a direct call, we don't actually want to have a single indirect call. + if (isa<CallBase>(I0)) { + auto IsIndirectCall = [](const Instruction *I) { + return cast<CallBase>(I)->isIndirectCall(); + }; + bool HaveIndirectCalls = any_of(Insts, IsIndirectCall); + bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall); + if (HaveIndirectCalls) { + if (!AllCallsAreIndirect) + return false; + } else { + // All callees must be identical. + Value *Callee = nullptr; + for (const Instruction *I : Insts) { + Value *CurrCallee = cast<CallBase>(I)->getCalledOperand(); + if (!Callee) + Callee = CurrCallee; + else if (Callee != CurrCallee) + return false; + } + } + } + for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) { Value *Op = I0->getOperand(OI); if (Op->getType()->isTokenTy()) @@ -1707,11 +1778,6 @@ static bool canSinkInstructions( !canReplaceOperandWithVariable(I0, OI)) // We can't create a PHI from this GEP. return false; - // Don't create indirect calls! The called value is the final operand. - if (isa<CallBase>(I0) && OI == OE - 1) { - // FIXME: if the call was *already* indirect, we should do this. - return false; - } for (auto *I : Insts) PHIOperands[I].push_back(I->getOperand(OI)); } @@ -1871,6 +1937,20 @@ namespace { } } + void operator++() { + if (Fail) + return; + for (auto *&Inst : Insts) { + for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);) + Inst = Inst->getNextNode(); + // Already at end of block. + if (!Inst) { + Fail = true; + return; + } + } + } + ArrayRef<Instruction*> operator * () const { return Insts; } @@ -1880,13 +1960,11 @@ namespace { /// Check whether BB's predecessors end with unconditional branches. If it is /// true, sink any common code from the predecessors to BB. -/// We also allow one predecessor to end with conditional branch (but no more -/// than one). 
static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU) { // We support two situations: // (1) all incoming arcs are unconditional - // (2) one incoming arc is conditional + // (2) there are non-unconditional incoming arcs // // (2) is very common in switch defaults and // else-if patterns; @@ -1926,15 +2004,13 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, // [ end ] // SmallVector<BasicBlock*,4> UnconditionalPreds; - Instruction *Cond = nullptr; - for (auto *B : predecessors(BB)) { - auto *T = B->getTerminator(); - if (isa<BranchInst>(T) && cast<BranchInst>(T)->isUnconditional()) - UnconditionalPreds.push_back(B); - else if ((isa<BranchInst>(T) || isa<SwitchInst>(T)) && !Cond) - Cond = T; + bool HaveNonUnconditionalPredecessors = false; + for (auto *PredBB : predecessors(BB)) { + auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()); + if (PredBr && PredBr->isUnconditional()) + UnconditionalPreds.push_back(PredBB); else - return false; + HaveNonUnconditionalPredecessors = true; } if (UnconditionalPreds.size() < 2) return false; @@ -1945,7 +2021,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, // carry on. If we can sink an instruction but need to PHI-merge some operands // (because they're not identical in each instruction) we add these to // PHIOperands. - unsigned ScanIdx = 0; + int ScanIdx = 0; SmallPtrSet<Value*,4> InstructionsToSink; DenseMap<Instruction*, SmallVector<Value*,4>> PHIOperands; LockstepReverseIterator LRI(UnconditionalPreds); @@ -1962,14 +2038,18 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, if (ScanIdx == 0) return false; - bool Changed = false; - + // Okay, we *could* sink last ScanIdx instructions. But how many can we + // actually sink before encountering instruction that is unprofitable to sink? auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) { unsigned NumPHIdValues = 0; for (auto *I : *LRI) - for (auto *V : PHIOperands[I]) + for (auto *V : PHIOperands[I]) { if (InstructionsToSink.count(V) == 0) ++NumPHIdValues; + // FIXME: this check is overly optimistic. We may end up not sinking + // said instruction, due to the very same profitability check. + // See @creating_too_many_phis in sink-common-code.ll. + } LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n"); unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size(); if ((NumPHIdValues % UnconditionalPreds.size()) != 0) @@ -1978,16 +2058,80 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, return NumPHIInsts <= 1; }; - if (Cond) { - // Check if we would actually sink anything first! This mutates the CFG and - // adds an extra block. The goal in doing this is to allow instructions that - // couldn't be sunk before to be sunk - obviously, speculatable instructions - // (such as trunc, add) can be sunk and predicated already. So we check that - // we're going to sink at least one non-speculatable instruction. + // We've determined that we are going to sink last ScanIdx instructions, + // and recorded them in InstructionsToSink. Now, some instructions may be + // unprofitable to sink. But that determination depends on the instructions + // that we are going to sink. + + // First, forward scan: find the first instruction unprofitable to sink, + // recording all the ones that are profitable to sink. + // FIXME: would it be better, after we detect that not all are profitable. + // to either record the profitable ones, or erase the unprofitable ones? 
+ // Maybe we need to choose (at runtime) the one that will touch least instrs? + LRI.reset(); + int Idx = 0; + SmallPtrSet<Value *, 4> InstructionsProfitableToSink; + while (Idx < ScanIdx) { + if (!ProfitableToSinkInstruction(LRI)) { + // Too many PHIs would be created. + LLVM_DEBUG( + dbgs() << "SINK: stopping here, too many PHIs would be created!\n"); + break; + } + InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end()); + --LRI; + ++Idx; + } + + // If no instructions can be sunk, early-return. + if (Idx == 0) + return false; + + // Did we determine that (only) some instructions are unprofitable to sink? + if (Idx < ScanIdx) { + // Okay, some instructions are unprofitable. + ScanIdx = Idx; + InstructionsToSink = InstructionsProfitableToSink; + + // But, that may make other instructions unprofitable, too. + // So, do a backward scan, do any earlier instructions become unprofitable? + assert(!ProfitableToSinkInstruction(LRI) && + "We already know that the last instruction is unprofitable to sink"); + ++LRI; + --Idx; + while (Idx >= 0) { + // If we detect that an instruction becomes unprofitable to sink, + // all earlier instructions won't be sunk either, + // so preemptively keep InstructionsProfitableToSink in sync. + // FIXME: is this the most performant approach? + for (auto *I : *LRI) + InstructionsProfitableToSink.erase(I); + if (!ProfitableToSinkInstruction(LRI)) { + // Everything starting with this instruction won't be sunk. + ScanIdx = Idx; + InstructionsToSink = InstructionsProfitableToSink; + } + ++LRI; + --Idx; + } + } + + // If no instructions can be sunk, early-return. + if (ScanIdx == 0) + return false; + + bool Changed = false; + + if (HaveNonUnconditionalPredecessors) { + // It is always legal to sink common instructions from unconditional + // predecessors. However, if not all predecessors are unconditional, + // this transformation might be pessimizing. So as a rule of thumb, + // don't do it unless we'd sink at least one non-speculatable instruction. + // See https://bugs.llvm.org/show_bug.cgi?id=30244 LRI.reset(); - unsigned Idx = 0; + int Idx = 0; bool Profitable = false; - while (ProfitableToSinkInstruction(LRI) && Idx < ScanIdx) { + while (Idx < ScanIdx) { if (!isSafeToSpeculativelyExecute((*LRI)[0])) { Profitable = true; break; @@ -2019,7 +2163,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, // sink presuming a later value will also be sunk, but stop half way through // and never actually sink it which means we produce more PHIs than intended. // This is unlikely in practice though. - unsigned SinkIdx = 0; + int SinkIdx = 0; for (; SinkIdx != ScanIdx; ++SinkIdx) { LLVM_DEBUG(dbgs() << "SINK: Sink: " << *UnconditionalPreds[0]->getTerminator()->getPrevNode() @@ -2028,12 +2172,6 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, // Because we've sunk every instruction in turn, the current instruction to // sink is always at index 0. LRI.reset(); - if (!ProfitableToSinkInstruction(LRI)) { - // Too many PHIs would be created. - LLVM_DEBUG( - dbgs() << "SINK: stopping here, too many PHIs would be created!\n"); - break; - } if (!sinkLastInstruction(UnconditionalPreds)) { LLVM_DEBUG( @@ -2087,6 +2225,7 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, return nullptr; Value *StorePtr = StoreToHoist->getPointerOperand(); + Type *StoreTy = StoreToHoist->getValueOperand()->getType(); // Look for a store to the same pointer in BrBB. 
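The store-speculation scan that continues below now requires the earlier store to write the same type and to be a simple store; matching the pointer alone could otherwise pair an atomic store with a speculated non-atomic one. The tightened match, reduced to a standalone predicate (name illustrative):

#include "llvm/IR/Instructions.h"
using namespace llvm;

static bool matchesPriorStore(const StoreInst *SI, const Value *StorePtr,
                              const Type *StoreTy) {
  return SI->getPointerOperand() == StorePtr &&
         SI->getValueOperand()->getType() == StoreTy &&
         SI->isSimple(); // neither volatile nor atomic
}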
unsigned MaxNumInstToLookAt = 9; @@ -2098,12 +2237,15 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, --MaxNumInstToLookAt; // Could be calling an instruction that affects memory like free(). - if (CurI.mayHaveSideEffects() && !isa<StoreInst>(CurI)) + if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI)) return nullptr; if (auto *SI = dyn_cast<StoreInst>(&CurI)) { - // Found the previous store make sure it stores to the same location. - if (SI->getPointerOperand() == StorePtr) + // Found the previous store to same location and type. Make sure it is + // simple, to avoid introducing a spurious non-atomic write after an + // atomic write. + if (SI->getPointerOperand() == StorePtr && + SI->getValueOperand()->getType() == StoreTy && SI->isSimple()) // Found the previous store, return its value operand. return SI->getValueOperand(); return nullptr; // Unknown store. @@ -2118,7 +2260,7 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, - int &BudgetRemaining, + InstructionCost &Cost, const TargetTransformInfo &TTI) { TargetTransformInfo::TargetCostKind CostKind = BB->getParent()->hasMinSize() @@ -2135,9 +2277,8 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, if (ThenV == OrigV) continue; - BudgetRemaining -= - TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr, - CmpInst::BAD_ICMP_PREDICATE, CostKind); + Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr, + CmpInst::BAD_ICMP_PREDICATE, CostKind); // Don't convert to selects if we could remove undefined behavior instead. if (passingValueIsAlwaysUndefined(OrigV, &PN) || @@ -2153,9 +2294,9 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) || (OrigCE && !isSafeToSpeculativelyExecute(OrigCE))) return false; - unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0; - unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0; - unsigned MaxCost = + InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0; + InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0; + InstructionCost MaxCost = 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; if (OrigCost + ThenCost > MaxCost) return false; @@ -2218,8 +2359,8 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, BasicBlock *BB = BI->getParent(); BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0); - int BudgetRemaining = - PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; + InstructionCost Budget = + PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; // If ThenBB is actually on the false edge of the conditional branch, remember // to swap the select operands later. @@ -2230,6 +2371,20 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, } assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block"); + // If the branch is non-unpredictable, and is predicted to *not* branch to + // the `then` block, then avoid speculating it. + if (!BI->getMetadata(LLVMContext::MD_unpredictable)) { + uint64_t TWeight, FWeight; + if (BI->extractProfMetadata(TWeight, FWeight) && (TWeight + FWeight) != 0) { + uint64_t EndWeight = Invert ? 
TWeight : FWeight; + BranchProbability BIEndProb = + BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight); + BranchProbability Likely = TTI.getPredictableBranchThreshold(); + if (BIEndProb >= Likely) + return false; + } + } + // Keep a count of how many times instructions are used within ThenBB when // they are candidates for sinking into ThenBB. Specifically: // - They are defined in BB, and @@ -2256,6 +2411,10 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // probability for ThenBB, which is fine since the optimization here takes // place regardless of the branch probability. if (isa<PseudoProbeInst>(I)) { + // The probe should be deleted so that it will not be over-counted when + // the samples collected on the non-conditional path are counted towards + // the conditional path. We leave it for the counts inference algorithm to + // figure out a proper count for an unknown probe. SpeculatedDbgIntrinsics.push_back(I); continue; } @@ -2272,7 +2431,7 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, I, BB, ThenBB, EndBB)))) return false; if (!SpeculatedStoreValue && - ComputeSpeculationCost(I, TTI) > + computeSpeculationCost(I, TTI) > PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic) return false; @@ -2283,8 +2442,8 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // Do not hoist the instruction if any of its operands are defined but not // used in BB. The transformation will prevent the operand from // being sunk into the use block. - for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) { - Instruction *OpI = dyn_cast<Instruction>(*i); + for (Use &Op : I->operands()) { + Instruction *OpI = dyn_cast<Instruction>(Op); if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects()) continue; // Not a candidate for sinking. @@ -2308,10 +2467,11 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // Check that we can insert the selects and that it's not too expensive to do // so. bool Convert = SpeculatedStore != nullptr; + InstructionCost Cost = 0; Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB, SpeculatedInstructions, - BudgetRemaining, TTI); - if (!Convert || BudgetRemaining < 0) + Cost, TTI); + if (!Convert || Cost > Budget) return false; // If we get here, we can hoist the instruction and if-convert. @@ -2335,10 +2495,12 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // Conservatively strip all metadata on the instruction. Drop the debug loc // to avoid making it appear as if the condition is a constant, which would // be misleading while debugging. + // Similarly strip attributes that maybe dependent on condition we are + // hoisting above. for (auto &I : *ThenBB) { if (!SpeculatedStoreValue || &I != SpeculatedStore) I.setDebugLoc(DebugLoc()); - I.dropUnknownNonDebugMetadata(); + I.dropUndefImplyingAttrsAndUnknownMetadata(); } // Hoist the instructions. @@ -2382,19 +2544,32 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { int Size = 0; - for (Instruction &I : BB->instructionsWithoutDebug()) { - if (Size > MaxSmallBlockSize) - return false; // Don't clone large BB's. 
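The profile guard added above follows a pattern this file now uses in several places: read the branch weights off the terminator, turn the edge of interest into a BranchProbability, and back off when that edge is likely to be taken anyway. A hedged sketch of the check (helper name illustrative):

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/BranchProbability.h"
using namespace llvm;

// True when profile data says the 'then' block is predictably skipped,
// in which case speculating its instructions would be wasted work.
static bool predictablySkipped(const BranchInst *BI, bool Invert,
                               const TargetTransformInfo &TTI) {
  uint64_t TWeight, FWeight;
  if (!BI->extractProfMetadata(TWeight, FWeight) || TWeight + FWeight == 0)
    return false; // no usable profile: assume nothing
  uint64_t EndWeight = Invert ? TWeight : FWeight;
  BranchProbability EndProb =
      BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
  return EndProb >= TTI.getPredictableBranchThreshold();
}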
+ SmallPtrSet<const Value *, 32> EphValues; + auto IsEphemeral = [&](const Value *V) { + if (isa<AssumeInst>(V)) + return true; + return isSafeToSpeculativelyExecute(V) && + all_of(V->users(), + [&](const User *U) { return EphValues.count(U); }); + }; + // Walk the block in reverse so that we can identify ephemeral values properly + // (values only feeding assumes). + for (Instruction &I : reverse(BB->instructionsWithoutDebug())) { // Can't fold blocks that contain noduplicate or convergent calls. if (CallInst *CI = dyn_cast<CallInst>(&I)) if (CI->cannotDuplicate() || CI->isConvergent()) return false; + // Ignore ephemeral values which are deleted during codegen. + if (IsEphemeral(&I)) + EphValues.insert(&I); // We will delete Phis while threading, so Phis should not be accounted in - // block's size - if (!isa<PHINode>(I)) - ++Size; + // block's size. + else if (!isa<PHINode>(I)) { + if (Size++ > MaxSmallBlockSize) + return false; // Don't clone large BBs. + } // We can only support instructions that do not define values that are // live outside of the current basic block. @@ -2460,7 +2635,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU, BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge", RealDest->getParent(), RealDest); BranchInst *CritEdgeBranch = BranchInst::Create(RealDest, EdgeBB); - Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest}); + if (DTU) + Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest}); CritEdgeBranch->setDebugLoc(BI->getDebugLoc()); // Update PHI nodes. @@ -2482,10 +2658,10 @@ N->setName(BBI->getName() + ".c"); // Update operands due to translation. - for (User::op_iterator i = N->op_begin(), e = N->op_end(); i != e; ++i) { - DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(*i); + for (Use &Op : N->operands()) { + DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(Op); if (PI != TranslateMap.end()) - *i = PI->second; + Op = PI->second; } // Check for trivial simplification. @@ -2505,8 +2681,9 @@ EdgeBB->getInstList().insert(InsertPt, N); // Register the new instruction with the assumption cache if necessary. - if (AC && match(N, m_Intrinsic<Intrinsic::assume>())) - AC->registerAssumption(cast<IntrinsicInst>(N)); + if (auto *Assume = dyn_cast<AssumeInst>(N)) + if (AC) + AC->registerAssumption(Assume); } } @@ -2519,11 +2696,12 @@ PredBBTI->setSuccessor(i, EdgeBB); } - Updates.push_back({DominatorTree::Insert, PredBB, EdgeBB}); - Updates.push_back({DominatorTree::Delete, PredBB, BB}); + if (DTU) { + Updates.push_back({DominatorTree::Insert, PredBB, EdgeBB}); + Updates.push_back({DominatorTree::Delete, PredBB, BB}); - if (DTU) DTU->applyUpdates(Updates); + } // Recurse, simplifying any other constants. return FoldCondBranchOnPHI(BI, DTU, DL, AC) || true; @@ -2545,12 +2723,54 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, BasicBlock *BB = PN->getParent(); BasicBlock *IfTrue, *IfFalse; - Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse); - if (!IfCond || - // Don't bother if the branch will be constant folded trivially. - isa<ConstantInt>(IfCond)) + BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse); + if (!DomBI) + return false; + Value *IfCond = DomBI->getCondition(); + // Don't bother if the branch will be constant folded trivially.
+ if (isa<ConstantInt>(IfCond)) return false; + BasicBlock *DomBlock = DomBI->getParent(); + SmallVector<BasicBlock *, 2> IfBlocks; + llvm::copy_if( + PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) { + return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional(); + }); + assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) && + "Will have either one or two blocks to speculate."); + + // If the branch is non-unpredictable, see if we either predictably jump to + // the merge bb (if we have only a single 'then' block), or if we predictably + // jump to one specific 'then' block (if we have two of them). + // It isn't beneficial to speculatively execute the code + // from the block that we know is predictably not entered. + if (!DomBI->getMetadata(LLVMContext::MD_unpredictable)) { + uint64_t TWeight, FWeight; + if (DomBI->extractProfMetadata(TWeight, FWeight) && + (TWeight + FWeight) != 0) { + BranchProbability BITrueProb = + BranchProbability::getBranchProbability(TWeight, TWeight + FWeight); + BranchProbability Likely = TTI.getPredictableBranchThreshold(); + BranchProbability BIFalseProb = BITrueProb.getCompl(); + if (IfBlocks.size() == 1) { + BranchProbability BIBBProb = + DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb; + if (BIBBProb >= Likely) + return false; + } else { + if (BITrueProb >= Likely || BIFalseProb >= Likely) + return false; + } + } + } + + // Don't try to fold an unreachable block. For example, the phi node itself + // can't be the candidate if-condition for a select that we want to form. + if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond)) + if (IfCondPhiInst->getParent() == BB) + return false; + // Okay, we found that we can merge this two-entry phi node into a select. // Doing so would require us to fold *all* two entry phi nodes in this block. // At some point this becomes non-profitable (particularly if the target @@ -2565,7 +2785,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, // instructions. While we are at it, keep track of the instructions // that need to be moved to the dominating block. SmallPtrSet<Instruction *, 4> AggressiveInsts; - int BudgetRemaining = + InstructionCost Cost = 0; + InstructionCost Budget = TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; bool Changed = false; @@ -2578,10 +2799,10 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, continue; } - if (!DominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts, - BudgetRemaining, TTI) || - !DominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts, - BudgetRemaining, TTI)) + if (!dominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts, + Cost, Budget, TTI) || + !dominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts, + Cost, Budget, TTI)) return Changed; } @@ -2600,13 +2821,20 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, return match(V0, m_Not(m_Value())) && match(V1, Invertible); }; - // Don't fold i1 branches on PHIs which contain binary operators, unless one - // of the incoming values is an 'not' and another one is freely invertible. + // Don't fold i1 branches on PHIs which contain binary operators or + // (possibly inverted) select form of or/ands, unless one of + // the incoming values is an 'not' and another one is freely invertible. // These can often be turned into switches and other things. 
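For orientation, the rewrite this function guards ultimately turns each two-entry PHI into a select on the dominating condition; the hunks further down do this with getIncomingValueForBlock. Reduced to its core, and assuming PN's two incoming blocks are exactly IfTrue and IfFalse:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static void phiToSelect(PHINode *PN, Value *IfCond, BasicBlock *IfTrue,
                        BasicBlock *IfFalse, Instruction *InsertBefore) {
  IRBuilder<> Builder(InsertBefore);
  Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
  Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
  Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal);
  PN->replaceAllUsesWith(Sel);
  Sel->takeName(PN);
  PN->eraseFromParent();
}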
+ auto IsBinOpOrAnd = [](Value *V) { + return match( + V, m_CombineOr( + m_BinOp(), + m_CombineOr(m_Select(m_Value(), m_ImmConstant(), m_Value()), + m_Select(m_Value(), m_Value(), m_ImmConstant())))); + }; if (PN->getType()->isIntegerTy(1) && - (isa<BinaryOperator>(PN->getIncomingValue(0)) || - isa<BinaryOperator>(PN->getIncomingValue(1)) || - isa<BinaryOperator>(IfCond)) && + (IsBinOpOrAnd(PN->getIncomingValue(0)) || + IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) && !CanHoistNotFromBothValues(PN->getIncomingValue(0), PN->getIncomingValue(1))) return Changed; @@ -2615,14 +2843,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, // in the predecessor blocks can be promoted as well. If not, we won't be able // to get rid of the control flow, so it's not worth promoting to select // instructions. - BasicBlock *DomBlock = nullptr; - BasicBlock *IfBlock1 = PN->getIncomingBlock(0); - BasicBlock *IfBlock2 = PN->getIncomingBlock(1); - if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) { - IfBlock1 = nullptr; - } else { - DomBlock = *pred_begin(IfBlock1); - for (BasicBlock::iterator I = IfBlock1->begin(); !I->isTerminator(); ++I) + for (BasicBlock *IfBlock : IfBlocks) + for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I) if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I) && !isa<PseudoProbeInst>(I)) { // This is not an aggressive instruction that we can promote. @@ -2630,22 +2852,11 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, // the xform is not worth it. return Changed; } - } - if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) { - IfBlock2 = nullptr; - } else { - DomBlock = *pred_begin(IfBlock2); - for (BasicBlock::iterator I = IfBlock2->begin(); !I->isTerminator(); ++I) - if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I) && - !isa<PseudoProbeInst>(I)) { - // This is not an aggressive instruction that we can promote. - // Because of this, we won't be able to get rid of the control flow, so - // the xform is not worth it. - return Changed; - } - } - assert(DomBlock && "Failed to find root DomBlock"); + // If either of the blocks has it's address taken, we can't do this fold. + if (any_of(IfBlocks, + [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); })) + return Changed; LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: " << IfTrue->getName() @@ -2653,16 +2864,13 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, // If we can still promote the PHI nodes after this gauntlet of tests, // do all of the PHI's now. - Instruction *InsertPt = DomBlock->getTerminator(); - IRBuilder<NoFolder> Builder(InsertPt); // Move all 'aggressive' instructions, which are defined in the // conditional parts of the if's up to the dominating block. - if (IfBlock1) - hoistAllInstructionsInto(DomBlock, InsertPt, IfBlock1); - if (IfBlock2) - hoistAllInstructionsInto(DomBlock, InsertPt, IfBlock2); + for (BasicBlock *IfBlock : IfBlocks) + hoistAllInstructionsInto(DomBlock, DomBI, IfBlock); + IRBuilder<NoFolder> Builder(DomBI); // Propagate fast-math-flags from phi nodes to replacement selects. IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { @@ -2670,20 +2878,18 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, Builder.setFastMathFlags(PN->getFastMathFlags()); // Change the PHI node into a select instruction. 
- Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse); - Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue); + Value *TrueVal = PN->getIncomingValueForBlock(IfTrue); + Value *FalseVal = PN->getIncomingValueForBlock(IfFalse); - Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", InsertPt); + Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", DomBI); PN->replaceAllUsesWith(Sel); Sel->takeName(PN); PN->eraseFromParent(); } - // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement + // At this point, all IfBlocks are empty, so our if statement // has been flattened. Change DomBlock to jump directly to our new block to // avoid other simplifycfg's kicking in on the diamond. - Instruction *OldTI = DomBlock->getTerminator(); - Builder.SetInsertPoint(OldTI); Builder.CreateBr(BB); SmallVector<DominatorTree::UpdateType, 3> Updates; @@ -2693,115 +2899,24 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, Updates.push_back({DominatorTree::Delete, DomBlock, Successor}); } - OldTI->eraseFromParent(); + DomBI->eraseFromParent(); if (DTU) DTU->applyUpdates(Updates); return true; } -/// If we found a conditional branch that goes to two returning blocks, -/// try to merge them together into one return, -/// introducing a select if the return values disagree. -bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI, - IRBuilder<> &Builder) { - auto *BB = BI->getParent(); - assert(BI->isConditional() && "Must be a conditional branch"); - BasicBlock *TrueSucc = BI->getSuccessor(0); - BasicBlock *FalseSucc = BI->getSuccessor(1); - // NOTE: destinations may match, this could be degenerate uncond branch. - ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator()); - ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator()); - - // Check to ensure both blocks are empty (just a return) or optionally empty - // with PHI nodes. If there are other instructions, merging would cause extra - // computation on one path or the other. - if (!TrueSucc->getFirstNonPHIOrDbg()->isTerminator()) - return false; - if (!FalseSucc->getFirstNonPHIOrDbg()->isTerminator()) - return false; - - Builder.SetInsertPoint(BI); - // Okay, we found a branch that is going to two return nodes. If - // there is no return value for this function, just change the - // branch into a return. - if (FalseRet->getNumOperands() == 0) { - TrueSucc->removePredecessor(BB); - FalseSucc->removePredecessor(BB); - Builder.CreateRetVoid(); - EraseTerminatorAndDCECond(BI); - if (DTU) { - SmallVector<DominatorTree::UpdateType, 2> Updates; - Updates.push_back({DominatorTree::Delete, BB, TrueSucc}); - if (TrueSucc != FalseSucc) - Updates.push_back({DominatorTree::Delete, BB, FalseSucc}); - DTU->applyUpdates(Updates); - } - return true; - } - - // Otherwise, figure out what the true and false return values are - // so we can insert a new select instruction. - Value *TrueValue = TrueRet->getReturnValue(); - Value *FalseValue = FalseRet->getReturnValue(); - - // Unwrap any PHI nodes in the return blocks. - if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue)) - if (TVPN->getParent() == TrueSucc) - TrueValue = TVPN->getIncomingValueForBlock(BB); - if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue)) - if (FVPN->getParent() == FalseSucc) - FalseValue = FVPN->getIncomingValueForBlock(BB); - - // In order for this transformation to be safe, we must be able to - // unconditionally execute both operands to the return. 
This is - // normally the case, but we could have a potentially-trapping - // constant expression that prevents this transformation from being - // safe. - if (ConstantExpr *TCV = dyn_cast_or_null<ConstantExpr>(TrueValue)) - if (TCV->canTrap()) - return false; - if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue)) - if (FCV->canTrap()) - return false; - - // Okay, we collected all the mapped values and checked them for sanity, and - // defined to really do this transformation. First, update the CFG. - TrueSucc->removePredecessor(BB); - FalseSucc->removePredecessor(BB); - - // Insert select instructions where needed. - Value *BrCond = BI->getCondition(); - if (TrueValue) { - // Insert a select if the results differ. - if (TrueValue == FalseValue || isa<UndefValue>(FalseValue)) { - } else if (isa<UndefValue>(TrueValue)) { - TrueValue = FalseValue; - } else { - TrueValue = - Builder.CreateSelect(BrCond, TrueValue, FalseValue, "retval", BI); - } - } - - Value *RI = - !TrueValue ? Builder.CreateRetVoid() : Builder.CreateRet(TrueValue); - - (void)RI; - - LLVM_DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" - << "\n " << *BI << "\nNewRet = " << *RI << "\nTRUEBLOCK: " - << *TrueSucc << "\nFALSEBLOCK: " << *FalseSucc); - - EraseTerminatorAndDCECond(BI); - if (DTU) { - SmallVector<DominatorTree::UpdateType, 2> Updates; - Updates.push_back({DominatorTree::Delete, BB, TrueSucc}); - if (TrueSucc != FalseSucc) - Updates.push_back({DominatorTree::Delete, BB, FalseSucc}); - DTU->applyUpdates(Updates); - } - - return true; +static Value *createLogicalOp(IRBuilderBase &Builder, + Instruction::BinaryOps Opc, Value *LHS, + Value *RHS, const Twine &Name = "") { + // Try to relax logical op to binary op. + if (impliesPoison(RHS, LHS)) + return Builder.CreateBinOp(Opc, LHS, RHS, Name); + if (Opc == Instruction::And) + return Builder.CreateLogicalAnd(LHS, RHS, Name); + if (Opc == Instruction::Or) + return Builder.CreateLogicalOr(LHS, RHS, Name); + llvm_unreachable("Invalid logical opcode"); } /// Return true if either PBI or BI has branch weight available, and store @@ -2827,30 +2942,53 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, } } -// Determine if the two branches share a common destination, -// and deduce a glue that we need to use to join branch's conditions -// to arrive at the common destination. +/// Determine if the two branches share a common destination and deduce a glue +/// that joins the branches' conditions to arrive at the common destination if +/// that would be profitable. 
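
// A standalone sketch of the relaxation performed by createLogicalOp above,
// assuming LLVM 13 headers; emitPoisonSafeAnd is a hypothetical helper, not
// an LLVM API. The point: `and i1 %a, %b` propagates poison from %b even
// when %a is false, while `select i1 %a, i1 %b, i1 false` does not, so the
// cheaper bitwise form may only be used when poison in RHS already implies
// poison in LHS.
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static Value *emitPoisonSafeAnd(IRBuilderBase &Builder, Value *LHS,
                                Value *RHS) {
  if (impliesPoison(RHS, LHS))
    return Builder.CreateAnd(LHS, RHS);      // binary form is safe here
  return Builder.CreateLogicalAnd(LHS, RHS); // select(LHS, RHS, false)
}
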
static Optional<std::pair<Instruction::BinaryOps, bool>> -CheckIfCondBranchesShareCommonDestination(BranchInst *BI, BranchInst *PBI) { +shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, + const TargetTransformInfo *TTI) { assert(BI && PBI && BI->isConditional() && PBI->isConditional() && "Both blocks must end with a conditional branches."); assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) && "PredBB must be a predecessor of BB."); - if (PBI->getSuccessor(0) == BI->getSuccessor(0)) - return {{Instruction::Or, false}}; - else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) - return {{Instruction::And, false}}; - else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) - return {{Instruction::And, true}}; - else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) - return {{Instruction::Or, true}}; + // We have the potential to fold the conditions together, but if the + // predecessor branch is predictable, we may not want to merge them. + uint64_t PTWeight, PFWeight; + BranchProbability PBITrueProb, Likely; + if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) && + PBI->extractProfMetadata(PTWeight, PFWeight) && + (PTWeight + PFWeight) != 0) { + PBITrueProb = + BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight); + Likely = TTI->getPredictableBranchThreshold(); + } + + if (PBI->getSuccessor(0) == BI->getSuccessor(0)) { + // Speculate the 2nd condition unless the 1st is probably true. + if (PBITrueProb.isUnknown() || PBITrueProb < Likely) + return {{Instruction::Or, false}}; + } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) { + // Speculate the 2nd condition unless the 1st is probably false. + if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely) + return {{Instruction::And, false}}; + } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) { + // Speculate the 2nd condition unless the 1st is probably true. + if (PBITrueProb.isUnknown() || PBITrueProb < Likely) + return {{Instruction::And, true}}; + } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) { + // Speculate the 2nd condition unless the 1st is probably false. + if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely) + return {{Instruction::Or, true}}; + } return None; } -static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, +static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, - MemorySSAUpdater *MSSAU) { + MemorySSAUpdater *MSSAU, + const TargetTransformInfo *TTI) { BasicBlock *BB = BI->getParent(); BasicBlock *PredBlock = PBI->getParent(); @@ -2858,7 +2996,7 @@ static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, Instruction::BinaryOps Opc; bool InvertPredCond; std::tie(Opc, InvertPredCond) = - *CheckIfCondBranchesShareCommonDestination(BI, PBI); + *shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI); LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); @@ -2949,9 +3087,9 @@ static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, // Now that the Cond was cloned into the predecessor basic block, // or/and the two conditions together. - Instruction *NewCond = cast<Instruction>(Builder.CreateBinOp( - Opc, PBI->getCondition(), VMap[BI->getCondition()], "or.cond")); - PBI->setCondition(NewCond); + Value *BICond = VMap[BI->getCondition()]; + PBI->setCondition( + createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond")); // Copy any debug value intrinsics into the end of PredBlock. 
for (Instruction &I : *BB) {
@@ -2980,11 +3118,6 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
     return false;
 
   BasicBlock *BB = BI->getParent();
-
-  const unsigned PredCount = pred_size(BB);
-
-  bool Changed = false;
-
   TargetTransformInfo::TargetCostKind CostKind =
     BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize
                                   : TargetTransformInfo::TCK_SizeAndLatency;
@@ -2993,49 +3126,24 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
 
   if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
       Cond->getParent() != BB || !Cond->hasOneUse())
-    return Changed;
-
-  // Only allow this transformation if computing the condition doesn't involve
-  // too many instructions and these involved instructions can be executed
-  // unconditionally. We denote all involved instructions except the condition
-  // as "bonus instructions", and only allow this transformation when the
-  // number of the bonus instructions we'll need to create when cloning into
-  // each predecessor does not exceed a certain threshold.
-  unsigned NumBonusInsts = 0;
-  for (Instruction &I : *BB) {
-    // Don't check the branch condition comparison itself.
-    if (&I == Cond)
-      continue;
-    // Ignore dbg intrinsics, and the terminator.
-    if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I))
-      continue;
-    // I must be safe to execute unconditionally.
-    if (!isSafeToSpeculativelyExecute(&I))
-      return Changed;
-
-    // Account for the cost of duplicating this instruction into each
-    // predecessor.
-    NumBonusInsts += PredCount;
-    // Early exits once we reach the limit.
-    if (NumBonusInsts > BonusInstThreshold)
-      return Changed;
-  }
+    return false;
 
   // Cond is known to be a compare or binary operator. Check to make sure that
   // neither operand is a potentially-trapping constant expression.
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0)))
     if (CE->canTrap())
-      return Changed;
+      return false;
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
     if (CE->canTrap())
-      return Changed;
+      return false;
 
   // Finally, don't infinitely unroll conditional loops.
   if (is_contained(successors(BB), BB))
-    return Changed;
+    return false;
 
-  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
-    BasicBlock *PredBlock = *PI;
+  // Which predecessors do we want to deal with?
+  SmallVector<BasicBlock *, 8> Preds;
+  for (BasicBlock *PredBlock : predecessors(BB)) {
     BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
 
     // Check that we have two conditional branches. If there is a PHI node in
@@ -3047,8 +3155,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
     // Determine if the two branches share a common destination.
     Instruction::BinaryOps Opc;
     bool InvertPredCond;
-    if (auto Recepie = CheckIfCondBranchesShareCommonDestination(BI, PBI))
-      std::tie(Opc, InvertPredCond) = *Recepie;
+    if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
+      std::tie(Opc, InvertPredCond) = *Recipe;
     else
       continue;
@@ -3056,7 +3164,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
     // transformation.
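
// Two gating checks decide whether folding into a predecessor is worthwhile;
// a standalone sketch of both, assuming LLVM 13's BranchProbability and
// InstructionCost (isPredictablyTrue and fitsBudget are hypothetical
// helpers). First, a predecessor branch that profile data marks as highly
// predictable is left alone, since merging its condition would speculate the
// second compare on a path that almost never needs it. Second, per-
// instruction costs accumulate against a budget; an invalid (uncomputable)
// cost is assumed to compare greater than any valid one, so it too rejects
// the fold.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/InstructionCost.h"
#include <cstdint>
using namespace llvm;

// E.g. weights {999, 1} against the usual 99% threshold: predictable.
static bool isPredictablyTrue(uint64_t TrueWeight, uint64_t FalseWeight,
                              BranchProbability Likely) {
  if (TrueWeight + FalseWeight == 0)
    return false; // no profile data: treat as unpredictable
  return BranchProbability::getBranchProbability(
             TrueWeight, TrueWeight + FalseWeight) >= Likely;
}

static bool fitsBudget(ArrayRef<InstructionCost> PerInstCosts,
                       InstructionCost Budget) {
  InstructionCost Total = 0;
  for (const InstructionCost &C : PerInstCosts) {
    Total += C;
    if (Total > Budget)
      return false; // eagerly refuse once over budget
  }
  return true;
}
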
if (TTI) { Type *Ty = BI->getCondition()->getType(); - unsigned Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind); + InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind); if (InvertPredCond && (!PBI->getCondition()->hasOneUse() || !isa<CmpInst>(PBI->getCondition()))) Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind); @@ -3065,9 +3173,48 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU, continue; } - return PerformBranchToCommonDestFolding(BI, PBI, DTU, MSSAU); + // Ok, we do want to deal with this predecessor. Record it. + Preds.emplace_back(PredBlock); } - return Changed; + + // If there aren't any predecessors into which we can fold, + // don't bother checking the cost. + if (Preds.empty()) + return false; + + // Only allow this transformation if computing the condition doesn't involve + // too many instructions and these involved instructions can be executed + // unconditionally. We denote all involved instructions except the condition + // as "bonus instructions", and only allow this transformation when the + // number of the bonus instructions we'll need to create when cloning into + // each predecessor does not exceed a certain threshold. + unsigned NumBonusInsts = 0; + const unsigned PredCount = Preds.size(); + for (Instruction &I : *BB) { + // Don't check the branch condition comparison itself. + if (&I == Cond) + continue; + // Ignore dbg intrinsics, and the terminator. + if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I)) + continue; + // I must be safe to execute unconditionally. + if (!isSafeToSpeculativelyExecute(&I)) + return false; + + // Account for the cost of duplicating this instruction into each + // predecessor. + NumBonusInsts += PredCount; + // Early exits once we reach the limit. + if (NumBonusInsts > BonusInstThreshold) + return false; + } + + // Ok, we have the budget. Perform the transformation. + for (BasicBlock *PredBlock : Preds) { + auto *PBI = cast<BranchInst>(PredBlock->getTerminator()); + return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI); + } + return false; } // If there is only one store in BB1 and BB2, return it, otherwise return @@ -3190,7 +3337,8 @@ static bool mergeConditionalStoreToAddress( // Heuristic: if the block can be if-converted/phi-folded and the // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to // thread this store. - int BudgetRemaining = + InstructionCost Cost = 0; + InstructionCost Budget = PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; for (auto &I : BB->instructionsWithoutDebug()) { // Consider terminator instruction to be free. @@ -3206,11 +3354,11 @@ static bool mergeConditionalStoreToAddress( return false; // Not in white-list - not worthwhile folding. // And finally, if this is a non-free instruction that we are okay // speculating, ensure that we consider the speculation budget. - BudgetRemaining -= TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); - if (BudgetRemaining < 0) + Cost += TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); + if (Cost > Budget) return false; // Eagerly refuse to fold as soon as we're out of budget. 
} - assert(BudgetRemaining >= 0 && + assert(Cost <= Budget && "When we run out of budget we will eagerly return from within the " "per-instruction loop."); return true; @@ -3594,7 +3742,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, BasicBlock *InfLoopBlock = BasicBlock::Create(BB->getContext(), "infloop", BB->getParent()); BranchInst::Create(InfLoopBlock, InfLoopBlock); - Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock}); + if (DTU) + Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock}); OtherDest = InfLoopBlock; } @@ -3614,18 +3763,20 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, BICond = Builder.CreateNot(BICond, BICond->getName() + ".not"); // Merge the conditions. - Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge"); + Value *Cond = + createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge"); // Modify PBI to branch on the new condition to the new dests. PBI->setCondition(Cond); PBI->setSuccessor(0, CommonDest); PBI->setSuccessor(1, OtherDest); - Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest}); - Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest}); + if (DTU) { + Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest}); + Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest}); - if (DTU) DTU->applyUpdates(Updates); + } // Update branch weight for PBI. uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; @@ -3714,7 +3865,7 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm, BasicBlock *KeepEdge1 = TrueBB; BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr; - SmallSetVector<BasicBlock *, 2> RemovedSuccessors; + SmallPtrSet<BasicBlock *, 2> RemovedSuccessors; // Then remove the rest. for (BasicBlock *Succ : successors(OldTerm)) { @@ -3944,17 +4095,19 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt( SIW.setSuccessorWeight(0, *NewW); } SIW.addCase(Cst, NewBB, NewW); - Updates.push_back({DominatorTree::Insert, Pred, NewBB}); + if (DTU) + Updates.push_back({DominatorTree::Insert, Pred, NewBB}); } // NewBB branches to the phi block, add the uncond branch and the phi entry. Builder.SetInsertPoint(NewBB); Builder.SetCurrentDebugLocation(SI->getDebugLoc()); Builder.CreateBr(SuccBlock); - Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock}); PHIUse->addIncoming(NewCst, NewBB); - if (DTU) + if (DTU) { + Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock}); DTU->applyUpdates(Updates); + } return true; } @@ -4011,11 +4164,6 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI, BasicBlock *BB = BI->getParent(); - // MSAN does not like undefs as branch condition which can be introduced - // with "explicit branch". - if (ExtraCase && BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory)) - return false; - LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() << " cases into SWITCH. BB is:\n" << *BB); @@ -4033,6 +4181,16 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI, Instruction *OldTI = BB->getTerminator(); Builder.SetInsertPoint(OldTI); + // There can be an unintended UB if extra values are Poison. Before the + // transformation, extra values may not be evaluated according to the + // condition, and it will not raise UB. But after transformation, we are + // evaluating extra values before checking the condition, and it will raise + // UB. 
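
// A freestanding sketch of the guard the lines below add, assuming LLVM 13's
// ValueTracking API; freezeIfNeeded is a hypothetical helper. Branching on
// undef or poison is immediate UB, so once the extra compare is hoisted
// above the switch, its operand must be frozen unless it is provably well
// defined.
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static Value *freezeIfNeeded(IRBuilderBase &Builder, Value *V,
                             AssumptionCache *AC, const Instruction *CtxI) {
  if (isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, /*DT=*/nullptr))
    return V; // provably neither undef nor poison: no freeze needed
  return Builder.CreateFreeze(V); // pins V to some fixed bit pattern
}
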
It can be solved by adding freeze instruction to extra values. + AssumptionCache *AC = Options.AC; + + if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr)) + ExtraCase = Builder.CreateFreeze(ExtraCase); + if (TrueWhenEqual) Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB); else @@ -4040,7 +4198,8 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI, OldTI->eraseFromParent(); - Updates.push_back({DominatorTree::Insert, BB, EdgeBB}); + if (DTU) + Updates.push_back({DominatorTree::Insert, BB, EdgeBB}); // If there are PHI nodes in EdgeBB, then we need to add a new entry to them // for the edge we just added. @@ -4162,9 +4321,8 @@ bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) { while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1) BB->removePredecessor(TrivialBB, true); - for (pred_iterator PI = pred_begin(TrivialBB), PE = pred_end(TrivialBB); - PI != PE;) { - BasicBlock *Pred = *PI++; + for (BasicBlock *Pred : + llvm::make_early_inc_range(predecessors(TrivialBB))) { removeUnwindEdge(Pred, DTU); ++NumInvokes; } @@ -4181,12 +4339,8 @@ bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) { } // Delete the resume block if all its predecessors have been removed. - if (pred_empty(BB)) { - if (DTU) - DTU->deleteBB(BB); - else - BB->eraseFromParent(); - } + if (pred_empty(BB)) + DeleteDeadBlock(BB, DTU); return !TrivialUnwindBlocks.empty(); } @@ -4204,17 +4358,13 @@ bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) { return false; // Turn all invokes that unwind here into calls and delete the basic block. - for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { - BasicBlock *Pred = *PI++; + for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) { removeUnwindEdge(Pred, DTU); ++NumInvokes; } // The landingpad is now unreachable. Zap it. - if (DTU) - DTU->deleteBB(BB); - else - BB->eraseFromParent(); + DeleteDeadBlock(BB, DTU); return true; } @@ -4256,12 +4406,8 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) { if (UnwindDest) { // First, go through the PHI nodes in UnwindDest and update any nodes that // reference the block we are removing - for (BasicBlock::iterator I = UnwindDest->begin(), - IE = DestEHPad->getIterator(); - I != IE; ++I) { - PHINode *DestPN = cast<PHINode>(I); - - int Idx = DestPN->getBasicBlockIndex(BB); + for (PHINode &DestPN : UnwindDest->phis()) { + int Idx = DestPN.getBasicBlockIndex(BB); // Since BB unwinds to UnwindDest, it has to be in the PHI node. assert(Idx != -1); // This PHI node has an incoming value that corresponds to a control @@ -4275,40 +4421,21 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) { // predecessors must unwind to these blocks, and since no instruction // can have multiple unwind destinations, there will be no overlap in // incoming blocks between SrcPN and DestPN. - Value *SrcVal = DestPN->getIncomingValue(Idx); + Value *SrcVal = DestPN.getIncomingValue(Idx); PHINode *SrcPN = dyn_cast<PHINode>(SrcVal); - // Remove the entry for the block we are deleting. - DestPN->removeIncomingValue(Idx, false); - - if (SrcPN && SrcPN->getParent() == BB) { - // If the incoming value was a PHI node in the cleanup pad we are - // removing, we need to merge that PHI node's incoming values into - // DestPN. 
- for (unsigned SrcIdx = 0, SrcE = SrcPN->getNumIncomingValues(); - SrcIdx != SrcE; ++SrcIdx) { - DestPN->addIncoming(SrcPN->getIncomingValue(SrcIdx), - SrcPN->getIncomingBlock(SrcIdx)); - } - } else { - // Otherwise, the incoming value came from above BB and - // so we can just reuse it. We must associate all of BB's - // predecessors with this value. - for (auto *pred : predecessors(BB)) { - DestPN->addIncoming(SrcVal, pred); - } + bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB; + for (auto *Pred : predecessors(BB)) { + Value *Incoming = + NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal; + DestPN.addIncoming(Incoming, Pred); } } // Sink any remaining PHI nodes directly into UnwindDest. Instruction *InsertPt = DestEHPad; - for (BasicBlock::iterator I = BB->begin(), - IE = BB->getFirstNonPHI()->getIterator(); - I != IE;) { - // The iterator must be incremented here because the instructions are - // being moved to another block. - PHINode *PN = cast<PHINode>(I++); - if (PN->use_empty() || !PN->isUsedOutsideOfBlock(BB)) + for (PHINode &PN : make_early_inc_range(BB->phis())) { + if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB)) // If the PHI node has no uses or all of its uses are in this basic // block (meaning they are debug or lifetime intrinsics), just leave // it. It will be erased when we erase BB below. @@ -4320,36 +4447,40 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) { // BB. In this case, the PHI value must reference itself. for (auto *pred : predecessors(UnwindDest)) if (pred != BB) - PN->addIncoming(PN, pred); - PN->moveBefore(InsertPt); + PN.addIncoming(&PN, pred); + PN.moveBefore(InsertPt); + // Also, add a dummy incoming value for the original BB itself, + // so that the PHI is well-formed until we drop said predecessor. + PN.addIncoming(UndefValue::get(PN.getType()), BB); } } std::vector<DominatorTree::UpdateType> Updates; - for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { - // The iterator must be updated here because we are removing this pred. - BasicBlock *PredBB = *PI++; + // We use make_early_inc_range here because we will remove all predecessors. + for (BasicBlock *PredBB : llvm::make_early_inc_range(predecessors(BB))) { if (UnwindDest == nullptr) { - if (DTU) + if (DTU) { DTU->applyUpdates(Updates); - Updates.clear(); + Updates.clear(); + } removeUnwindEdge(PredBB, DTU); ++NumInvokes; } else { + BB->removePredecessor(PredBB); Instruction *TI = PredBB->getTerminator(); TI->replaceUsesOfWith(BB, UnwindDest); - Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest}); - Updates.push_back({DominatorTree::Delete, PredBB, BB}); + if (DTU) { + Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest}); + Updates.push_back({DominatorTree::Delete, PredBB, BB}); + } } } - if (DTU) { + if (DTU) DTU->applyUpdates(Updates); - DTU->deleteBB(BB); - } else - // The cleanup pad is now unreachable. Zap it. - BB->eraseFromParent(); + + DeleteDeadBlock(BB, DTU); return true; } @@ -4403,61 +4534,7 @@ bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) { return false; } -bool SimplifyCFGOpt::simplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { - BasicBlock *BB = RI->getParent(); - if (!BB->getFirstNonPHIOrDbg()->isTerminator()) - return false; - - // Find predecessors that end with branches. 
- SmallVector<BasicBlock *, 8> UncondBranchPreds; - SmallVector<BranchInst *, 8> CondBranchPreds; - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *P = *PI; - Instruction *PTI = P->getTerminator(); - if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) { - if (BI->isUnconditional()) - UncondBranchPreds.push_back(P); - else - CondBranchPreds.push_back(BI); - } - } - - // If we found some, do the transformation! - if (!UncondBranchPreds.empty() && DupRet) { - while (!UncondBranchPreds.empty()) { - BasicBlock *Pred = UncondBranchPreds.pop_back_val(); - LLVM_DEBUG(dbgs() << "FOLDING: " << *BB - << "INTO UNCOND BRANCH PRED: " << *Pred); - (void)FoldReturnIntoUncondBranch(RI, BB, Pred, DTU); - } - - // If we eliminated all predecessors of the block, delete the block now. - if (pred_empty(BB)) { - // We know there are no successors, so just nuke the block. - if (DTU) - DTU->deleteBB(BB); - else - BB->eraseFromParent(); - } - - return true; - } - - // Check out all of the conditional branches going to this return - // instruction. If any of them just select between returns, change the - // branch itself into a select/return pair. - while (!CondBranchPreds.empty()) { - BranchInst *BI = CondBranchPreds.pop_back_val(); - - // Check to see if the non-BB successor is also a return block. - if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) && - isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) && - SimplifyCondBranchToTwoReturns(BI, Builder)) - return true; - } - return false; -} - +// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()! bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { BasicBlock *BB = UI->getParent(); @@ -4468,46 +4545,19 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { while (UI->getIterator() != BB->begin()) { BasicBlock::iterator BBI = UI->getIterator(); --BBI; - // Do not delete instructions that can have side effects which might cause - // the unreachable to not be reachable; specifically, calls and volatile - // operations may have this effect. - if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) - break; - if (BBI->mayHaveSideEffects()) { - if (auto *SI = dyn_cast<StoreInst>(BBI)) { - if (SI->isVolatile()) - break; - } else if (auto *LI = dyn_cast<LoadInst>(BBI)) { - if (LI->isVolatile()) - break; - } else if (auto *RMWI = dyn_cast<AtomicRMWInst>(BBI)) { - if (RMWI->isVolatile()) - break; - } else if (auto *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) { - if (CXI->isVolatile()) - break; - } else if (isa<CatchPadInst>(BBI)) { - // A catchpad may invoke exception object constructors and such, which - // in some languages can be arbitrary code, so be conservative by - // default. - // For CoreCLR, it just involves a type test, so can be removed. - if (classifyEHPersonality(BB->getParent()->getPersonalityFn()) != - EHPersonality::CoreCLR) - break; - } else if (!isa<FenceInst>(BBI) && !isa<VAArgInst>(BBI) && - !isa<LandingPadInst>(BBI)) { - break; - } - // Note that deleting LandingPad's here is in fact okay, although it - // involves a bit of subtle reasoning. If this inst is a LandingPad, - // all the predecessors of this block will be the unwind edges of Invokes, - // and we can therefore guarantee this block will be erased. - } + if (!isGuaranteedToTransferExecutionToSuccessor(&*BBI)) + break; // Can not drop any more instructions. We're done here. + // Otherwise, this instruction can be freely erased, + // even if it is not side-effect free. 
+ + // Note that deleting EH's here is in fact okay, although it involves a bit + // of subtle reasoning. If this inst is an EH, all the predecessors of this + // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn, + // and we can therefore guarantee this block will be erased. // Delete this instruction (any uses are guaranteed to be dead) - if (!BBI->use_empty()) - BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); + BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType())); BBI->eraseFromParent(); Changed = true; } @@ -4548,7 +4598,8 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { EraseTerminatorAndDCECond(BI); Changed = true; } - Updates.push_back({DominatorTree::Delete, Predecessor, BB}); + if (DTU) + Updates.push_back({DominatorTree::Delete, Predecessor, BB}); } else if (auto *SI = dyn_cast<SwitchInst>(TI)) { SwitchInstProfUpdateWrapper SU(*SI); for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) { @@ -4562,21 +4613,23 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { Changed = true; } // Note that the default destination can't be removed! - if (SI->getDefaultDest() != BB) + if (DTU && SI->getDefaultDest() != BB) Updates.push_back({DominatorTree::Delete, Predecessor, BB}); } else if (auto *II = dyn_cast<InvokeInst>(TI)) { if (II->getUnwindDest() == BB) { - if (DTU) + if (DTU) { DTU->applyUpdates(Updates); - Updates.clear(); + Updates.clear(); + } removeUnwindEdge(TI->getParent(), DTU); Changed = true; } } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) { if (CSI->getUnwindDest() == BB) { - if (DTU) + if (DTU) { DTU->applyUpdates(Updates); - Updates.clear(); + Updates.clear(); + } removeUnwindEdge(TI->getParent(), DTU); Changed = true; continue; @@ -4592,23 +4645,28 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { Changed = true; } } - Updates.push_back({DominatorTree::Delete, Predecessor, BB}); + if (DTU) + Updates.push_back({DominatorTree::Delete, Predecessor, BB}); if (CSI->getNumHandlers() == 0) { if (CSI->hasUnwindDest()) { // Redirect all predecessors of the block containing CatchSwitchInst // to instead branch to the CatchSwitchInst's unwind destination. - for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) { - Updates.push_back({DominatorTree::Insert, PredecessorOfPredecessor, - CSI->getUnwindDest()}); - Updates.push_back( - {DominatorTree::Delete, PredecessorOfPredecessor, Predecessor}); + if (DTU) { + for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) { + Updates.push_back({DominatorTree::Insert, + PredecessorOfPredecessor, + CSI->getUnwindDest()}); + Updates.push_back({DominatorTree::Delete, + PredecessorOfPredecessor, Predecessor}); + } } Predecessor->replaceAllUsesWith(CSI->getUnwindDest()); } else { // Rewrite all preds to unwind to caller (or from invoke to call). 
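
// The backward-stripping loop above, restated as one self-contained sketch
// (assuming LLVM 13 headers; stripDeadInstsBeforeUnreachable is a
// hypothetical helper): anything guaranteed to transfer execution to its
// successor is erasable before an unreachable terminator, while a volatile
// access or a call that may not return stops the walk.
#include <iterator>
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static bool stripDeadInstsBeforeUnreachable(UnreachableInst *UI) {
  BasicBlock *BB = UI->getParent();
  bool Changed = false;
  while (UI->getIterator() != BB->begin()) {
    BasicBlock::iterator BBI = std::prev(UI->getIterator());
    if (!isGuaranteedToTransferExecutionToSuccessor(&*BBI))
      break; // cannot drop any more instructions
    // All uses are dead; poison is the preferred stand-in for dead values.
    BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
    BBI->eraseFromParent();
    Changed = true;
  }
  return Changed;
}
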
- if (DTU) + if (DTU) { DTU->applyUpdates(Updates); - Updates.clear(); + Updates.clear(); + } SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor)); for (BasicBlock *EHPred : EHPreds) removeUnwindEdge(EHPred, DTU); @@ -4622,7 +4680,8 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { (void)CRI; assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB && "Expected to always have an unwind to BB."); - Updates.push_back({DominatorTree::Delete, Predecessor, BB}); + if (DTU) + Updates.push_back({DominatorTree::Delete, Predecessor, BB}); new UnreachableInst(TI->getContext(), TI); TI->eraseFromParent(); Changed = true; @@ -4634,11 +4693,7 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { // If this block is now dead, remove it. if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) { - // We know there are no successors, so just nuke the block. - if (DTU) - DTU->deleteBB(BB); - else - BB->eraseFromParent(); + DeleteDeadBlock(BB, DTU); return true; } @@ -4669,8 +4724,9 @@ static void createUnreachableSwitchDefault(SwitchInst *Switch, {DominatorTree::Delete, BB, OrigDefaultBlock}}); SplitBlock(&*NewDefaultBlock, &NewDefaultBlock->front(), DTU); SmallVector<DominatorTree::UpdateType, 2> Updates; - for (auto *Successor : successors(NewDefaultBlock)) - Updates.push_back({DominatorTree::Delete, NewDefaultBlock, Successor}); + if (DTU) + for (auto *Successor : successors(NewDefaultBlock)) + Updates.push_back({DominatorTree::Delete, NewDefaultBlock, Successor}); auto *NewTerminator = NewDefaultBlock->getTerminator(); new UnreachableInst(Switch->getContext(), NewTerminator); EraseTerminatorAndDCECond(NewTerminator); @@ -4822,15 +4878,17 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, // Gather dead cases. 
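
// How the loop below recognizes a dead case (a sketch, assuming LLVM 13's
// KnownBits; caseIsDead is a hypothetical helper and omits the significant-
// bits limit): a case constant that sets a bit known to be zero, or misses a
// bit known to be one, can never match. E.g. if bit 0 of the condition is
// known zero, every odd case value is dead.
#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

static bool caseIsDead(const APInt &CaseVal, const KnownBits &Known) {
  return Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal);
}
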
SmallVector<ConstantInt *, 8> DeadCases; - SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases; + SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases; for (auto &Case : SI->cases()) { auto *Successor = Case.getCaseSuccessor(); - ++NumPerSuccessorCases[Successor]; + if (DTU) + ++NumPerSuccessorCases[Successor]; const APInt &CaseVal = Case.getCaseValue()->getValue(); if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) || (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) { DeadCases.push_back(Case.getCaseValue()); - --NumPerSuccessorCases[Successor]; + if (DTU) + --NumPerSuccessorCases[Successor]; LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal << " is dead.\n"); } @@ -4865,12 +4923,13 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, SIW.removeCase(CaseI); } - std::vector<DominatorTree::UpdateType> Updates; - for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases) - if (I.second == 0) - Updates.push_back({DominatorTree::Delete, SI->getParent(), I.first}); - if (DTU) + if (DTU) { + std::vector<DominatorTree::UpdateType> Updates; + for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases) + if (I.second == 0) + Updates.push_back({DominatorTree::Delete, SI->getParent(), I.first}); DTU->applyUpdates(Updates); + } return true; } @@ -5197,11 +5256,9 @@ InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder) { - assert(ResultVector.size() == 2 && - "We should have exactly two unique results at this point"); // If we are selecting between only two cases transform into a simple // select or a two-way select if default is possible. - if (ResultVector[0].second.size() == 1 && + if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 && ResultVector[1].second.size() == 1) { ConstantInt *const FirstCase = ResultVector[0].second[0]; ConstantInt *const SecondCase = ResultVector[1].second[0]; @@ -5220,6 +5277,17 @@ static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector, SelectValue, "switch.select"); } + // Handle the degenerate case where two cases have the same value. 
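
// A source-level view of the degenerate case handled below, where two case
// values share one result (plain C++; the function is illustrative and only
// mirrors what the emitted IR computes):
//
//   // before:
//   int f(int x) {
//     switch (x) {
//     case 3:
//     case 5:  return 42;
//     default: return 7;
//     }
//   }
//
int f(int x) { // after: two compares, an or, and a select
  bool Cmp1 = (x == 3);          // switch.selectcmp.case1
  bool Cmp2 = (x == 5);          // switch.selectcmp.case2
  return (Cmp1 | Cmp2) ? 42 : 7; // switch.selectcmp feeding the select
}
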
+ if (ResultVector.size() == 1 && ResultVector[0].second.size() == 2 && + DefaultResult) { + Value *Cmp1 = Builder.CreateICmpEQ( + Condition, ResultVector[0].second[0], "switch.selectcmp.case1"); + Value *Cmp2 = Builder.CreateICmpEQ( + Condition, ResultVector[0].second[1], "switch.selectcmp.case2"); + Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp"); + return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult); + } + return nullptr; } @@ -5234,7 +5302,7 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI, BasicBlock *SelectBB = SI->getParent(); BasicBlock *DestBB = PHI->getParent(); - if (!is_contained(predecessors(DestBB), SelectBB)) + if (DTU && !is_contained(predecessors(DestBB), SelectBB)) Updates.push_back({DominatorTree::Insert, SelectBB, DestBB}); Builder.CreateBr(DestBB); @@ -5244,13 +5312,15 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI, PHI->removeIncomingValue(SelectBB); PHI->addIncoming(SelectValue, SelectBB); + SmallPtrSet<BasicBlock *, 4> RemovedSuccessors; for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) { BasicBlock *Succ = SI->getSuccessor(i); if (Succ == DestBB) continue; Succ->removePredecessor(SelectBB); - Updates.push_back({DominatorTree::Delete, SelectBB, Succ}); + if (DTU && RemovedSuccessors.insert(Succ).second) + Updates.push_back({DominatorTree::Delete, SelectBB, Succ}); } SI->eraseFromParent(); if (DTU) @@ -5270,10 +5340,8 @@ static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder, SwitchCaseResultVectorTy UniqueResults; // Collect all the cases that will deliver the same value from the switch. if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult, - DL, TTI, 2, 1)) - return false; - // Selects choose between maximum two values. - if (UniqueResults.size() != 2) + DL, TTI, /*MaxUniqueResults*/2, + /*MaxCasesPerResult*/2)) return false; assert(PHI != nullptr && "PHI for value select not found"); @@ -5642,8 +5710,7 @@ static void reuseTableCompare( // Although this check is invariant in the calling loops, it's better to do it // at this late stage. Practically we do it at most once for a switch. BasicBlock *BranchBlock = RangeCheckBranch->getParent(); - for (auto PI = pred_begin(PhiBlock), E = pred_end(PhiBlock); PI != E; ++PI) { - BasicBlock *Pred = *PI; + for (BasicBlock *Pred : predecessors(PhiBlock)) { if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock) return; } @@ -5675,7 +5742,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // Only build lookup table when we have a target that supports it or the // attribute is not set. if (!TTI.shouldBuildLookupTables() || - (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")) + (Fn->getFnAttribute("no-jump-tables").getValueAsBool())) return false; // FIXME: If the switch is too sparse for a lookup table, perhaps we could @@ -5799,7 +5866,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, if (!DefaultIsReachable || GeneratingCoveredLookupTable) { Builder.CreateBr(LookupBB); - Updates.push_back({DominatorTree::Insert, BB, LookupBB}); + if (DTU) + Updates.push_back({DominatorTree::Insert, BB, LookupBB}); // Note: We call removeProdecessor later since we need to be able to get the // PHI value for the default case in case we're using a bit mask. 
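
// What the lookup-table conversion here produces, at the source level (plain
// C++; names and values are illustrative): a dense switch over constants
// becomes one bounds check plus an array load.
static const int SwitchTable[4] = {9, 20, 14, 22}; // the "switch.table"

int lookup(int x) {
  // before: switch (x) { case 0: return 9;  case 1: return 20;
  //                      case 2: return 14; case 3: return 22;
  //                      default: return 5; }
  if ((unsigned)x < 4u) // the emitted range check (RangeCheckBranch)
    return SwitchTable[x];
  return 5; // default result
}
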
} else { @@ -5807,7 +5875,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize)); RangeCheckBranch = Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); - Updates.push_back({DominatorTree::Insert, BB, LookupBB}); + if (DTU) + Updates.push_back({DominatorTree::Insert, BB, LookupBB}); } // Populate the BB that does the lookups. @@ -5845,8 +5914,10 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, Value *LoBit = Builder.CreateTrunc( Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit"); Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest()); - Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB}); - Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()}); + if (DTU) { + Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB}); + Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()}); + } Builder.SetInsertPoint(LookupBB); AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB); } @@ -5856,10 +5927,10 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // do not delete PHINodes here. SI->getDefaultDest()->removePredecessor(BB, /*KeepOneInputPHIs=*/true); - Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()}); + if (DTU) + Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()}); } - bool ReturnedEarly = false; for (PHINode *PHI : PHIs) { const ResultListTy &ResultList = ResultLists[PHI]; @@ -5871,15 +5942,6 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, Value *Result = Table.BuildLookup(TableIndex, Builder); - // If the result is used to return immediately from the function, we want to - // do that right here. - if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->user_begin()) && - PHI->user_back() == CommonDest->getFirstNonPHIOrDbg()) { - Builder.CreateRet(Result); - ReturnedEarly = true; - break; - } - // Do a small peephole optimization: re-use the switch table compare if // possible. if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) { @@ -5893,13 +5955,12 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, PHI->addIncoming(Result, LookupBB); } - if (!ReturnedEarly) { - Builder.CreateBr(CommonDest); + Builder.CreateBr(CommonDest); + if (DTU) Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest}); - } // Remove the switch. - SmallSetVector<BasicBlock *, 8> RemovedSuccessors; + SmallPtrSet<BasicBlock *, 8> RemovedSuccessors; for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) { BasicBlock *Succ = SI->getSuccessor(i); @@ -6081,7 +6142,7 @@ bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) { // Eliminate redundant destinations. SmallPtrSet<Value *, 8> Succs; - SmallSetVector<BasicBlock *, 8> RemovedSuccs; + SmallPtrSet<BasicBlock *, 8> RemovedSuccs; for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { BasicBlock *Dest = IBI->getDestination(i); if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) { @@ -6171,15 +6232,16 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, // We've found an identical block. Update our predecessors to take that // path instead and make ourselves dead. 
- SmallPtrSet<BasicBlock *, 16> Preds; - Preds.insert(pred_begin(BB), pred_end(BB)); + SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); for (BasicBlock *Pred : Preds) { InvokeInst *II = cast<InvokeInst>(Pred->getTerminator()); assert(II->getNormalDest() != BB && II->getUnwindDest() == BB && "unexpected successor"); II->setUnwindDest(OtherPred); - Updates.push_back({DominatorTree::Insert, Pred, OtherPred}); - Updates.push_back({DominatorTree::Delete, Pred, BB}); + if (DTU) { + Updates.push_back({DominatorTree::Insert, Pred, OtherPred}); + Updates.push_back({DominatorTree::Delete, Pred, BB}); + } } // The debug info in OtherPred doesn't cover the merged control flow that @@ -6191,11 +6253,11 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, Inst.eraseFromParent(); } - SmallPtrSet<BasicBlock *, 16> Succs; - Succs.insert(succ_begin(BB), succ_end(BB)); + SmallPtrSet<BasicBlock *, 16> Succs(succ_begin(BB), succ_end(BB)); for (BasicBlock *Succ : Succs) { Succ->removePredecessor(BB); - Updates.push_back({DominatorTree::Delete, BB, Succ}); + if (DTU) + Updates.push_back({DominatorTree::Delete, BB, Succ}); } IRBuilder<> Builder(BI); @@ -6229,7 +6291,7 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI, Options.NeedCanonicalLoop && (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) && (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ))); - BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator(); + BasicBlock::iterator I = BB->getFirstNonPHIOrDbg(true)->getIterator(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU)) return true; @@ -6290,8 +6352,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { return requestResimplify(); // This block must be empty, except for the setcond inst, if it exists. - // Ignore dbg intrinsics. - auto I = BB->instructionsWithoutDebug().begin(); + // Ignore dbg and pseudo intrinsics. + auto I = BB->instructionsWithoutDebug(true).begin(); if (&*I == BI) { if (FoldValueComparisonIntoPredecessors(BI, Builder)) return requestResimplify(); @@ -6332,9 +6394,9 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // can hoist it up to the branching block. if (BI->getSuccessor(0)->getSinglePredecessor()) { if (BI->getSuccessor(1)->getSinglePredecessor()) { - if (HoistCommon && Options.HoistCommonInsts) - if (HoistThenElseCodeToIf(BI, TTI)) - return requestResimplify(); + if (HoistCommon && + HoistThenElseCodeToIf(BI, TTI, !Options.HoistCommonInsts)) + return requestResimplify(); } else { // If Successor #1 has multiple preds, we may be able to conditionally // execute Successor #0 if it branches to Successor #1. @@ -6362,8 +6424,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { return requestResimplify(); // Scan predecessor blocks for conditional branches. 
-  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
-    if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+  for (BasicBlock *Pred : predecessors(BB))
+    if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
       if (PBI != BI && PBI->isConditional())
         if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
           return requestResimplify();
@@ -6397,9 +6459,12 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu
     for (BasicBlock::iterator i = ++BasicBlock::iterator(I),
                               UI = BasicBlock::iterator(dyn_cast<Instruction>(Use));
-         i != UI; ++i)
-      if (i == I->getParent()->end() || i->mayHaveSideEffects())
+         i != UI; ++i) {
+      if (i == I->getParent()->end())
         return false;
+      if (!isGuaranteedToTransferExecutionToSuccessor(&*i))
+        return false;
+    }
 
     // Look through GEPs. A load from a GEP derived from NULL is still undefined
     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
@@ -6437,8 +6502,8 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu
     for (const llvm::Use &Arg : CB->args())
       if (Arg == I) {
         unsigned ArgIdx = CB->getArgOperandNo(&Arg);
-        if (CB->paramHasAttr(ArgIdx, Attribute::NonNull) &&
-            CB->paramHasAttr(ArgIdx, Attribute::NoUndef)) {
+        if (CB->isPassingUndefUB(ArgIdx) &&
+            CB->paramHasAttr(ArgIdx, Attribute::NonNull)) {
          // Passing null to a nonnull+noundef argument is undefined.
          return !PtrValueMayBeModified;
        }
@@ -6448,7 +6513,7 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu
     for (const llvm::Use &Arg : CB->args())
       if (Arg == I) {
         unsigned ArgIdx = CB->getArgOperandNo(&Arg);
-        if (CB->paramHasAttr(ArgIdx, Attribute::NoUndef)) {
+        if (CB->isPassingUndefUB(ArgIdx)) {
          // Passing undef to a noundef argument is undefined.
          return true;
        }
@@ -6522,7 +6587,14 @@ bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) {
     return true;
 
   if (SinkCommon && Options.SinkCommonInsts)
-    Changed |= SinkCommonCodeFromPredecessors(BB, DTU);
+    if (SinkCommonCodeFromPredecessors(BB, DTU)) {
+      // SinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
+      // so we may now have duplicate PHI's.
+      // Let's rerun EliminateDuplicatePHINodes() first,
+      // before FoldTwoEntryPHINode() potentially converts them into select's,
+      // after which we'd need a whole EarlyCSE pass run to clean them up.
+      return true;
+    }
 
   IRBuilder<> Builder(BB);
 
@@ -6540,9 +6612,6 @@ bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) {
   case Instruction::Br:
     Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
     break;
-  case Instruction::Ret:
-    Changed |= simplifyReturn(cast<ReturnInst>(Terminator), Builder);
-    break;
   case Instruction::Resume:
     Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
     break;
@@ -6566,20 +6635,10 @@ bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) {
 bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
   bool Changed = simplifyOnceImpl(BB);
 
-  assert((!RequireAndPreserveDomTree ||
-          (DTU &&
-           DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) &&
-         "Failed to maintain validity of domtree!");
-
   return Changed;
 }
 
 bool SimplifyCFGOpt::run(BasicBlock *BB) {
-  assert((!RequireAndPreserveDomTree ||
-          (DTU &&
-           DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) &&
-         "Original domtree is invalid?");
-
   bool Changed = false;
 
   // Repeatedly simplify BB as long as resimplification is requested.
@@ -6597,7 +6656,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, const SimplifyCFGOptions &Options, ArrayRef<WeakVH> LoopHeaders) { - return SimplifyCFGOpt(TTI, RequireAndPreserveDomTree ? DTU : nullptr, - BB->getModule()->getDataLayout(), LoopHeaders, Options) + return SimplifyCFGOpt(TTI, DTU, BB->getModule()->getDataLayout(), LoopHeaders, + Options) .run(BB); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index 290c04a7ad10..bd30be011472 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -99,6 +99,24 @@ namespace { }; } +/// Find a point in code which dominates all given instructions. We can safely +/// assume that, whatever fact we can prove at the found point, this fact is +/// also true for each of the given instructions. +static Instruction *findCommonDominator(ArrayRef<Instruction *> Instructions, + DominatorTree &DT) { + Instruction *CommonDom = nullptr; + for (auto *Insn : Instructions) + if (!CommonDom || DT.dominates(Insn, CommonDom)) + CommonDom = Insn; + else if (!DT.dominates(CommonDom, Insn)) + // If there is no dominance relation, use common dominator. + CommonDom = + DT.findNearestCommonDominator(CommonDom->getParent(), + Insn->getParent())->getTerminator(); + assert(CommonDom && "Common dominator not found?"); + return CommonDom; +} + /// Fold an IV operand into its use. This removes increments of an /// aligned IV when used by a instruction that ignores the low bits. /// @@ -261,14 +279,14 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop); const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop); - // If the condition is always true or always false, replace it with - // a constant value. - if (SE->isKnownPredicate(Pred, S, X)) { - ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext())); - DeadInsts.emplace_back(ICmp); - LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); - } else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) { - ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext())); + // If the condition is always true or always false in the given context, + // replace it with a constant value. 
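
// A sketch of how the common-dominator context is chosen for two users
// (assuming LLVM 13's DominatorTree API; commonContext is a hypothetical
// two-instruction version of findCommonDominator above): the returned point
// dominates both users, so a fact proved there holds at each of them.
#include "llvm/IR/Dominators.h"
using namespace llvm;

static Instruction *commonContext(Instruction *A, Instruction *B,
                                  DominatorTree &DT) {
  if (DT.dominates(A, B))
    return A; // A executes first on every path that reaches B
  if (DT.dominates(B, A))
    return B;
  // No dominance either way: fall back to the nearest common dominator.
  return DT.findNearestCommonDominator(A->getParent(), B->getParent())
      ->getTerminator();
}
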
+ SmallVector<Instruction *, 4> Users; + for (auto *U : ICmp->users()) + Users.push_back(cast<Instruction>(U)); + const Instruction *CtxI = findCommonDominator(Users, *DT); + if (auto Ev = SE->evaluatePredicateAt(Pred, S, X, CtxI)) { + ICmp->replaceAllUsesWith(ConstantInt::getBool(ICmp->getContext(), *Ev)); DeadInsts.emplace_back(ICmp); LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); } else if (makeIVComparisonInvariant(ICmp, IVOperand)) { @@ -404,46 +422,10 @@ void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand, replaceSRemWithURem(Rem); } -static bool willNotOverflow(ScalarEvolution *SE, Instruction::BinaryOps BinOp, - bool Signed, const SCEV *LHS, const SCEV *RHS) { - const SCEV *(ScalarEvolution::*Operation)(const SCEV *, const SCEV *, - SCEV::NoWrapFlags, unsigned); - switch (BinOp) { - default: - llvm_unreachable("Unsupported binary op"); - case Instruction::Add: - Operation = &ScalarEvolution::getAddExpr; - break; - case Instruction::Sub: - Operation = &ScalarEvolution::getMinusSCEV; - break; - case Instruction::Mul: - Operation = &ScalarEvolution::getMulExpr; - break; - } - - const SCEV *(ScalarEvolution::*Extension)(const SCEV *, Type *, unsigned) = - Signed ? &ScalarEvolution::getSignExtendExpr - : &ScalarEvolution::getZeroExtendExpr; - - // Check ext(LHS op RHS) == ext(LHS) op ext(RHS) - auto *NarrowTy = cast<IntegerType>(LHS->getType()); - auto *WideTy = - IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2); - - const SCEV *A = - (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0), - WideTy, 0); - const SCEV *B = - (SE->*Operation)((SE->*Extension)(LHS, WideTy, 0), - (SE->*Extension)(RHS, WideTy, 0), SCEV::FlagAnyWrap, 0); - return A == B; -} - bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) { const SCEV *LHS = SE->getSCEV(WO->getLHS()); const SCEV *RHS = SE->getSCEV(WO->getRHS()); - if (!willNotOverflow(SE, WO->getBinaryOp(), WO->isSigned(), LHS, RHS)) + if (!SE->willNotOverflow(WO->getBinaryOp(), WO->isSigned(), LHS, RHS)) return false; // Proved no overflow, nuke the overflow check and, if possible, the overflow @@ -484,7 +466,7 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) { bool SimplifyIndvar::eliminateSaturatingIntrinsic(SaturatingInst *SI) { const SCEV *LHS = SE->getSCEV(SI->getLHS()); const SCEV *RHS = SE->getSCEV(SI->getRHS()); - if (!willNotOverflow(SE, SI->getBinaryOp(), SI->isSigned(), LHS, RHS)) + if (!SE->willNotOverflow(SI->getBinaryOp(), SI->isSigned(), LHS, RHS)) return false; BinaryOperator *BO = BinaryOperator::Create( @@ -738,34 +720,25 @@ bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst, /// unsigned-overflow. Returns true if anything changed, false otherwise. bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, Value *IVOperand) { - // Fastpath: we don't have any work to do if `BO` is `nuw` and `nsw`. 
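
// The core trick of the removed helper (now ScalarEvolution::willNotOverflow),
// shown at a concrete width in plain C++: an i8 unsigned add cannot wrap iff
// redoing it at twice the width produces the same value.
#include <cstdint>

static bool addNoUnsignedWrapU8(uint8_t A, uint8_t B) {
  uint16_t Wide = uint16_t(A) + uint16_t(B); // ext(LHS) op ext(RHS)
  return Wide == uint16_t(uint8_t(A + B));   // == ext(LHS op RHS) ?
}
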
- if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap()) - return false; - - if (BO->getOpcode() != Instruction::Add && - BO->getOpcode() != Instruction::Sub && - BO->getOpcode() != Instruction::Mul) - return false; - - const SCEV *LHS = SE->getSCEV(BO->getOperand(0)); - const SCEV *RHS = SE->getSCEV(BO->getOperand(1)); - bool Changed = false; - - if (!BO->hasNoUnsignedWrap() && - willNotOverflow(SE, BO->getOpcode(), /* Signed */ false, LHS, RHS)) { - BO->setHasNoUnsignedWrap(); - SE->forgetValue(BO); - Changed = true; - } - - if (!BO->hasNoSignedWrap() && - willNotOverflow(SE, BO->getOpcode(), /* Signed */ true, LHS, RHS)) { - BO->setHasNoSignedWrap(); - SE->forgetValue(BO); - Changed = true; - } - - return Changed; + SCEV::NoWrapFlags Flags; + bool Deduced; + std::tie(Flags, Deduced) = SE->getStrengthenedNoWrapFlagsFromBinOp( + cast<OverflowingBinaryOperator>(BO)); + + if (!Deduced) + return Deduced; + + BO->setHasNoUnsignedWrap(ScalarEvolution::maskFlags(Flags, SCEV::FlagNUW) == + SCEV::FlagNUW); + BO->setHasNoSignedWrap(ScalarEvolution::maskFlags(Flags, SCEV::FlagNSW) == + SCEV::FlagNSW); + + // The getStrengthenedNoWrapFlagsFromBinOp() check inferred additional nowrap + // flags on addrecs while performing zero/sign extensions. We could call + // forgetValue() here to make sure those flags also propagate to any other + // SCEV expressions based on the addrec. However, this can have pathological + // compile-time impact, see https://bugs.llvm.org/show_bug.cgi?id=50384. + return Deduced; } /// Annotate the Shr in (X << IVOperand) >> C as exact using the @@ -1386,7 +1359,7 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) { /// so, return the extended recurrence and the kind of extension used. Otherwise /// return {nullptr, Unknown}. WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) { - if (!SE->isSCEVable(DU.NarrowUse->getType())) + if (!DU.NarrowUse->getType()->isIntegerTy()) return {nullptr, Unknown}; const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse); @@ -1575,17 +1548,7 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { // We'll prove some facts that should be true in the context of ext users. If // there is no users, we are done now. If there are some, pick their common // dominator as context. - Instruction *Context = nullptr; - for (auto *Ext : ExtUsers) { - if (!Context || DT->dominates(Ext, Context)) - Context = Ext; - else if (!DT->dominates(Context, Ext)) - // For users that don't have dominance relation, use common dominator. - Context = - DT->findNearestCommonDominator(Context->getParent(), Ext->getParent()) - ->getTerminator(); - } - assert(Context && "Context not found?"); + const Instruction *CtxI = findCommonDominator(ExtUsers, *DT); if (!CanSignExtend && !CanZeroExtend) { // Because InstCombine turns 'sub nuw' to 'add' losing the no-wrap flag, we @@ -1601,8 +1564,8 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { return false; if (!SE->isKnownNegative(RHS)) return false; - bool ProvedSubNUW = SE->isKnownPredicateAt( - ICmpInst::ICMP_UGE, LHS, SE->getNegativeSCEV(RHS), Context); + bool ProvedSubNUW = SE->isKnownPredicateAt(ICmpInst::ICMP_UGE, LHS, + SE->getNegativeSCEV(RHS), CtxI); if (!ProvedSubNUW) return false; // In fact, our 'add' is 'sub nuw'. 
We will need to widen the 2nd operand as diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index f9a9dd237b6c..b8e0f63c481d 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -56,38 +56,6 @@ static bool ignoreCallingConv(LibFunc Func) { Func == LibFunc_llabs || Func == LibFunc_strlen; } -static bool isCallingConvCCompatible(CallInst *CI) { - switch(CI->getCallingConv()) { - default: - return false; - case llvm::CallingConv::C: - return true; - case llvm::CallingConv::ARM_APCS: - case llvm::CallingConv::ARM_AAPCS: - case llvm::CallingConv::ARM_AAPCS_VFP: { - - // The iOS ABI diverges from the standard in some cases, so for now don't - // try to simplify those calls. - if (Triple(CI->getModule()->getTargetTriple()).isiOS()) - return false; - - auto *FuncTy = CI->getFunctionType(); - - if (!FuncTy->getReturnType()->isPointerTy() && - !FuncTy->getReturnType()->isIntegerTy() && - !FuncTy->getReturnType()->isVoidTy()) - return false; - - for (auto Param : FuncTy->params()) { - if (!Param->isPointerTy() && !Param->isIntegerTy()) - return false; - } - return true; - } - } - return false; -} - /// Return true if it is only used in equality comparisons with With. static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) { for (User *U : V->users()) { @@ -190,13 +158,16 @@ static void annotateDereferenceableBytes(CallInst *CI, } } -static void annotateNonNullBasedOnAccess(CallInst *CI, +static void annotateNonNullNoUndefBasedOnAccess(CallInst *CI, ArrayRef<unsigned> ArgNos) { Function *F = CI->getCaller(); if (!F) return; for (unsigned ArgNo : ArgNos) { + if (!CI->paramHasAttr(ArgNo, Attribute::NoUndef)) + CI->addParamAttr(ArgNo, Attribute::NoUndef); + if (CI->paramHasAttr(ArgNo, Attribute::NonNull)) continue; unsigned AS = CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace(); @@ -211,10 +182,10 @@ static void annotateNonNullBasedOnAccess(CallInst *CI, static void annotateNonNullAndDereferenceable(CallInst *CI, ArrayRef<unsigned> ArgNos, Value *Size, const DataLayout &DL) { if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size)) { - annotateNonNullBasedOnAccess(CI, ArgNos); + annotateNonNullNoUndefBasedOnAccess(CI, ArgNos); annotateDereferenceableBytes(CI, ArgNos, LenC->getZExtValue()); } else if (isKnownNonZero(Size, DL)) { - annotateNonNullBasedOnAccess(CI, ArgNos); + annotateNonNullNoUndefBasedOnAccess(CI, ArgNos); const APInt *X, *Y; uint64_t DerefMin = 1; if (match(Size, m_Select(m_Value(), m_APInt(X), m_APInt(Y)))) { @@ -232,7 +203,7 @@ Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilderBase &B) { // Extract some information from the instruction Value *Dst = CI->getArgOperand(0); Value *Src = CI->getArgOperand(1); - annotateNonNullBasedOnAccess(CI, {0, 1}); + annotateNonNullNoUndefBasedOnAccess(CI, {0, 1}); // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); @@ -276,9 +247,9 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilderBase &B) { Value *Src = CI->getArgOperand(1); Value *Size = CI->getArgOperand(2); uint64_t Len; - annotateNonNullBasedOnAccess(CI, 0); + annotateNonNullNoUndefBasedOnAccess(CI, 0); if (isKnownNonZero(Size, DL)) - annotateNonNullBasedOnAccess(CI, 1); + annotateNonNullNoUndefBasedOnAccess(CI, 1); // We don't do anything if length is not constant. 
ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size); @@ -317,7 +288,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) { Function *Callee = CI->getCalledFunction(); FunctionType *FT = Callee->getFunctionType(); Value *SrcStr = CI->getArgOperand(0); - annotateNonNullBasedOnAccess(CI, 0); + annotateNonNullNoUndefBasedOnAccess(CI, 0); // If the second operand is non-constant, see if we can compute the length // of the input string and turn this into memchr. @@ -361,7 +332,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) { Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilderBase &B) { Value *SrcStr = CI->getArgOperand(0); ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); - annotateNonNullBasedOnAccess(CI, 0); + annotateNonNullNoUndefBasedOnAccess(CI, 0); // Cannot fold anything if we're not looking for a constant. if (!CharC) @@ -437,7 +408,7 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilderBase &B) { TLI); } - annotateNonNullBasedOnAccess(CI, {0, 1}); + annotateNonNullNoUndefBasedOnAccess(CI, {0, 1}); return nullptr; } @@ -449,7 +420,7 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilderBase &B) { return ConstantInt::get(CI->getType(), 0); if (isKnownNonZero(Size, DL)) - annotateNonNullBasedOnAccess(CI, {0, 1}); + annotateNonNullNoUndefBasedOnAccess(CI, {0, 1}); // Get the length argument if it is constant. uint64_t Length; if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size)) @@ -527,7 +498,7 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilderBase &B) { if (Dst == Src) // strcpy(x,x) -> x return Src; - annotateNonNullBasedOnAccess(CI, {0, 1}); + annotateNonNullNoUndefBasedOnAccess(CI, {0, 1}); // See if we can get the length of the input string. 
uint64_t Len = GetStringLength(Src); if (Len) @@ -580,9 +551,9 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) { Value *Dst = CI->getArgOperand(0); Value *Src = CI->getArgOperand(1); Value *Size = CI->getArgOperand(2); - annotateNonNullBasedOnAccess(CI, 0); + annotateNonNullNoUndefBasedOnAccess(CI, 0); if (isKnownNonZero(Size, DL)) - annotateNonNullBasedOnAccess(CI, 1); + annotateNonNullNoUndefBasedOnAccess(CI, 1); uint64_t Len; if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size)) @@ -604,8 +575,10 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) { } if (SrcLen == 0) { - // strncpy(x, "", y) -> memset(align 1 x, '\0', y) - CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, Align(1)); + // strncpy(x, "", y) -> memset(x, '\0', y) + Align MemSetAlign = + CI->getAttributes().getParamAttributes(0).getAlignment().valueOrOne(); + CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, MemSetAlign); AttrBuilder ArgAttrs(CI->getAttributes().getParamAttributes(0)); NewCI->setAttributes(NewCI->getAttributes().addParamAttributes( CI->getContext(), 0, ArgAttrs)); @@ -728,7 +701,7 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B, Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilderBase &B) { if (Value *V = optimizeStringLength(CI, B, 8)) return V; - annotateNonNullBasedOnAccess(CI, 0); + annotateNonNullNoUndefBasedOnAccess(CI, 0); return nullptr; } @@ -839,8 +812,8 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilderBase &B) { StrLen, B, DL, TLI); if (!StrNCmp) return nullptr; - for (auto UI = CI->user_begin(), UE = CI->user_end(); UI != UE;) { - ICmpInst *Old = cast<ICmpInst>(*UI++); + for (User *U : llvm::make_early_inc_range(CI->users())) { + ICmpInst *Old = cast<ICmpInst>(U); Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp, ConstantInt::getNullValue(StrNCmp->getType()), "cmp"); @@ -878,13 +851,13 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilderBase &B) { return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr; } - annotateNonNullBasedOnAccess(CI, {0, 1}); + annotateNonNullNoUndefBasedOnAccess(CI, {0, 1}); return nullptr; } Value *LibCallSimplifier::optimizeMemRChr(CallInst *CI, IRBuilderBase &B) { if (isKnownNonZero(CI->getOperand(2), DL)) - annotateNonNullBasedOnAccess(CI, 0); + annotateNonNullNoUndefBasedOnAccess(CI, 0); return nullptr; } @@ -960,7 +933,8 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) { // Finally merge both checks and cast to pointer type. The inttoptr // implicitly zexts the i1 to intptr type. - return B.CreateIntToPtr(B.CreateAnd(Bounds, Bits, "memchr"), CI->getType()); + return B.CreateIntToPtr(B.CreateLogicalAnd(Bounds, Bits, "memchr"), + CI->getType()); } // Check if all arguments are constants. If so, we can constant fold. @@ -1451,17 +1425,18 @@ static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilderBase &B) { return InnerChain[Exp]; } -// Return a properly extended 32-bit integer if the operation is an itofp. -static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B) { +// Return a properly extended integer (DstWidth bits wide) if the operation is +// an itofp. 
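The int32_t hard-coding was the point of the fix whose definition follows: ldexp and powi take a C "int", and the width of int is a property of the target, which is why the callers below now pass TLI->getIntSize(). A toy restatement of the widening rule using plain integers (function name and framing mine):

// Widening the original integer into the target's "int" is lossless only
// if it is strictly narrower, or equal-width and already signed.
static bool fitsLibcallInt(unsigned SrcWidth, bool SrcIsSigned,
                           unsigned IntWidth) {
  return SrcWidth < IntWidth || (SrcWidth == IntWidth && SrcIsSigned);
}
// On a target where int is 16 bits (IntWidth == 16):
//   fitsLibcallInt(8,  false, 16) -> true   (zext i8 -> i16)
//   fitsLibcallInt(16, true,  16) -> true   (already a signed 16-bit int)
//   fitsLibcallInt(16, false, 16) -> false  (u16 0xFFFF has no int16 encoding)

The companion createPowWithIntegerExponent change is the other half of this: powi is now declared with both the base and the exponent type, so the exponent operand can match the target's int width.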
+static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) { if (isa<SIToFPInst>(I2F) || isa<UIToFPInst>(I2F)) { Value *Op = cast<Instruction>(I2F)->getOperand(0); - // Make sure that the exponent fits inside an int32_t, + // Make sure that the exponent fits inside an "int" of size DstWidth, // thus avoiding any range issues that FP has not. unsigned BitWidth = Op->getType()->getPrimitiveSizeInBits(); - if (BitWidth < 32 || - (BitWidth == 32 && isa<SIToFPInst>(I2F))) - return isa<SIToFPInst>(I2F) ? B.CreateSExt(Op, B.getInt32Ty()) - : B.CreateZExt(Op, B.getInt32Ty()); + if (BitWidth < DstWidth || + (BitWidth == DstWidth && isa<SIToFPInst>(I2F))) + return isa<SIToFPInst>(I2F) ? B.CreateSExt(Op, B.getIntNTy(DstWidth)) + : B.CreateZExt(Op, B.getIntNTy(DstWidth)); } return nullptr; @@ -1551,7 +1526,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { if (match(Base, m_SpecificFP(2.0)) && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo)) && hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) { - if (Value *ExpoI = getIntToFPVal(Expo, B)) + if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize())) return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), ExpoI, TLI, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl, B, Attrs); @@ -1690,7 +1665,8 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) { static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M, IRBuilderBase &B) { Value *Args[] = {Base, Expo}; - Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Base->getType()); + Type *Types[] = {Base->getType(), Expo->getType()}; + Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Types); return B.CreateCall(F, Args); } @@ -1701,20 +1677,12 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) { StringRef Name = Callee->getName(); Type *Ty = Pow->getType(); Module *M = Pow->getModule(); - Value *Shrunk = nullptr; bool AllowApprox = Pow->hasApproxFunc(); bool Ignored; // Propagate the math semantics from the call to any created instructions. IRBuilderBase::FastMathFlagGuard Guard(B); B.setFastMathFlags(Pow->getFastMathFlags()); - - // Shrink pow() to powf() if the arguments are single precision, - // unless the result is expected to be double precision. - if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) && - hasFloatVersion(Name)) - Shrunk = optimizeBinaryDoubleFP(Pow, B, true); - // Evaluate special cases related to the base. // pow(1.0, x) -> 1.0 @@ -1799,23 +1767,31 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) { return FMul; } - APSInt IntExpo(32, /*isUnsigned=*/false); + APSInt IntExpo(TLI->getIntSize(), /*isUnsigned=*/false); // powf(x, n) -> powi(x, n) if n is a constant signed integer value if (ExpoF->isInteger() && ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) == APFloat::opOK) { return createPowWithIntegerExponent( - Base, ConstantInt::get(B.getInt32Ty(), IntExpo), M, B); + Base, ConstantInt::get(B.getIntNTy(TLI->getIntSize()), IntExpo), M, B); } } // powf(x, itofp(y)) -> powi(x, y) if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) { - if (Value *ExpoI = getIntToFPVal(Expo, B)) + if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize())) return createPowWithIntegerExponent(Base, ExpoI, M, B); } - return Shrunk; + // Shrink pow() to powf() if the arguments are single precision, + // unless the result is expected to be double precision. 
+ if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) && + hasFloatVersion(Name)) { + if (Value *Shrunk = optimizeBinaryDoubleFP(Pow, B, true)) + return Shrunk; + } + + return nullptr; } Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) { @@ -1830,11 +1806,11 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) { Type *Ty = CI->getType(); Value *Op = CI->getArgOperand(0); - // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 - // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32 + // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= IntSize + // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < IntSize if ((isa<SIToFPInst>(Op) || isa<UIToFPInst>(Op)) && hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) { - if (Value *Exp = getIntToFPVal(Op, B)) + if (Value *Exp = getIntToFPVal(Op, B, TLI->getIntSize())) return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), Exp, TLI, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl, B, Attrs); @@ -2404,18 +2380,28 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilderBase &B) { if (FormatStr.size() == 1 || FormatStr == "%%") return emitPutChar(B.getInt32(FormatStr[0]), B, TLI); - // printf("%s", "a") --> putchar('a') + // Try to remove call or emit putchar/puts. if (FormatStr == "%s" && CI->getNumArgOperands() > 1) { - StringRef ChrStr; - if (!getConstantStringInfo(CI->getOperand(1), ChrStr)) - return nullptr; - if (ChrStr.size() != 1) + StringRef OperandStr; + if (!getConstantStringInfo(CI->getOperand(1), OperandStr)) return nullptr; - return emitPutChar(B.getInt32(ChrStr[0]), B, TLI); + // printf("%s", "") --> NOP + if (OperandStr.empty()) + return (Value *)CI; + // printf("%s", "a") --> putchar('a') + if (OperandStr.size() == 1) + return emitPutChar(B.getInt32(OperandStr[0]), B, TLI); + // printf("%s", str"\n") --> puts(str) + if (OperandStr.back() == '\n') { + OperandStr = OperandStr.drop_back(); + Value *GV = B.CreateGlobalString(OperandStr, "str"); + return emitPutS(GV, B, TLI); + } + return nullptr; } // printf("foo\n") --> puts("foo") - if (FormatStr[FormatStr.size() - 1] == '\n' && + if (FormatStr.back() == '\n' && FormatStr.find('%') == StringRef::npos) { // No format characters. // Create a string literal with no \n on it. We expect the constant merge // pass to be run after this pass, to merge duplicate strings. @@ -2470,7 +2456,7 @@ Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilderBase &B) { return New; } - annotateNonNullBasedOnAccess(CI, 0); + annotateNonNullNoUndefBasedOnAccess(CI, 0); return nullptr; } @@ -2482,6 +2468,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, return nullptr; // If we just have a format string (nothing else crazy) transform it. + Value *Dest = CI->getArgOperand(0); if (CI->getNumArgOperands() == 2) { // Make sure there's no % in the constant array. We could try to handle // %% -> % in the future if we cared. @@ -2490,7 +2477,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, // sprintf(str, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, strlen(fmt)+1) B.CreateMemCpy( - CI->getArgOperand(0), Align(1), CI->getArgOperand(1), Align(1), + Dest, Align(1), CI->getArgOperand(1), Align(1), ConstantInt::get(DL.getIntPtrType(CI->getContext()), FormatStr.size() + 1)); // Copy the null byte. 
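For the no-'%'-specifier sprintf fold just above, the memcpy copies FormatStr.size()+1 bytes so the terminating NUL travels with the string, while the call's result folds to the length without the NUL. A hypothetical before/after, with names of my choosing (buffer assumed large enough, as sprintf itself requires):

#include <cstring>

void demo(char *buf) {
  // before: int n = std::sprintf(buf, "hello");   // n == 5
  std::memcpy(buf, "hello", 6); // FormatStr.size() + 1: copy the NUL too
  int n = 5;                    // return value folds to the string length
  (void)n;
}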
return ConstantInt::get(CI->getType(), FormatStr.size()); @@ -2508,7 +2495,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return nullptr; Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char"); - Value *Ptr = castToCStr(CI->getArgOperand(0), B); + Value *Ptr = castToCStr(Dest, B); B.CreateStore(V, Ptr); Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul"); B.CreateStore(B.getInt8(0), Ptr); @@ -2524,19 +2511,20 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, if (CI->use_empty()) // sprintf(dest, "%s", str) -> strcpy(dest, str) - return emitStrCpy(CI->getArgOperand(0), CI->getArgOperand(2), B, TLI); + return emitStrCpy(Dest, CI->getArgOperand(2), B, TLI); uint64_t SrcLen = GetStringLength(CI->getArgOperand(2)); if (SrcLen) { B.CreateMemCpy( - CI->getArgOperand(0), Align(1), CI->getArgOperand(2), Align(1), + Dest, Align(1), CI->getArgOperand(2), Align(1), ConstantInt::get(DL.getIntPtrType(CI->getContext()), SrcLen)); // Returns total number of characters written without null-character. return ConstantInt::get(CI->getType(), SrcLen - 1); - } else if (Value *V = emitStpCpy(CI->getArgOperand(0), CI->getArgOperand(2), - B, TLI)) { + } else if (Value *V = emitStpCpy(Dest, CI->getArgOperand(2), B, TLI)) { // sprintf(dest, "%s", str) -> stpcpy(dest, str) - dest - Value *PtrDiff = B.CreatePtrDiff(V, CI->getArgOperand(0)); + // Handle mismatched pointer types (goes away with typeless pointers?). + V = B.CreatePointerCast(V, Dest->getType()); + Value *PtrDiff = B.CreatePtrDiff(V, Dest); return B.CreateIntCast(PtrDiff, CI->getType(), false); } @@ -2551,8 +2539,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, return nullptr; Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); - B.CreateMemCpy(CI->getArgOperand(0), Align(1), CI->getArgOperand(2), - Align(1), IncLen); + B.CreateMemCpy(Dest, Align(1), CI->getArgOperand(2), Align(1), IncLen); // The sprintf result is the unincremented number of bytes in the string. return B.CreateIntCast(Len, CI->getType(), false); @@ -2592,7 +2579,7 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilderBase &B) { return New; } - annotateNonNullBasedOnAccess(CI, {0, 1}); + annotateNonNullNoUndefBasedOnAccess(CI, {0, 1}); return nullptr; } @@ -2681,7 +2668,7 @@ Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilderBase &B) { } if (isKnownNonZero(CI->getOperand(1), DL)) - annotateNonNullBasedOnAccess(CI, 0); + annotateNonNullNoUndefBasedOnAccess(CI, 0); return nullptr; } @@ -2824,7 +2811,7 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilderBase &B) { } Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilderBase &B) { - annotateNonNullBasedOnAccess(CI, 0); + annotateNonNullNoUndefBasedOnAccess(CI, 0); if (!CI->use_empty()) return nullptr; @@ -2859,9 +2846,10 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, // Check for string/memory library functions. if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) { // Make sure we never change the calling convention. 
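One subtlety in the sprintf("%s", str) hunk above is the stpcpy path: stpcpy returns a pointer to the written NUL, so the character count sprintf must return is a pointer difference, which is what CreatePtrDiff computes. A hedged source-level illustration (stpcpy is POSIX, not ISO C; this assumes a platform that provides it):

#include <cstring>

int sprintfPercentS(char *dest, const char *str) {
  // sprintf(dest, "%s", str) returns the number of characters written,
  // which is exactly stpcpy(dest, str) - dest.
  return static_cast<int>(stpcpy(dest, str) - dest);
}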
- assert((ignoreCallingConv(Func) || - isCallingConvCCompatible(CI)) && - "Optimizing string/memory libcall would change the calling convention"); + assert( + (ignoreCallingConv(Func) || + TargetLibraryInfoImpl::isCallingConvCCompatible(CI)) && + "Optimizing string/memory libcall would change the calling convention"); switch (Func) { case LibFunc_strcat: return optimizeStrCat(CI, Builder); @@ -3045,7 +3033,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) { LibFunc Func; Function *Callee = CI->getCalledFunction(); - bool isCallingConvC = isCallingConvCCompatible(CI); + bool IsCallingConvC = TargetLibraryInfoImpl::isCallingConvCCompatible(CI); SmallVector<OperandBundleDef, 2> OpBundles; CI->getOperandBundlesAsDefs(OpBundles); @@ -3063,7 +3051,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) { // First, check for intrinsics. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) { - if (!isCallingConvC) + if (!IsCallingConvC) return nullptr; // The FP intrinsics have corresponding constrained versions so we don't // need to check for the StrictFP attribute here. @@ -3116,7 +3104,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) { // Then check for known library functions. if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) { // We never change the calling convention. - if (!ignoreCallingConv(Func) && !isCallingConvC) + if (!ignoreCallingConv(Func) && !IsCallingConvC) return nullptr; if (Value *V = optimizeStringMemoryLibCall(CI, Builder)) return V; @@ -3500,7 +3488,7 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI, LibFunc Func; Function *Callee = CI->getCalledFunction(); - bool isCallingConvC = isCallingConvCCompatible(CI); + bool IsCallingConvC = TargetLibraryInfoImpl::isCallingConvCCompatible(CI); SmallVector<OperandBundleDef, 2> OpBundles; CI->getOperandBundlesAsDefs(OpBundles); @@ -3514,7 +3502,7 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI, return nullptr; // We never change the calling convention. - if (!ignoreCallingConv(Func) && !isCallingConvC) + if (!ignoreCallingConv(Func) && !IsCallingConvC) return nullptr; switch (Func) { diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp index beeb60698f04..08a29ea16ba1 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp @@ -14,45 +14,45 @@ using namespace llvm; -cl::opt<bool> EnablePGSO( +cl::opt<bool> llvm::EnablePGSO( "pgso", cl::Hidden, cl::init(true), cl::desc("Enable the profile guided size optimizations. 
")); -cl::opt<bool> PGSOLargeWorkingSetSizeOnly( +cl::opt<bool> llvm::PGSOLargeWorkingSetSizeOnly( "pgso-lwss-only", cl::Hidden, cl::init(true), cl::desc("Apply the profile guided size optimizations only " "if the working set size is large (except for cold code.)")); -cl::opt<bool> PGSOColdCodeOnly( +cl::opt<bool> llvm::PGSOColdCodeOnly( "pgso-cold-code-only", cl::Hidden, cl::init(false), cl::desc("Apply the profile guided size optimizations only " "to cold code.")); -cl::opt<bool> PGSOColdCodeOnlyForInstrPGO( +cl::opt<bool> llvm::PGSOColdCodeOnlyForInstrPGO( "pgso-cold-code-only-for-instr-pgo", cl::Hidden, cl::init(false), cl::desc("Apply the profile guided size optimizations only " "to cold code under instrumentation PGO.")); -cl::opt<bool> PGSOColdCodeOnlyForSamplePGO( +cl::opt<bool> llvm::PGSOColdCodeOnlyForSamplePGO( "pgso-cold-code-only-for-sample-pgo", cl::Hidden, cl::init(false), cl::desc("Apply the profile guided size optimizations only " "to cold code under sample PGO.")); -cl::opt<bool> PGSOColdCodeOnlyForPartialSamplePGO( +cl::opt<bool> llvm::PGSOColdCodeOnlyForPartialSamplePGO( "pgso-cold-code-only-for-partial-sample-pgo", cl::Hidden, cl::init(false), cl::desc("Apply the profile guided size optimizations only " "to cold code under partial-profile sample PGO.")); -cl::opt<bool> ForcePGSO( +cl::opt<bool> llvm::ForcePGSO( "force-pgso", cl::Hidden, cl::init(false), cl::desc("Force the (profiled-guided) size optimizations. ")); -cl::opt<int> PgsoCutoffInstrProf( +cl::opt<int> llvm::PgsoCutoffInstrProf( "pgso-cutoff-instr-prof", cl::Hidden, cl::init(950000), cl::ZeroOrMore, cl::desc("The profile guided size optimization profile summary cutoff " "for instrumentation profile.")); -cl::opt<int> PgsoCutoffSampleProf( +cl::opt<int> llvm::PgsoCutoffSampleProf( "pgso-cutoff-sample-prof", cl::Hidden, cl::init(990000), cl::ZeroOrMore, cl::desc("The profile guided size optimization profile summary cutoff " "for sample profile.")); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SplitModule.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SplitModule.cpp index e2c387cb8983..32f2f4e233b2 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SplitModule.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SplitModule.cpp @@ -95,13 +95,12 @@ static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap, // globalized. // Try to balance pack those partitions into N files since this roughly equals // thread balancing for the backend codegen step. -static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap, +static void findPartitions(Module &M, ClusterIDMapType &ClusterIDMap, unsigned N) { // At this point module should have the proper mix of globals and locals. // As we attempt to partition this module, we must not change any // locals to globals. 
- LLVM_DEBUG(dbgs() << "Partition module with (" << M->size() - << ")functions\n"); + LLVM_DEBUG(dbgs() << "Partition module with (" << M.size() << ")functions\n"); ClusterMapType GVtoClusterMap; ComdatMembersType ComdatMembers; @@ -144,9 +143,9 @@ static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap, addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV); }; - llvm::for_each(M->functions(), recordGVSet); - llvm::for_each(M->globals(), recordGVSet); - llvm::for_each(M->aliases(), recordGVSet); + llvm::for_each(M.functions(), recordGVSet); + llvm::for_each(M.globals(), recordGVSet); + llvm::for_each(M.aliases(), recordGVSet); // Assigned all GVs to merged clusters while balancing number of objects in // each. @@ -247,31 +246,32 @@ static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) { } void llvm::SplitModule( - std::unique_ptr<Module> M, unsigned N, + Module &M, unsigned N, function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback, bool PreserveLocals) { if (!PreserveLocals) { - for (Function &F : *M) + for (Function &F : M) externalize(&F); - for (GlobalVariable &GV : M->globals()) + for (GlobalVariable &GV : M.globals()) externalize(&GV); - for (GlobalAlias &GA : M->aliases()) + for (GlobalAlias &GA : M.aliases()) externalize(&GA); - for (GlobalIFunc &GIF : M->ifuncs()) + for (GlobalIFunc &GIF : M.ifuncs()) externalize(&GIF); } // This performs splitting without a need for externalization, which might not // always be possible. ClusterIDMapType ClusterIDMap; - findPartitions(M.get(), ClusterIDMap, N); + findPartitions(M, ClusterIDMap, N); // FIXME: We should be able to reuse M as the last partition instead of - // cloning it. + // cloning it. Note that the callers at the moment expect the module to + // be preserved, so will need some adjustments as well. for (unsigned I = 0; I < N; ++I) { ValueToValueMapTy VMap; std::unique_ptr<Module> MPart( - CloneModule(*M, VMap, [&](const GlobalValue *GV) { + CloneModule(M, VMap, [&](const GlobalValue *GV) { if (ClusterIDMap.count(GV)) return (ClusterIDMap[GV] == I); else diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp deleted file mode 100644 index c57cec6be676..000000000000 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp +++ /dev/null @@ -1,118 +0,0 @@ -//===- UniqueInternalLinkageNames.cpp - Unique Internal Linkage Sym Names -===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements unique naming of internal linkage symbols with option -// -funique-internal-linkage-symbols. 
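Stepping back to the SplitModule hunk: the entry point now borrows the module (Module &) instead of consuming a unique_ptr, though per the FIXME the module is still cloned once per partition. A hypothetical caller of the changed signature, assuming the declaration in llvm/Transforms/Utils/SplitModule.h:

#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/SplitModule.h"
#include <memory>

using namespace llvm;

// Split M into four partitions, e.g. one per codegen thread. M itself is
// left intact for the caller.
static void splitIntoFour(Module &M) {
  SplitModule(M, /*N=*/4,
              [](std::unique_ptr<Module> MPart) {
                outs() << "partition: " << MPart->getModuleIdentifier()
                       << "\n";
              },
              /*PreserveLocals=*/false);
}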
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/UniqueInternalLinkageNames.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/MDBuilder.h" -#include "llvm/IR/Module.h" -#include "llvm/InitializePasses.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/MD5.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" - -using namespace llvm; - -static bool uniqueifyInternalLinkageNames(Module &M) { - llvm::MD5 Md5; - Md5.update(M.getSourceFileName()); - llvm::MD5::MD5Result R; - Md5.final(R); - SmallString<32> Str; - llvm::MD5::stringifyResult(R, Str); - // Convert MD5hash to Decimal. Demangler suffixes can either contain numbers - // or characters but not both. - APInt IntHash = APInt(128, Str.str(), 16); - // Prepend "__uniq" before the hash for tools like profilers to understand that - // this symbol is of internal linkage type. - std::string ModuleNameHash = (Twine(".__uniq.") + Twine(IntHash.toString(10, false))).str(); - bool Changed = false; - MDBuilder MDB(M.getContext()); - - // Append the module hash to all internal linkage functions. - for (auto &F : M) { - if (F.hasInternalLinkage()) { - F.setName(F.getName() + ModuleNameHash); - F.addFnAttr("sample-profile-suffix-elision-policy", "selected"); - // Replace linkage names in the debug metadata. - if (DISubprogram *SP = F.getSubprogram()) { - if (SP->getRawLinkageName()) { - auto *Name = MDB.createString(F.getName()); - SP->replaceRawLinkageName(Name); - if (DISubprogram *SPDecl = SP->getDeclaration()) { - if (SPDecl->getRawLinkageName()) - SPDecl->replaceRawLinkageName(Name); - } - } - } - Changed = true; - } - } - - // Append the module hash to all internal linkage globals. - for (auto &GV : M.globals()) { - if (GV.hasInternalLinkage()) { - GV.setName(GV.getName() + ModuleNameHash); - Changed = true; - } - } - return Changed; -} - -namespace { - -// Legacy pass that provides a name to every anon globals. 
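For reference, the hashing scheme the deleted pass used, restated as a self-contained helper extracted from the code above (the function name is mine; the APInt::toString overload returning std::string is the one this LLVM version provides):

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/MD5.h"

using namespace llvm;

// MD5 the source file name, then render the 128-bit hash in decimal:
// demangler suffixes may contain digits or letters but not both, so a
// hex rendering would not survive demangling.
static std::string uniqueSuffix(StringRef SourceFileName) {
  MD5 Hash;
  Hash.update(SourceFileName);
  MD5::MD5Result R;
  Hash.final(R);
  SmallString<32> Hex;
  MD5::stringifyResult(R, Hex);
  APInt IntHash(128, Hex.str(), /*radix=*/16);
  return (Twine(".__uniq.") + Twine(IntHash.toString(10, /*Signed=*/false)))
      .str();
}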
-class UniqueInternalLinkageNamesLegacyPass : public ModulePass { - -public: - /// Pass identification, replacement for typeid - static char ID; - - /// Specify pass name for debug output - StringRef getPassName() const override { - return "Unique Internal Linkage Names"; - } - - explicit UniqueInternalLinkageNamesLegacyPass() : ModulePass(ID) { - initializeUniqueInternalLinkageNamesLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override { - return uniqueifyInternalLinkageNames(M); - } -}; - -char UniqueInternalLinkageNamesLegacyPass::ID = 0; -} // anonymous namespace - -PreservedAnalyses -UniqueInternalLinkageNamesPass::run(Module &M, ModuleAnalysisManager &AM) { - if (!uniqueifyInternalLinkageNames(M)) - return PreservedAnalyses::all(); - - return PreservedAnalyses::none(); -} - -INITIALIZE_PASS_BEGIN(UniqueInternalLinkageNamesLegacyPass, - "unique-internal-linkage-names", - "Uniqueify internal linkage names", false, false) -INITIALIZE_PASS_END(UniqueInternalLinkageNamesLegacyPass, - "unique-internal-linkage-names", - "Uniqueify Internal linkage names", false, false) - -namespace llvm { -ModulePass *createUniqueInternalLinkageNamesPass() { - return new UniqueInternalLinkageNamesLegacyPass(); -} -} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp index 73c0532f3fd5..3ca36a1cad91 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp @@ -45,7 +45,6 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) { initializeInjectTLIMappingsLegacyPass(Registry); initializeFixIrreduciblePass(Registry); initializeUnifyLoopExitsLegacyPassPass(Registry); - initializeUniqueInternalLinkageNamesLegacyPassPass(Registry); } /// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses. diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp index 61cd8595a73b..6336af25ef98 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp @@ -189,14 +189,6 @@ static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, if (StoreBase != LoadBase) return -1; - // If the load and store are to the exact same address, they should have been - // a must alias. AA must have gotten confused. - // FIXME: Study to see if/when this happens. One case is forwarding a memset - // to a load from the base of the memset. - - // If the load and store don't overlap at all, the store doesn't provide - // anything to the load. In this case, they really don't alias at all, AA - // must have gotten confused. uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize(); if ((WriteSizeInBits & 7) | (LoadSize & 7)) @@ -204,15 +196,6 @@ static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes. LoadSize /= 8; - bool isAAFailure = false; - if (StoreOffset < LoadOffset) - isAAFailure = StoreOffset + int64_t(StoreSize) <= LoadOffset; - else - isAAFailure = LoadOffset + int64_t(LoadSize) <= StoreOffset; - - if (isAAFailure) - return -1; - // If the Load isn't completely contained within the stored bits, we don't // have all the bits to feed it. 
We could do something crazy in the future // (issue a smaller load then merge the bits in) but this seems unlikely to be @@ -221,6 +204,18 @@ static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, StoreOffset + StoreSize < LoadOffset + LoadSize) return -1; + // If the load and store are to the exact same address, they should have been + // a must alias. AA must have gotten confused. + // FIXME: Study to see if/when this happens. One case is forwarding a memset + // to a load from the base of the memset. + + // If the load and store don't overlap at all, the store doesn't provide + // anything to the load. In this case, they really don't alias at all, AA + // must have gotten confused. The if statement above ensure the condition + // that StoreOffset <= LoadOffset. + if (StoreOffset + int64_t(StoreSize) <= LoadOffset) + return -1; + // Okay, we can do this transformation. Return the number of bytes into the // store that the load is. return LoadOffset - StoreOffset; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp index 930e0b7ee01a..f3afd42e6163 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -26,8 +26,8 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/GlobalObject.h" #include "llvm/IR/GlobalIndirectSymbol.h" +#include "llvm/IR/GlobalObject.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instruction.h" @@ -37,6 +37,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" #include <cassert> #include <limits> #include <memory> @@ -44,6 +45,8 @@ using namespace llvm; +#define DEBUG_TYPE "value-mapper" + // Out of line method to get vtable etc for class. void ValueMapTypeRemapper::anchor() {} void ValueMaterializer::anchor() {} @@ -366,7 +369,7 @@ Value *Mapper::mapValue(const Value *V) { if (NewTy != IA->getFunctionType()) V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(), IA->hasSideEffects(), IA->isAlignStack(), - IA->getDialect()); + IA->getDialect(), IA->canThrow()); } return getVM()[V] = const_cast<Value *>(V); @@ -390,6 +393,26 @@ Value *Mapper::mapValue(const Value *V) { : MetadataAsValue::get(V->getContext(), MDTuple::get(V->getContext(), None)); } + if (auto *AL = dyn_cast<DIArgList>(MD)) { + SmallVector<ValueAsMetadata *, 4> MappedArgs; + for (auto *VAM : AL->getArgs()) { + // Map both Local and Constant VAMs here; they will both ultimately + // be mapped via mapValue (apart from constants when we have no + // module level changes, which have an identity mapping). + if ((Flags & RF_NoModuleLevelChanges) && isa<ConstantAsMetadata>(VAM)) { + MappedArgs.push_back(VAM); + } else if (Value *LV = mapValue(VAM->getValue())) { + MappedArgs.push_back( + LV == VAM->getValue() ? VAM : ValueAsMetadata::get(LV)); + } else { + // If we cannot map the value, set the argument as undef. + MappedArgs.push_back(ValueAsMetadata::get( + UndefValue::get(VAM->getValue()->getType()))); + } + } + return MetadataAsValue::get(V->getContext(), + DIArgList::get(V->getContext(), MappedArgs)); + } // If this is a module-level metadata and we know that nothing at the module // level is changing, then use an identity mapping. 
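Back in the VNCoercion hunk, the reordering moves the disjointness early-out after the containment check, which is why the relocated comment can assume StoreOffset <= LoadOffset and drop one arm of the old comparison. A toy model of the byte-offset arithmetic (my simplification; the real code works in bits and rejects non-byte-sized accesses first):

#include <cstdint>

// Returns the byte offset of the load inside the store, or -1 if the
// store cannot supply every bit of the load.
static int loadOffsetInStore(int64_t StoreOff, uint64_t StoreSize,
                             int64_t LoadOff, uint64_t LoadSize) {
  // Containment: the load must lie entirely within the stored bytes.
  if (StoreOff > LoadOff ||
      StoreOff + int64_t(StoreSize) < LoadOff + int64_t(LoadSize))
    return -1;
  // With containment established this can only fire for a zero-sized
  // load; it is kept as a guard against AA confusion, per the comment.
  if (StoreOff + int64_t(StoreSize) <= LoadOff)
    return -1;
  return static_cast<int>(LoadOff - StoreOff);
}
// e.g. store bytes [0,8), load bytes [4,8)  -> 4
//      store bytes [0,8), load bytes [8,12) -> -1 (containment fails)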
@@ -412,6 +435,20 @@ Value *Mapper::mapValue(const Value *V) { if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) return mapBlockAddress(*BA); + if (const auto *E = dyn_cast<DSOLocalEquivalent>(C)) { + auto *Val = mapValue(E->getGlobalValue()); + GlobalValue *GV = dyn_cast<GlobalValue>(Val); + if (GV) + return getVM()[E] = DSOLocalEquivalent::get(GV); + + auto *Func = cast<Function>(Val->stripPointerCastsAndAliases()); + Type *NewTy = E->getType(); + if (TypeMapper) + NewTy = TypeMapper->remapType(NewTy); + return getVM()[E] = llvm::ConstantExpr::getBitCast( + DSOLocalEquivalent::get(Func), NewTy); + } + auto mapValueOrNull = [this](Value *V) { auto Mapped = mapValue(V); assert((Mapped || (Flags & RF_NullMapMissingGlobalValues)) && @@ -533,23 +570,21 @@ Optional<Metadata *> MDNodeMapper::tryToMapOperand(const Metadata *Op) { return None; } -static Metadata *cloneOrBuildODR(const MDNode &N) { - auto *CT = dyn_cast<DICompositeType>(&N); - // If ODR type uniquing is enabled, we would have uniqued composite types - // with identifiers during bitcode reading, so we can just use CT. - if (CT && CT->getContext().isODRUniquingDebugTypes() && - CT->getIdentifier() != "") - return const_cast<DICompositeType *>(CT); - return MDNode::replaceWithDistinct(N.clone()); -} - MDNode *MDNodeMapper::mapDistinctNode(const MDNode &N) { assert(N.isDistinct() && "Expected a distinct node"); assert(!M.getVM().getMappedMD(&N) && "Expected an unmapped node"); - DistinctWorklist.push_back( - cast<MDNode>((M.Flags & RF_MoveDistinctMDs) - ? M.mapToSelf(&N) - : M.mapToMetadata(&N, cloneOrBuildODR(N)))); + Metadata *NewM = nullptr; + + if (M.Flags & RF_ReuseAndMutateDistinctMDs) { + NewM = M.mapToSelf(&N); + } else { + NewM = MDNode::replaceWithDistinct(N.clone()); + LLVM_DEBUG(dbgs() << "\nMap " << N << "\n" + << "To " << *NewM << "\n\n"); + M.mapToMetadata(&N, NewM); + } + DistinctWorklist.push_back(cast<MDNode>(NewM)); + return DistinctWorklist.back(); } @@ -597,6 +632,9 @@ void MDNodeMapper::remapOperands(MDNode &N, OperandMapper mapOperand) { for (unsigned I = 0, E = N.getNumOperands(); I != E; ++I) { Metadata *Old = N.getOperand(I); Metadata *New = mapOperand(Old); + if (Old != New) + LLVM_DEBUG(dbgs() << "Replacing Op " << Old << " with " << New << " in " + << N << "\n"); if (Old != New) N.replaceOperandWith(I, New); @@ -716,6 +754,11 @@ void MDNodeMapper::mapNodesInPOT(UniquedGraph &G) { }); auto *NewN = MDNode::replaceWithUniqued(std::move(ClonedN)); + if (N && NewN && N != NewN) { + LLVM_DEBUG(dbgs() << "\nMap " << *N << "\n" + << "To " << *NewN << "\n\n"); + } + M.mapToMetadata(N, NewN); // Nodes that were referenced out of order in the POT are involved in a @@ -902,7 +945,8 @@ void Mapper::remapInstruction(Instruction *I) { AttributeList Attrs = CB->getAttributes(); for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) { for (Attribute::AttrKind TypedAttr : - {Attribute::ByVal, Attribute::StructRet, Attribute::ByRef}) { + {Attribute::ByVal, Attribute::StructRet, Attribute::ByRef, + Attribute::InAlloca}) { if (Type *Ty = Attrs.getAttribute(i, TypedAttr).getValueAsType()) { Attrs = Attrs.replaceAttributeType(C, i, TypedAttr, TypeMapper->remapType(Ty)); @@ -988,8 +1032,8 @@ void Mapper::mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, Elements.push_back(NewV); } - GV.setInitializer(ConstantArray::get( - cast<ArrayType>(GV.getType()->getElementType()), Elements)); + GV.setInitializer( + ConstantArray::get(cast<ArrayType>(GV.getValueType()), Elements)); } void 
Mapper::scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init,
