diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2023-12-09 13:28:42 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2023-12-09 13:28:42 +0000 |
| commit | b1c73532ee8997fe5dfbeb7d223027bdf99758a0 (patch) | |
| tree | 7d6e51c294ab6719475d660217aa0c0ad0526292 /llvm/lib/Transforms/Utils | |
| parent | 7fa27ce4a07f19b07799a767fc29416f3b625afb (diff) | |
Diffstat (limited to 'llvm/lib/Transforms/Utils')
56 files changed, 4399 insertions, 1632 deletions
diff --git a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp index 2195406c144c..6ca737df49b9 100644 --- a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp +++ b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp @@ -153,19 +153,17 @@ static Value *getStrlenWithNull(IRBuilder<> &Builder, Value *Str) { static Value *callAppendStringN(IRBuilder<> &Builder, Value *Desc, Value *Str, Value *Length, bool isLast) { auto Int64Ty = Builder.getInt64Ty(); - auto CharPtrTy = Builder.getInt8PtrTy(); + auto PtrTy = Builder.getPtrTy(); auto Int32Ty = Builder.getInt32Ty(); auto M = Builder.GetInsertBlock()->getModule(); auto Fn = M->getOrInsertFunction("__ockl_printf_append_string_n", Int64Ty, - Int64Ty, CharPtrTy, Int64Ty, Int32Ty); + Int64Ty, PtrTy, Int64Ty, Int32Ty); auto IsLastInt32 = Builder.getInt32(isLast); return Builder.CreateCall(Fn, {Desc, Str, Length, IsLastInt32}); } static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg, bool IsLast) { - Arg = Builder.CreateBitCast( - Arg, Builder.getInt8PtrTy(Arg->getType()->getPointerAddressSpace())); auto Length = getStrlenWithNull(Builder, Arg); return callAppendStringN(Builder, Desc, Arg, Length, IsLast); } @@ -299,9 +297,9 @@ static Value *callBufferedPrintfStart( Builder.getContext(), AttributeList::FunctionIndex, Attribute::NoUnwind); Type *Tys_alloc[1] = {Builder.getInt32Ty()}; - Type *I8Ptr = - Builder.getInt8PtrTy(M->getDataLayout().getDefaultGlobalsAddressSpace()); - FunctionType *FTy_alloc = FunctionType::get(I8Ptr, Tys_alloc, false); + Type *PtrTy = + Builder.getPtrTy(M->getDataLayout().getDefaultGlobalsAddressSpace()); + FunctionType *FTy_alloc = FunctionType::get(PtrTy, Tys_alloc, false); auto PrintfAllocFn = M->getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr); diff --git a/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/llvm/lib/Transforms/Utils/AddDiscriminators.cpp index 7d127400651e..f95d5e23c9c8 100644 --- a/llvm/lib/Transforms/Utils/AddDiscriminators.cpp +++ b/llvm/lib/Transforms/Utils/AddDiscriminators.cpp @@ -63,13 +63,10 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PassManager.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" #include <utility> diff --git a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp index 45cf98e65a5a..efa8e874b955 100644 --- a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp +++ b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp @@ -19,7 +19,6 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" -#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/DebugCounter.h" #include "llvm/Transforms/Utils/Local.h" @@ -587,37 +586,3 @@ PreservedAnalyses AssumeBuilderPass::run(Function &F, PA.preserveSet<CFGAnalyses>(); return PA; } - -namespace { -class AssumeBuilderPassLegacyPass : public FunctionPass { -public: - static char ID; - - AssumeBuilderPassLegacyPass() : FunctionPass(ID) { - initializeAssumeBuilderPassLegacyPassPass(*PassRegistry::getPassRegistry()); - } - bool runOnFunction(Function &F) override { - AssumptionCache &AC = - getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - DominatorTreeWrapperPass *DTWP = - getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - for (Instruction &I : instructions(F)) - salvageKnowledge(&I, &AC, DTWP ? &DTWP->getDomTree() : nullptr); - return true; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AssumptionCacheTracker>(); - - AU.setPreservesAll(); - } -}; -} // namespace - -char AssumeBuilderPassLegacyPass::ID = 0; - -INITIALIZE_PASS_BEGIN(AssumeBuilderPassLegacyPass, "assume-builder", - "Assume Builder", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_END(AssumeBuilderPassLegacyPass, "assume-builder", - "Assume Builder", false, false) diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index f06ea89cc61d..b700edf8ea6c 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -194,7 +194,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, // Don't break unwinding instructions or terminators with other side-effects. Instruction *PTI = PredBB->getTerminator(); - if (PTI->isExceptionalTerminator() || PTI->mayHaveSideEffects()) + if (PTI->isSpecialTerminator() || PTI->mayHaveSideEffects()) return false; // Can't merge if there are multiple distinct successors. @@ -300,7 +300,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, PredBB->back().eraseFromParent(); // Move terminator instruction. - PredBB->splice(PredBB->end(), BB); + BB->back().moveBeforePreserving(*PredBB, PredBB->end()); // Terminator may be a memory accessing instruction too. if (MSSAU) @@ -382,7 +382,39 @@ bool llvm::MergeBlockSuccessorsIntoGivenBlocks( /// - Check fully overlapping fragments and not only identical fragments. /// - Support dbg.declare. dbg.label, and possibly other meta instructions being /// part of the sequence of consecutive instructions. +static bool DPValuesRemoveRedundantDbgInstrsUsingBackwardScan(BasicBlock *BB) { + SmallVector<DPValue *, 8> ToBeRemoved; + SmallDenseSet<DebugVariable> VariableSet; + for (auto &I : reverse(*BB)) { + for (DPValue &DPV : reverse(I.getDbgValueRange())) { + DebugVariable Key(DPV.getVariable(), DPV.getExpression(), + DPV.getDebugLoc()->getInlinedAt()); + auto R = VariableSet.insert(Key); + // If the same variable fragment is described more than once it is enough + // to keep the last one (i.e. the first found since we for reverse + // iteration). + // FIXME: add assignment tracking support (see parallel implementation + // below). + if (!R.second) + ToBeRemoved.push_back(&DPV); + continue; + } + // Sequence with consecutive dbg.value instrs ended. Clear the map to + // restart identifying redundant instructions if case we find another + // dbg.value sequence. + VariableSet.clear(); + } + + for (auto &DPV : ToBeRemoved) + DPV->eraseFromParent(); + + return !ToBeRemoved.empty(); +} + static bool removeRedundantDbgInstrsUsingBackwardScan(BasicBlock *BB) { + if (BB->IsNewDbgInfoFormat) + return DPValuesRemoveRedundantDbgInstrsUsingBackwardScan(BB); + SmallVector<DbgValueInst *, 8> ToBeRemoved; SmallDenseSet<DebugVariable> VariableSet; for (auto &I : reverse(*BB)) { @@ -440,7 +472,38 @@ static bool removeRedundantDbgInstrsUsingBackwardScan(BasicBlock *BB) { /// /// Possible improvements: /// - Keep track of non-overlapping fragments. +static bool DPValuesRemoveRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) { + SmallVector<DPValue *, 8> ToBeRemoved; + DenseMap<DebugVariable, std::pair<SmallVector<Value *, 4>, DIExpression *>> + VariableMap; + for (auto &I : *BB) { + for (DPValue &DPV : I.getDbgValueRange()) { + DebugVariable Key(DPV.getVariable(), std::nullopt, + DPV.getDebugLoc()->getInlinedAt()); + auto VMI = VariableMap.find(Key); + // Update the map if we found a new value/expression describing the + // variable, or if the variable wasn't mapped already. + SmallVector<Value *, 4> Values(DPV.location_ops()); + if (VMI == VariableMap.end() || VMI->second.first != Values || + VMI->second.second != DPV.getExpression()) { + VariableMap[Key] = {Values, DPV.getExpression()}; + continue; + } + // Found an identical mapping. Remember the instruction for later removal. + ToBeRemoved.push_back(&DPV); + } + } + + for (auto *DPV : ToBeRemoved) + DPV->eraseFromParent(); + + return !ToBeRemoved.empty(); +} + static bool removeRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) { + if (BB->IsNewDbgInfoFormat) + return DPValuesRemoveRedundantDbgInstrsUsingForwardScan(BB); + SmallVector<DbgValueInst *, 8> ToBeRemoved; DenseMap<DebugVariable, std::pair<SmallVector<Value *, 4>, DIExpression *>> VariableMap; @@ -852,9 +915,11 @@ void llvm::createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds, continue; // Otherwise a new PHI is needed. Create one and populate it. - PHINode *NewPN = PHINode::Create( - PN.getType(), Preds.size(), "split", - SplitBB->isLandingPad() ? &SplitBB->front() : SplitBB->getTerminator()); + PHINode *NewPN = PHINode::Create(PN.getType(), Preds.size(), "split"); + BasicBlock::iterator InsertPos = + SplitBB->isLandingPad() ? SplitBB->begin() + : SplitBB->getTerminator()->getIterator(); + NewPN->insertBefore(InsertPos); for (BasicBlock *BB : Preds) NewPN->addIncoming(V, BB); @@ -877,7 +942,7 @@ llvm::SplitAllCriticalEdges(Function &F, return NumBroken; } -static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt, +static BasicBlock *SplitBlockImpl(BasicBlock *Old, BasicBlock::iterator SplitPt, DomTreeUpdater *DTU, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, const Twine &BBName, bool Before) { @@ -887,7 +952,7 @@ static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt, DTU ? DTU : (DT ? &LocalDTU : nullptr), LI, MSSAU, BBName); } - BasicBlock::iterator SplitIt = SplitPt->getIterator(); + BasicBlock::iterator SplitIt = SplitPt; while (isa<PHINode>(SplitIt) || SplitIt->isEHPad()) { ++SplitIt; assert(SplitIt != SplitPt->getParent()->end()); @@ -933,14 +998,14 @@ static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt, return New; } -BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, +BasicBlock *llvm::SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, const Twine &BBName, bool Before) { return SplitBlockImpl(Old, SplitPt, /*DTU=*/nullptr, DT, LI, MSSAU, BBName, Before); } -BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, +BasicBlock *llvm::SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DomTreeUpdater *DTU, LoopInfo *LI, MemorySSAUpdater *MSSAU, const Twine &BBName, bool Before) { @@ -948,12 +1013,12 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Before); } -BasicBlock *llvm::splitBlockBefore(BasicBlock *Old, Instruction *SplitPt, +BasicBlock *llvm::splitBlockBefore(BasicBlock *Old, BasicBlock::iterator SplitPt, DomTreeUpdater *DTU, LoopInfo *LI, MemorySSAUpdater *MSSAU, const Twine &BBName) { - BasicBlock::iterator SplitIt = SplitPt->getIterator(); + BasicBlock::iterator SplitIt = SplitPt; while (isa<PHINode>(SplitIt) || SplitIt->isEHPad()) ++SplitIt; std::string Name = BBName.str(); @@ -1137,14 +1202,11 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. - - // NOTE! This loop walks backwards for a reason! First off, this minimizes - // the cost of removal if we end up removing a large number of values, and - // second off, this ensures that the indices for the incoming values - // aren't invalidated when we remove one. - for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) - if (PredSet.count(PN->getIncomingBlock(i))) - PN->removeIncomingValue(i, false); + PN->removeIncomingValueIf( + [&](unsigned Idx) { + return PredSet.contains(PN->getIncomingBlock(Idx)); + }, + /* DeletePHIIfEmpty */ false); // Add an incoming value to the PHI node in the loop for the preheader // edge. @@ -1394,17 +1456,6 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef<BasicBlock *> Preds, const char *Suffix1, const char *Suffix2, SmallVectorImpl<BasicBlock *> &NewBBs, - DominatorTree *DT, LoopInfo *LI, - MemorySSAUpdater *MSSAU, - bool PreserveLCSSA) { - return SplitLandingPadPredecessorsImpl( - OrigBB, Preds, Suffix1, Suffix2, NewBBs, - /*DTU=*/nullptr, DT, LI, MSSAU, PreserveLCSSA); -} -void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, - ArrayRef<BasicBlock *> Preds, - const char *Suffix1, const char *Suffix2, - SmallVectorImpl<BasicBlock *> &NewBBs, DomTreeUpdater *DTU, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA) { @@ -1472,7 +1523,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, } Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond, - Instruction *SplitBefore, + BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights, DomTreeUpdater *DTU, LoopInfo *LI, @@ -1485,7 +1536,7 @@ Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond, } Instruction *llvm::SplitBlockAndInsertIfElse(Value *Cond, - Instruction *SplitBefore, + BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights, DomTreeUpdater *DTU, LoopInfo *LI, @@ -1497,7 +1548,7 @@ Instruction *llvm::SplitBlockAndInsertIfElse(Value *Cond, return ElseBlock->getTerminator(); } -void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, +void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights, @@ -1513,7 +1564,7 @@ void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, } void llvm::SplitBlockAndInsertIfThenElse( - Value *Cond, Instruction *SplitBefore, BasicBlock **ThenBlock, + Value *Cond, BasicBlock::iterator SplitBefore, BasicBlock **ThenBlock, BasicBlock **ElseBlock, bool UnreachableThen, bool UnreachableElse, MDNode *BranchWeights, DomTreeUpdater *DTU, LoopInfo *LI) { assert((ThenBlock || ElseBlock) && @@ -1530,7 +1581,7 @@ void llvm::SplitBlockAndInsertIfThenElse( } LLVMContext &C = Head->getContext(); - BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator()); + BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); BasicBlock *TrueBlock = Tail; BasicBlock *FalseBlock = Tail; bool ThenToTailEdge = false; @@ -2077,3 +2128,25 @@ void llvm::InvertBranch(BranchInst *PBI, IRBuilderBase &Builder) { PBI->setCondition(NewCond); PBI->swapSuccessors(); } + +bool llvm::hasOnlySimpleTerminator(const Function &F) { + for (auto &BB : F) { + auto *Term = BB.getTerminator(); + if (!(isa<ReturnInst>(Term) || isa<UnreachableInst>(Term) || + isa<BranchInst>(Term))) + return false; + } + return true; +} + +bool llvm::isPresplitCoroSuspendExitEdge(const BasicBlock &Src, + const BasicBlock &Dest) { + assert(Src.getParent() == Dest.getParent()); + if (!Src.getParent()->isPresplitCoroutine()) + return false; + if (auto *SW = dyn_cast<SwitchInst>(Src.getTerminator())) + if (auto *Intr = dyn_cast<IntrinsicInst>(SW->getCondition())) + return Intr->getIntrinsicID() == Intrinsic::coro_suspend && + SW->getDefaultDest() == &Dest; + return false; +} diff --git a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp index ddb35756030f..5fb796cc3db6 100644 --- a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -387,7 +387,7 @@ bool llvm::SplitIndirectBrCriticalEdges(Function &F, if (ShouldUpdateAnalysis) { // Copy the BFI/BPI from Target to BodyBlock. BPI->setEdgeProbability(BodyBlock, EdgeProbabilities); - BFI->setBlockFreq(BodyBlock, BFI->getBlockFreq(Target).getFrequency()); + BFI->setBlockFreq(BodyBlock, BFI->getBlockFreq(Target)); } // It's possible Target was its own successor through an indirectbr. // In this case, the indirectbr now comes from BodyBlock. @@ -411,10 +411,10 @@ bool llvm::SplitIndirectBrCriticalEdges(Function &F, BPI->getEdgeProbability(Src, DirectSucc); } if (ShouldUpdateAnalysis) { - BFI->setBlockFreq(DirectSucc, BlockFreqForDirectSucc.getFrequency()); + BFI->setBlockFreq(DirectSucc, BlockFreqForDirectSucc); BlockFrequency NewBlockFreqForTarget = BFI->getBlockFreq(Target) - BlockFreqForDirectSucc; - BFI->setBlockFreq(Target, NewBlockFreqForTarget.getFrequency()); + BFI->setBlockFreq(Target, NewBlockFreqForTarget); } // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that @@ -449,8 +449,8 @@ bool llvm::SplitIndirectBrCriticalEdges(Function &F, // Create a PHI in the body block, to merge the direct and indirect // predecessors. - PHINode *MergePHI = - PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert); + PHINode *MergePHI = PHINode::Create(IndPHI->getType(), 2, "merge"); + MergePHI->insertBefore(MergeInsert); MergePHI->addIncoming(NewIndPHI, Target); MergePHI->addIncoming(DirPHI, DirectSucc); diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 5de8ff84de77..12741dc5af5a 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -1425,11 +1425,6 @@ StringRef llvm::getFloatFn(const Module *M, const TargetLibraryInfo *TLI, //- Emit LibCalls ------------------------------------------------------------// -Value *llvm::castToCStr(Value *V, IRBuilderBase &B) { - unsigned AS = V->getType()->getPointerAddressSpace(); - return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr"); -} - static IntegerType *getIntTy(IRBuilderBase &B, const TargetLibraryInfo *TLI) { return B.getIntNTy(TLI->getIntSize()); } @@ -1461,63 +1456,64 @@ static Value *emitLibCall(LibFunc TheLibFunc, Type *ReturnType, Value *llvm::emitStrLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { + Type *CharPtrTy = B.getPtrTy(); Type *SizeTTy = getSizeTTy(B, TLI); - return emitLibCall(LibFunc_strlen, SizeTTy, - B.getInt8PtrTy(), castToCStr(Ptr, B), B, TLI); + return emitLibCall(LibFunc_strlen, SizeTTy, CharPtrTy, Ptr, B, TLI); } Value *llvm::emitStrDup(Value *Ptr, IRBuilderBase &B, const TargetLibraryInfo *TLI) { - return emitLibCall(LibFunc_strdup, B.getInt8PtrTy(), B.getInt8PtrTy(), - castToCStr(Ptr, B), B, TLI); + Type *CharPtrTy = B.getPtrTy(); + return emitLibCall(LibFunc_strdup, CharPtrTy, CharPtrTy, Ptr, B, TLI); } Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilderBase &B, const TargetLibraryInfo *TLI) { - Type *I8Ptr = B.getInt8PtrTy(); + Type *CharPtrTy = B.getPtrTy(); Type *IntTy = getIntTy(B, TLI); - return emitLibCall(LibFunc_strchr, I8Ptr, {I8Ptr, IntTy}, - {castToCStr(Ptr, B), ConstantInt::get(IntTy, C)}, B, TLI); + return emitLibCall(LibFunc_strchr, CharPtrTy, {CharPtrTy, IntTy}, + {Ptr, ConstantInt::get(IntTy, C)}, B, TLI); } Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { + Type *CharPtrTy = B.getPtrTy(); Type *IntTy = getIntTy(B, TLI); Type *SizeTTy = getSizeTTy(B, TLI); return emitLibCall( LibFunc_strncmp, IntTy, - {B.getInt8PtrTy(), B.getInt8PtrTy(), SizeTTy}, - {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI); + {CharPtrTy, CharPtrTy, SizeTTy}, + {Ptr1, Ptr2, Len}, B, TLI); } Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilderBase &B, const TargetLibraryInfo *TLI) { - Type *I8Ptr = Dst->getType(); - return emitLibCall(LibFunc_strcpy, I8Ptr, {I8Ptr, I8Ptr}, - {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI); + Type *CharPtrTy = Dst->getType(); + return emitLibCall(LibFunc_strcpy, CharPtrTy, {CharPtrTy, CharPtrTy}, + {Dst, Src}, B, TLI); } Value *llvm::emitStpCpy(Value *Dst, Value *Src, IRBuilderBase &B, const TargetLibraryInfo *TLI) { - Type *I8Ptr = B.getInt8PtrTy(); - return emitLibCall(LibFunc_stpcpy, I8Ptr, {I8Ptr, I8Ptr}, - {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI); + Type *CharPtrTy = B.getPtrTy(); + return emitLibCall(LibFunc_stpcpy, CharPtrTy, {CharPtrTy, CharPtrTy}, + {Dst, Src}, B, TLI); } Value *llvm::emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B, const TargetLibraryInfo *TLI) { - Type *I8Ptr = B.getInt8PtrTy(); + Type *CharPtrTy = B.getPtrTy(); Type *SizeTTy = getSizeTTy(B, TLI); - return emitLibCall(LibFunc_strncpy, I8Ptr, {I8Ptr, I8Ptr, SizeTTy}, - {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI); + return emitLibCall(LibFunc_strncpy, CharPtrTy, {CharPtrTy, CharPtrTy, SizeTTy}, + {Dst, Src, Len}, B, TLI); } Value *llvm::emitStpNCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B, const TargetLibraryInfo *TLI) { - Type *I8Ptr = B.getInt8PtrTy(); + Type *CharPtrTy = B.getPtrTy(); Type *SizeTTy = getSizeTTy(B, TLI); - return emitLibCall(LibFunc_stpncpy, I8Ptr, {I8Ptr, I8Ptr, SizeTTy}, - {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI); + return emitLibCall(LibFunc_stpncpy, CharPtrTy, {CharPtrTy, CharPtrTy, SizeTTy}, + {Dst, Src, Len}, B, TLI); } Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, @@ -1530,13 +1526,11 @@ Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, AttributeList AS; AS = AttributeList::get(M->getContext(), AttributeList::FunctionIndex, Attribute::NoUnwind); - Type *I8Ptr = B.getInt8PtrTy(); + Type *VoidPtrTy = B.getPtrTy(); Type *SizeTTy = getSizeTTy(B, TLI); FunctionCallee MemCpy = getOrInsertLibFunc(M, *TLI, LibFunc_memcpy_chk, - AttributeList::get(M->getContext(), AS), I8Ptr, - I8Ptr, I8Ptr, SizeTTy, SizeTTy); - Dst = castToCStr(Dst, B); - Src = castToCStr(Src, B); + AttributeList::get(M->getContext(), AS), VoidPtrTy, + VoidPtrTy, VoidPtrTy, SizeTTy, SizeTTy); CallInst *CI = B.CreateCall(MemCpy, {Dst, Src, Len, ObjSize}); if (const Function *F = dyn_cast<Function>(MemCpy.getCallee()->stripPointerCasts())) @@ -1546,140 +1540,141 @@ Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, Value *llvm::emitMemPCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { - Type *I8Ptr = B.getInt8PtrTy(); + Type *VoidPtrTy = B.getPtrTy(); Type *SizeTTy = getSizeTTy(B, TLI); - return emitLibCall(LibFunc_mempcpy, I8Ptr, - {I8Ptr, I8Ptr, SizeTTy}, + return emitLibCall(LibFunc_mempcpy, VoidPtrTy, + {VoidPtrTy, VoidPtrTy, SizeTTy}, {Dst, Src, Len}, B, TLI); } Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { - Type *I8Ptr = B.getInt8PtrTy(); + Type *VoidPtrTy = B.getPtrTy(); Type *IntTy = getIntTy(B, TLI); Type *SizeTTy = getSizeTTy(B, TLI); - return emitLibCall(LibFunc_memchr, I8Ptr, - {I8Ptr, IntTy, SizeTTy}, - {castToCStr(Ptr, B), Val, Len}, B, TLI); + return emitLibCall(LibFunc_memchr, VoidPtrTy, + {VoidPtrTy, IntTy, SizeTTy}, + {Ptr, Val, Len}, B, TLI); } Value *llvm::emitMemRChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { - Type *I8Ptr = B.getInt8PtrTy(); + Type *VoidPtrTy = B.getPtrTy(); Type *IntTy = getIntTy(B, TLI); Type *SizeTTy = getSizeTTy(B, TLI); - return emitLibCall(LibFunc_memrchr, I8Ptr, - {I8Ptr, IntTy, SizeTTy}, - {castToCStr(Ptr, B), Val, Len}, B, TLI); + return emitLibCall(LibFunc_memrchr, VoidPtrTy, + {VoidPtrTy, IntTy, SizeTTy}, + {Ptr, Val, Len}, B, TLI); } Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { - Type *I8Ptr = B.getInt8PtrTy(); + Type *VoidPtrTy = B.getPtrTy(); Type *IntTy = getIntTy(B, TLI); Type *SizeTTy = getSizeTTy(B, TLI); return emitLibCall(LibFunc_memcmp, IntTy, - {I8Ptr, I8Ptr, SizeTTy}, - {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI); + {VoidPtrTy, VoidPtrTy, SizeTTy}, + {Ptr1, Ptr2, Len}, B, TLI); } Value *llvm::emitBCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { - Type *I8Ptr = B.getInt8PtrTy(); + Type *VoidPtrTy = B.getPtrTy(); Type *IntTy = getIntTy(B, TLI); Type *SizeTTy = getSizeTTy(B, TLI); return emitLibCall(LibFunc_bcmp, IntTy, - {I8Ptr, I8Ptr, SizeTTy}, - {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI); + {VoidPtrTy, VoidPtrTy, SizeTTy}, + {Ptr1, Ptr2, Len}, B, TLI); } Value *llvm::emitMemCCpy(Value *Ptr1, Value *Ptr2, Value *Val, Value *Len, IRBuilderBase &B, const TargetLibraryInfo *TLI) { - Type *I8Ptr = B.getInt8PtrTy(); + Type *VoidPtrTy = B.getPtrTy(); Type *IntTy = getIntTy(B, TLI); Type *SizeTTy = getSizeTTy(B, TLI); - return emitLibCall(LibFunc_memccpy, I8Ptr, - {I8Ptr, I8Ptr, IntTy, SizeTTy}, + return emitLibCall(LibFunc_memccpy, VoidPtrTy, + {VoidPtrTy, VoidPtrTy, IntTy, SizeTTy}, {Ptr1, Ptr2, Val, Len}, B, TLI); } Value *llvm::emitSNPrintf(Value *Dest, Value *Size, Value *Fmt, ArrayRef<Value *> VariadicArgs, IRBuilderBase &B, const TargetLibraryInfo *TLI) { - Type *I8Ptr = B.getInt8PtrTy(); + Type *CharPtrTy = B.getPtrTy(); Type *IntTy = getIntTy(B, TLI); Type *SizeTTy = getSizeTTy(B, TLI); - SmallVector<Value *, 8> Args{castToCStr(Dest, B), Size, castToCStr(Fmt, B)}; + SmallVector<Value *, 8> Args{Dest, Size, Fmt}; llvm::append_range(Args, VariadicArgs); return emitLibCall(LibFunc_snprintf, IntTy, - {I8Ptr, SizeTTy, I8Ptr}, + {CharPtrTy, SizeTTy, CharPtrTy}, Args, B, TLI, /*IsVaArgs=*/true); } Value *llvm::emitSPrintf(Value *Dest, Value *Fmt, ArrayRef<Value *> VariadicArgs, IRBuilderBase &B, const TargetLibraryInfo *TLI) { - Type *I8Ptr = B.getInt8PtrTy(); + Type *CharPtrTy = B.getPtrTy(); Type *IntTy = getIntTy(B, TLI); - SmallVector<Value *, 8> Args{castToCStr(Dest, B), castToCStr(Fmt, B)}; + SmallVector<Value *, 8> Args{Dest, Fmt}; llvm::append_range(Args, VariadicArgs); return emitLibCall(LibFunc_sprintf, IntTy, - {I8Ptr, I8Ptr}, Args, B, TLI, + {CharPtrTy, CharPtrTy}, Args, B, TLI, /*IsVaArgs=*/true); } Value *llvm::emitStrCat(Value *Dest, Value *Src, IRBuilderBase &B, const TargetLibraryInfo *TLI) { - return emitLibCall(LibFunc_strcat, B.getInt8PtrTy(), - {B.getInt8PtrTy(), B.getInt8PtrTy()}, - {castToCStr(Dest, B), castToCStr(Src, B)}, B, TLI); + Type *CharPtrTy = B.getPtrTy(); + return emitLibCall(LibFunc_strcat, CharPtrTy, + {CharPtrTy, CharPtrTy}, + {Dest, Src}, B, TLI); } Value *llvm::emitStrLCpy(Value *Dest, Value *Src, Value *Size, IRBuilderBase &B, const TargetLibraryInfo *TLI) { - Type *I8Ptr = B.getInt8PtrTy(); + Type *CharPtrTy = B.getPtrTy(); Type *SizeTTy = getSizeTTy(B, TLI); return emitLibCall(LibFunc_strlcpy, SizeTTy, - {I8Ptr, I8Ptr, SizeTTy}, - {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI); + {CharPtrTy, CharPtrTy, SizeTTy}, + {Dest, Src, Size}, B, TLI); } Value *llvm::emitStrLCat(Value *Dest, Value *Src, Value *Size, IRBuilderBase &B, const TargetLibraryInfo *TLI) { - Type *I8Ptr = B.getInt8PtrTy(); + Type *CharPtrTy = B.getPtrTy(); Type *SizeTTy = getSizeTTy(B, TLI); return emitLibCall(LibFunc_strlcat, SizeTTy, - {I8Ptr, I8Ptr, SizeTTy}, - {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI); + {CharPtrTy, CharPtrTy, SizeTTy}, + {Dest, Src, Size}, B, TLI); } Value *llvm::emitStrNCat(Value *Dest, Value *Src, Value *Size, IRBuilderBase &B, const TargetLibraryInfo *TLI) { - Type *I8Ptr = B.getInt8PtrTy(); + Type *CharPtrTy = B.getPtrTy(); Type *SizeTTy = getSizeTTy(B, TLI); - return emitLibCall(LibFunc_strncat, I8Ptr, - {I8Ptr, I8Ptr, SizeTTy}, - {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI); + return emitLibCall(LibFunc_strncat, CharPtrTy, + {CharPtrTy, CharPtrTy, SizeTTy}, + {Dest, Src, Size}, B, TLI); } Value *llvm::emitVSNPrintf(Value *Dest, Value *Size, Value *Fmt, Value *VAList, IRBuilderBase &B, const TargetLibraryInfo *TLI) { - Type *I8Ptr = B.getInt8PtrTy(); + Type *CharPtrTy = B.getPtrTy(); Type *IntTy = getIntTy(B, TLI); Type *SizeTTy = getSizeTTy(B, TLI); return emitLibCall( LibFunc_vsnprintf, IntTy, - {I8Ptr, SizeTTy, I8Ptr, VAList->getType()}, - {castToCStr(Dest, B), Size, castToCStr(Fmt, B), VAList}, B, TLI); + {CharPtrTy, SizeTTy, CharPtrTy, VAList->getType()}, + {Dest, Size, Fmt, VAList}, B, TLI); } Value *llvm::emitVSPrintf(Value *Dest, Value *Fmt, Value *VAList, IRBuilderBase &B, const TargetLibraryInfo *TLI) { - Type *I8Ptr = B.getInt8PtrTy(); + Type *CharPtrTy = B.getPtrTy(); Type *IntTy = getIntTy(B, TLI); return emitLibCall(LibFunc_vsprintf, IntTy, - {I8Ptr, I8Ptr, VAList->getType()}, - {castToCStr(Dest, B), castToCStr(Fmt, B), VAList}, B, TLI); + {CharPtrTy, CharPtrTy, VAList->getType()}, + {Dest, Fmt, VAList}, B, TLI); } /// Append a suffix to the function name according to the type of 'Op'. @@ -1829,9 +1824,9 @@ Value *llvm::emitPutS(Value *Str, IRBuilderBase &B, Type *IntTy = getIntTy(B, TLI); StringRef PutsName = TLI->getName(LibFunc_puts); FunctionCallee PutS = getOrInsertLibFunc(M, *TLI, LibFunc_puts, IntTy, - B.getInt8PtrTy()); + B.getPtrTy()); inferNonMandatoryLibFuncAttrs(M, PutsName, *TLI); - CallInst *CI = B.CreateCall(PutS, castToCStr(Str, B), PutsName); + CallInst *CI = B.CreateCall(PutS, Str, PutsName); if (const Function *F = dyn_cast<Function>(PutS.getCallee()->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -1867,10 +1862,10 @@ Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilderBase &B, Type *IntTy = getIntTy(B, TLI); StringRef FPutsName = TLI->getName(LibFunc_fputs); FunctionCallee F = getOrInsertLibFunc(M, *TLI, LibFunc_fputs, IntTy, - B.getInt8PtrTy(), File->getType()); + B.getPtrTy(), File->getType()); if (File->getType()->isPointerTy()) inferNonMandatoryLibFuncAttrs(M, FPutsName, *TLI); - CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, FPutsName); + CallInst *CI = B.CreateCall(F, {Str, File}, FPutsName); if (const Function *Fn = dyn_cast<Function>(F.getCallee()->stripPointerCasts())) @@ -1887,13 +1882,13 @@ Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilderBase &B, Type *SizeTTy = getSizeTTy(B, TLI); StringRef FWriteName = TLI->getName(LibFunc_fwrite); FunctionCallee F = getOrInsertLibFunc(M, *TLI, LibFunc_fwrite, - SizeTTy, B.getInt8PtrTy(), SizeTTy, + SizeTTy, B.getPtrTy(), SizeTTy, SizeTTy, File->getType()); if (File->getType()->isPointerTy()) inferNonMandatoryLibFuncAttrs(M, FWriteName, *TLI); CallInst *CI = - B.CreateCall(F, {castToCStr(Ptr, B), Size, + B.CreateCall(F, {Ptr, Size, ConstantInt::get(SizeTTy, 1), File}); if (const Function *Fn = @@ -1911,7 +1906,7 @@ Value *llvm::emitMalloc(Value *Num, IRBuilderBase &B, const DataLayout &DL, StringRef MallocName = TLI->getName(LibFunc_malloc); Type *SizeTTy = getSizeTTy(B, TLI); FunctionCallee Malloc = getOrInsertLibFunc(M, *TLI, LibFunc_malloc, - B.getInt8PtrTy(), SizeTTy); + B.getPtrTy(), SizeTTy); inferNonMandatoryLibFuncAttrs(M, MallocName, *TLI); CallInst *CI = B.CreateCall(Malloc, Num, MallocName); @@ -1931,7 +1926,7 @@ Value *llvm::emitCalloc(Value *Num, Value *Size, IRBuilderBase &B, StringRef CallocName = TLI.getName(LibFunc_calloc); Type *SizeTTy = getSizeTTy(B, &TLI); FunctionCallee Calloc = getOrInsertLibFunc(M, TLI, LibFunc_calloc, - B.getInt8PtrTy(), SizeTTy, SizeTTy); + B.getPtrTy(), SizeTTy, SizeTTy); inferNonMandatoryLibFuncAttrs(M, CallocName, TLI); CallInst *CI = B.CreateCall(Calloc, {Num, Size}, CallocName); @@ -1950,7 +1945,7 @@ Value *llvm::emitHotColdNew(Value *Num, IRBuilderBase &B, return nullptr; StringRef Name = TLI->getName(NewFunc); - FunctionCallee Func = M->getOrInsertFunction(Name, B.getInt8PtrTy(), + FunctionCallee Func = M->getOrInsertFunction(Name, B.getPtrTy(), Num->getType(), B.getInt8Ty()); inferNonMandatoryLibFuncAttrs(M, Name, *TLI); CallInst *CI = B.CreateCall(Func, {Num, B.getInt8(HotCold)}, Name); @@ -1971,7 +1966,7 @@ Value *llvm::emitHotColdNewNoThrow(Value *Num, Value *NoThrow, IRBuilderBase &B, StringRef Name = TLI->getName(NewFunc); FunctionCallee Func = - M->getOrInsertFunction(Name, B.getInt8PtrTy(), Num->getType(), + M->getOrInsertFunction(Name, B.getPtrTy(), Num->getType(), NoThrow->getType(), B.getInt8Ty()); inferNonMandatoryLibFuncAttrs(M, Name, *TLI); CallInst *CI = B.CreateCall(Func, {Num, NoThrow, B.getInt8(HotCold)}, Name); @@ -1992,7 +1987,7 @@ Value *llvm::emitHotColdNewAligned(Value *Num, Value *Align, IRBuilderBase &B, StringRef Name = TLI->getName(NewFunc); FunctionCallee Func = M->getOrInsertFunction( - Name, B.getInt8PtrTy(), Num->getType(), Align->getType(), B.getInt8Ty()); + Name, B.getPtrTy(), Num->getType(), Align->getType(), B.getInt8Ty()); inferNonMandatoryLibFuncAttrs(M, Name, *TLI); CallInst *CI = B.CreateCall(Func, {Num, Align, B.getInt8(HotCold)}, Name); @@ -2013,7 +2008,7 @@ Value *llvm::emitHotColdNewAlignedNoThrow(Value *Num, Value *Align, StringRef Name = TLI->getName(NewFunc); FunctionCallee Func = M->getOrInsertFunction( - Name, B.getInt8PtrTy(), Num->getType(), Align->getType(), + Name, B.getPtrTy(), Num->getType(), Align->getType(), NoThrow->getType(), B.getInt8Ty()); inferNonMandatoryLibFuncAttrs(M, Name, *TLI); CallInst *CI = diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp index b488e3bb0cbd..e42cdab64446 100644 --- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp +++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp @@ -111,7 +111,7 @@ static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst, if (OrigInst->getType()->isVoidTy() || OrigInst->use_empty()) return; - Builder.SetInsertPoint(&MergeBlock->front()); + Builder.SetInsertPoint(MergeBlock, MergeBlock->begin()); PHINode *Phi = Builder.CreatePHI(OrigInst->getType(), 0); SmallVector<User *, 16> UsersToUpdate(OrigInst->users()); for (User *U : UsersToUpdate) diff --git a/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp b/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp index a1ee3df907ec..fb4d82885377 100644 --- a/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp +++ b/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp @@ -30,6 +30,7 @@ #include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/IVDescriptors.h" #include "llvm/Analysis/LoopAnalysisManager.h" diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index d55208602b71..c0f333364fa5 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -44,6 +44,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo, DebugInfoFinder *DIFinder) { BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); + NewBB->IsNewDbgInfoFormat = BB->IsNewDbgInfoFormat; if (BB->hasName()) NewBB->setName(BB->getName() + NameSuffix); @@ -58,7 +59,10 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, Instruction *NewInst = I.clone(); if (I.hasName()) NewInst->setName(I.getName() + NameSuffix); - NewInst->insertInto(NewBB, NewBB->end()); + + NewInst->insertBefore(*NewBB, NewBB->end()); + NewInst->cloneDebugInfoFrom(&I); + VMap[&I] = NewInst; // Add instruction map to value. if (isa<CallInst>(I) && !I.isDebugOrPseudoInst()) { @@ -90,6 +94,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, const char *NameSuffix, ClonedCodeInfo *CodeInfo, ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer) { + NewFunc->setIsNewDbgInfoFormat(OldFunc->IsNewDbgInfoFormat); assert(NameSuffix && "NameSuffix cannot be null!"); #ifndef NDEBUG @@ -267,9 +272,13 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, BB = cast<BasicBlock>(VMap[&OldFunc->front()])->getIterator(), BE = NewFunc->end(); BB != BE; ++BB) - // Loop over all instructions, fixing each one as we find it... - for (Instruction &II : *BB) + // Loop over all instructions, fixing each one as we find it, and any + // attached debug-info records. + for (Instruction &II : *BB) { RemapInstruction(&II, VMap, RemapFlag, TypeMapper, Materializer); + RemapDPValueRange(II.getModule(), II.getDbgValueRange(), VMap, RemapFlag, + TypeMapper, Materializer); + } // Only update !llvm.dbg.cu for DifferentModule (not CloneModule). In the // same module, the compile unit will already be listed (or not). When @@ -327,6 +336,7 @@ Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap, // Create the new function... Function *NewF = Function::Create(FTy, F->getLinkage(), F->getAddressSpace(), F->getName(), F->getParent()); + NewF->setIsNewDbgInfoFormat(F->IsNewDbgInfoFormat); // Loop over the arguments, copying the names of the mapped arguments over... Function::arg_iterator DestI = NewF->arg_begin(); @@ -472,6 +482,7 @@ void PruningFunctionCloner::CloneBlock( BasicBlock *NewBB; Twine NewName(BB->hasName() ? Twine(BB->getName()) + NameSuffix : ""); BBEntry = NewBB = BasicBlock::Create(BB->getContext(), NewName, NewFunc); + NewBB->IsNewDbgInfoFormat = BB->IsNewDbgInfoFormat; // It is only legal to clone a function if a block address within that // function is never referenced outside of the function. Given that, we @@ -491,6 +502,22 @@ void PruningFunctionCloner::CloneBlock( bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; bool hasMemProfMetadata = false; + // Keep a cursor pointing at the last place we cloned debug-info records from. + BasicBlock::const_iterator DbgCursor = StartingInst; + auto CloneDbgRecordsToHere = + [NewBB, &DbgCursor](Instruction *NewInst, BasicBlock::const_iterator II) { + if (!NewBB->IsNewDbgInfoFormat) + return; + + // Clone debug-info records onto this instruction. Iterate through any + // source-instructions we've cloned and then subsequently optimised + // away, so that their debug-info doesn't go missing. + for (; DbgCursor != II; ++DbgCursor) + NewInst->cloneDebugInfoFrom(&*DbgCursor, std::nullopt, false); + NewInst->cloneDebugInfoFrom(&*II); + DbgCursor = std::next(II); + }; + // Loop over all instructions, and copy them over, DCE'ing as we go. This // loop doesn't include the terminator. for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); II != IE; @@ -540,6 +567,8 @@ void PruningFunctionCloner::CloneBlock( hasMemProfMetadata |= II->hasMetadata(LLVMContext::MD_memprof); } + CloneDbgRecordsToHere(NewInst, II); + if (CodeInfo) { CodeInfo->OrigVMap[&*II] = NewInst; if (auto *CB = dyn_cast<CallBase>(&*II)) @@ -597,6 +626,9 @@ void PruningFunctionCloner::CloneBlock( if (OldTI->hasName()) NewInst->setName(OldTI->getName() + NameSuffix); NewInst->insertInto(NewBB, NewBB->end()); + + CloneDbgRecordsToHere(NewInst, OldTI->getIterator()); + VMap[OldTI] = NewInst; // Add instruction map to value. if (CodeInfo) { @@ -608,6 +640,13 @@ void PruningFunctionCloner::CloneBlock( // Recursively clone any reachable successor blocks. append_range(ToClone, successors(BB->getTerminator())); + } else { + // If we didn't create a new terminator, clone DPValues from the old + // terminator onto the new terminator. + Instruction *NewInst = NewBB->getTerminator(); + assert(NewInst); + + CloneDbgRecordsToHere(NewInst, OldTI->getIterator()); } if (CodeInfo) { @@ -845,12 +884,22 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, TypeMapper, Materializer); } + // Do the same for DPValues, touching all the instructions in the cloned + // range of blocks. + Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator(); + for (BasicBlock &BB : make_range(Begin, NewFunc->end())) { + for (Instruction &I : BB) { + RemapDPValueRange(I.getModule(), I.getDbgValueRange(), VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer); + } + } + // Simplify conditional branches and switches with a constant operand. We try // to prune these out when cloning, but if the simplification required // looking through PHI nodes, those are only available after forming the full // basic block. That may leave some here, and we still want to prune the dead // code as early as possible. - Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator(); for (BasicBlock &BB : make_range(Begin, NewFunc->end())) ConstantFoldTerminator(&BB); @@ -939,10 +988,14 @@ void llvm::CloneAndPruneFunctionInto( void llvm::remapInstructionsInBlocks(ArrayRef<BasicBlock *> Blocks, ValueToValueMapTy &VMap) { // Rewrite the code to refer to itself. - for (auto *BB : Blocks) - for (auto &Inst : *BB) + for (auto *BB : Blocks) { + for (auto &Inst : *BB) { + RemapDPValueRange(Inst.getModule(), Inst.getDbgValueRange(), VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); RemapInstruction(&Inst, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + } + } } /// Clones a loop \p OrigLoop. Returns the loop and the blocks in \p @@ -1066,6 +1119,7 @@ BasicBlock *llvm::DuplicateInstructionsInSplitBetween( Instruction *New = BI->clone(); New->setName(BI->getName()); New->insertBefore(NewTerm); + New->cloneDebugInfoFrom(&*BI); ValueMapping[&*BI] = New; // Remap operands to patch up intra-block references. diff --git a/llvm/lib/Transforms/Utils/CloneModule.cpp b/llvm/lib/Transforms/Utils/CloneModule.cpp index 55e051298a9a..00e40fe73d90 100644 --- a/llvm/lib/Transforms/Utils/CloneModule.cpp +++ b/llvm/lib/Transforms/Utils/CloneModule.cpp @@ -34,6 +34,8 @@ static void copyComdat(GlobalObject *Dst, const GlobalObject *Src) { /// copies of global variables and functions, and making their (initializers and /// references, respectively) refer to the right globals. /// +/// Cloning un-materialized modules is not currently supported, so any +/// modules initialized via lazy loading should be materialized before cloning std::unique_ptr<Module> llvm::CloneModule(const Module &M) { // Create the value map that maps things from the old module over to the new // module. @@ -49,6 +51,9 @@ std::unique_ptr<Module> llvm::CloneModule(const Module &M, std::unique_ptr<Module> llvm::CloneModule( const Module &M, ValueToValueMapTy &VMap, function_ref<bool(const GlobalValue *)> ShouldCloneDefinition) { + + assert(M.isMaterialized() && "Module must be materialized before cloning!"); + // First off, we need to create the new module. std::unique_ptr<Module> New = std::make_unique<Module>(M.getModuleIdentifier(), M.getContext()); @@ -56,6 +61,7 @@ std::unique_ptr<Module> llvm::CloneModule( New->setDataLayout(M.getDataLayout()); New->setTargetTriple(M.getTargetTriple()); New->setModuleInlineAsm(M.getModuleInlineAsm()); + New->IsNewDbgInfoFormat = M.IsNewDbgInfoFormat; // Loop over all of the global variables, making corresponding globals in the // new module. Here we add them to the VMap and to the new Module. We diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index c390af351a69..9c1186232e02 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -245,12 +245,13 @@ CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, AssumptionCache *AC, bool AllowVarArgs, bool AllowAlloca, - BasicBlock *AllocationBlock, std::string Suffix) + BasicBlock *AllocationBlock, std::string Suffix, + bool ArgsInZeroAddressSpace) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), BPI(BPI), AC(AC), AllocationBlock(AllocationBlock), AllowVarArgs(AllowVarArgs), Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)), - Suffix(Suffix) {} + Suffix(Suffix), ArgsInZeroAddressSpace(ArgsInZeroAddressSpace) {} CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BlockFrequencyInfo *BFI, @@ -567,7 +568,7 @@ void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache &CEAC, for (Instruction *I : LifetimeBitcastUsers) { Module *M = AIFunc->getParent(); LLVMContext &Ctx = M->getContext(); - auto *Int8PtrTy = Type::getInt8PtrTy(Ctx); + auto *Int8PtrTy = PointerType::getUnqual(Ctx); CastInst *CastI = CastInst::CreatePointerCast(AI, Int8PtrTy, "lt.cast", I); I->replaceUsesOfWith(I->getOperand(1), CastI); @@ -721,7 +722,8 @@ void CodeExtractor::severSplitPHINodesOfEntry(BasicBlock *&Header) { // Create a new PHI node in the new region, which has an incoming value // from OldPred of PN. PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, - PN->getName() + ".ce", &NewBB->front()); + PN->getName() + ".ce"); + NewPN->insertBefore(NewBB->begin()); PN->replaceAllUsesWith(NewPN); NewPN->addIncoming(PN, OldPred); @@ -766,6 +768,7 @@ void CodeExtractor::severSplitPHINodesOfExits( NewBB = BasicBlock::Create(ExitBB->getContext(), ExitBB->getName() + ".split", ExitBB->getParent(), ExitBB); + NewBB->IsNewDbgInfoFormat = ExitBB->IsNewDbgInfoFormat; SmallVector<BasicBlock *, 4> Preds(predecessors(ExitBB)); for (BasicBlock *PredBB : Preds) if (Blocks.count(PredBB)) @@ -775,9 +778,9 @@ void CodeExtractor::severSplitPHINodesOfExits( } // Split this PHI. - PHINode *NewPN = - PHINode::Create(PN.getType(), IncomingVals.size(), - PN.getName() + ".ce", NewBB->getFirstNonPHI()); + PHINode *NewPN = PHINode::Create(PN.getType(), IncomingVals.size(), + PN.getName() + ".ce"); + NewPN->insertBefore(NewBB->getFirstNonPHIIt()); for (unsigned i : IncomingVals) NewPN->addIncoming(PN.getIncomingValue(i), PN.getIncomingBlock(i)); for (unsigned i : reverse(IncomingVals)) @@ -865,7 +868,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, StructType *StructTy = nullptr; if (AggregateArgs && !AggParamTy.empty()) { StructTy = StructType::get(M->getContext(), AggParamTy); - ParamTy.push_back(PointerType::get(StructTy, DL.getAllocaAddrSpace())); + ParamTy.push_back(PointerType::get( + StructTy, ArgsInZeroAddressSpace ? 0 : DL.getAllocaAddrSpace())); } LLVM_DEBUG({ @@ -886,6 +890,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, Function *newFunction = Function::Create( funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(), oldFunction->getName() + "." + SuffixToUse, M); + newFunction->IsNewDbgInfoFormat = oldFunction->IsNewDbgInfoFormat; // Inherit all of the target dependent attributes and white-listed // target independent attributes. @@ -919,6 +924,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::PresplitCoroutine: case Attribute::Memory: case Attribute::NoFPClass: + case Attribute::CoroDestroyOnlyWhenComplete: continue; // Those attributes should be safe to propagate to the extracted function. case Attribute::AlwaysInline: @@ -940,6 +946,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::NoSanitizeBounds: case Attribute::NoSanitizeCoverage: case Attribute::NullPointerIsValid: + case Attribute::OptimizeForDebugging: case Attribute::OptForFuzzing: case Attribute::OptimizeNone: case Attribute::OptimizeForSize: @@ -990,6 +997,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::ImmArg: case Attribute::ByRef: case Attribute::WriteOnly: + case Attribute::Writable: // These are not really attributes. case Attribute::None: case Attribute::EndAttrKinds: @@ -1185,8 +1193,15 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg", AllocationBlock ? &*AllocationBlock->getFirstInsertionPt() : &codeReplacer->getParent()->front().front()); - params.push_back(Struct); + if (ArgsInZeroAddressSpace && DL.getAllocaAddrSpace() != 0) { + auto *StructSpaceCast = new AddrSpaceCastInst( + Struct, PointerType ::get(Context, 0), "structArg.ascast"); + StructSpaceCast->insertAfter(Struct); + params.push_back(StructSpaceCast); + } else { + params.push_back(Struct); + } // Store aggregated inputs in the struct. for (unsigned i = 0, e = StructValues.size(); i != e; ++i) { if (inputs.contains(StructValues[i])) { @@ -1492,10 +1507,14 @@ void CodeExtractor::calculateNewCallTerminatorWeights( static void eraseDebugIntrinsicsWithNonLocalRefs(Function &F) { for (Instruction &I : instructions(F)) { SmallVector<DbgVariableIntrinsic *, 4> DbgUsers; - findDbgUsers(DbgUsers, &I); + SmallVector<DPValue *, 4> DPValues; + findDbgUsers(DbgUsers, &I, &DPValues); for (DbgVariableIntrinsic *DVI : DbgUsers) if (DVI->getFunction() != &F) DVI->eraseFromParent(); + for (DPValue *DPV : DPValues) + if (DPV->getFunction() != &F) + DPV->eraseFromParent(); } } @@ -1531,6 +1550,16 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc, /*LineNo=*/0, SPType, /*ScopeLine=*/0, DINode::FlagZero, SPFlags); NewFunc.setSubprogram(NewSP); + auto IsInvalidLocation = [&NewFunc](Value *Location) { + // Location is invalid if it isn't a constant or an instruction, or is an + // instruction but isn't in the new function. + if (!Location || + (!isa<Constant>(Location) && !isa<Instruction>(Location))) + return true; + Instruction *LocationInst = dyn_cast<Instruction>(Location); + return LocationInst && LocationInst->getFunction() != &NewFunc; + }; + // Debug intrinsics in the new function need to be updated in one of two // ways: // 1) They need to be deleted, because they describe a value in the old @@ -1539,8 +1568,41 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc, // point to a variable in the wrong scope. SmallDenseMap<DINode *, DINode *> RemappedMetadata; SmallVector<Instruction *, 4> DebugIntrinsicsToDelete; + SmallVector<DPValue *, 4> DPVsToDelete; DenseMap<const MDNode *, MDNode *> Cache; + + auto GetUpdatedDIVariable = [&](DILocalVariable *OldVar) { + DINode *&NewVar = RemappedMetadata[OldVar]; + if (!NewVar) { + DILocalScope *NewScope = DILocalScope::cloneScopeForSubprogram( + *OldVar->getScope(), *NewSP, Ctx, Cache); + NewVar = DIB.createAutoVariable( + NewScope, OldVar->getName(), OldVar->getFile(), OldVar->getLine(), + OldVar->getType(), /*AlwaysPreserve=*/false, DINode::FlagZero, + OldVar->getAlignInBits()); + } + return cast<DILocalVariable>(NewVar); + }; + + auto UpdateDPValuesOnInst = [&](Instruction &I) -> void { + for (auto &DPV : I.getDbgValueRange()) { + // Apply the two updates that dbg.values get: invalid operands, and + // variable metadata fixup. + // FIXME: support dbg.assign form of DPValues. + if (any_of(DPV.location_ops(), IsInvalidLocation)) { + DPVsToDelete.push_back(&DPV); + continue; + } + if (!DPV.getDebugLoc().getInlinedAt()) + DPV.setVariable(GetUpdatedDIVariable(DPV.getVariable())); + DPV.setDebugLoc(DebugLoc::replaceInlinedAtSubprogram(DPV.getDebugLoc(), + *NewSP, Ctx, Cache)); + } + }; + for (Instruction &I : instructions(NewFunc)) { + UpdateDPValuesOnInst(I); + auto *DII = dyn_cast<DbgInfoIntrinsic>(&I); if (!DII) continue; @@ -1562,41 +1624,28 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc, continue; } - auto IsInvalidLocation = [&NewFunc](Value *Location) { - // Location is invalid if it isn't a constant or an instruction, or is an - // instruction but isn't in the new function. - if (!Location || - (!isa<Constant>(Location) && !isa<Instruction>(Location))) - return true; - Instruction *LocationInst = dyn_cast<Instruction>(Location); - return LocationInst && LocationInst->getFunction() != &NewFunc; - }; - auto *DVI = cast<DbgVariableIntrinsic>(DII); // If any of the used locations are invalid, delete the intrinsic. if (any_of(DVI->location_ops(), IsInvalidLocation)) { DebugIntrinsicsToDelete.push_back(DVI); continue; } + // DbgAssign intrinsics have an extra Value argument: + if (auto *DAI = dyn_cast<DbgAssignIntrinsic>(DVI); + DAI && IsInvalidLocation(DAI->getAddress())) { + DebugIntrinsicsToDelete.push_back(DVI); + continue; + } // If the variable was in the scope of the old function, i.e. it was not // inlined, point the intrinsic to a fresh variable within the new function. - if (!DVI->getDebugLoc().getInlinedAt()) { - DILocalVariable *OldVar = DVI->getVariable(); - DINode *&NewVar = RemappedMetadata[OldVar]; - if (!NewVar) { - DILocalScope *NewScope = DILocalScope::cloneScopeForSubprogram( - *OldVar->getScope(), *NewSP, Ctx, Cache); - NewVar = DIB.createAutoVariable( - NewScope, OldVar->getName(), OldVar->getFile(), OldVar->getLine(), - OldVar->getType(), /*AlwaysPreserve=*/false, DINode::FlagZero, - OldVar->getAlignInBits()); - } - DVI->setVariable(cast<DILocalVariable>(NewVar)); - } + if (!DVI->getDebugLoc().getInlinedAt()) + DVI->setVariable(GetUpdatedDIVariable(DVI->getVariable())); } for (auto *DII : DebugIntrinsicsToDelete) DII->eraseFromParent(); + for (auto *DPV : DPVsToDelete) + DPV->getMarker()->MarkedInstr->dropOneDbgValue(DPV); DIB.finalizeSubprogram(NewSP); // Fix up the scope information attached to the line locations in the new @@ -1702,11 +1751,14 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, header); + codeReplacer->IsNewDbgInfoFormat = oldFunction->IsNewDbgInfoFormat; // The new function needs a root node because other nodes can branch to the // head of the region, but the entry node of a function cannot have preds. BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), "newFuncRoot"); + newFuncRoot->IsNewDbgInfoFormat = oldFunction->IsNewDbgInfoFormat; + auto *BranchI = BranchInst::Create(header); // If the original function has debug info, we have to add a debug location // to the new branch instruction from the artificial entry block. @@ -1772,11 +1824,11 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Update the entry count of the function. if (BFI) { - auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); + auto Count = BFI->getProfileCountFromFreq(EntryFreq); if (Count) newFunction->setEntryCount( ProfileCount(*Count, Function::PCT_Real)); // FIXME - BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); + BFI->setBlockFreq(codeReplacer, EntryFreq); } CallInst *TheCall = diff --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp index ac74a1c116cc..95edd27c675d 100644 --- a/llvm/lib/Transforms/Utils/CodeLayout.cpp +++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp @@ -45,8 +45,11 @@ #include "llvm/Support/Debug.h" #include <cmath> +#include <set> using namespace llvm; +using namespace llvm::codelayout; + #define DEBUG_TYPE "code-layout" namespace llvm { @@ -61,8 +64,8 @@ cl::opt<bool> ApplyExtTspWithoutProfile( cl::init(true), cl::Hidden); } // namespace llvm -// Algorithm-specific params. The values are tuned for the best performance -// of large-scale front-end bound binaries. +// Algorithm-specific params for Ext-TSP. The values are tuned for the best +// performance of large-scale front-end bound binaries. static cl::opt<double> ForwardWeightCond( "ext-tsp-forward-weight-cond", cl::ReallyHidden, cl::init(0.1), cl::desc("The weight of conditional forward jumps for ExtTSP value")); @@ -96,10 +99,10 @@ static cl::opt<unsigned> BackwardDistance( cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP")); // The maximum size of a chain created by the algorithm. The size is bounded -// so that the algorithm can efficiently process extremely large instance. +// so that the algorithm can efficiently process extremely large instances. static cl::opt<unsigned> - MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(4096), - cl::desc("The maximum size of a chain to create.")); + MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(512), + cl::desc("The maximum size of a chain to create")); // The maximum size of a chain for splitting. Larger values of the threshold // may yield better quality at the cost of worsen run-time. @@ -107,11 +110,29 @@ static cl::opt<unsigned> ChainSplitThreshold( "ext-tsp-chain-split-threshold", cl::ReallyHidden, cl::init(128), cl::desc("The maximum size of a chain to apply splitting")); -// The option enables splitting (large) chains along in-coming and out-going -// jumps. This typically results in a better quality. -static cl::opt<bool> EnableChainSplitAlongJumps( - "ext-tsp-enable-chain-split-along-jumps", cl::ReallyHidden, cl::init(true), - cl::desc("The maximum size of a chain to apply splitting")); +// The maximum ratio between densities of two chains for merging. +static cl::opt<double> MaxMergeDensityRatio( + "ext-tsp-max-merge-density-ratio", cl::ReallyHidden, cl::init(100), + cl::desc("The maximum ratio between densities of two chains for merging")); + +// Algorithm-specific options for CDSort. +static cl::opt<unsigned> CacheEntries("cdsort-cache-entries", cl::ReallyHidden, + cl::desc("The size of the cache")); + +static cl::opt<unsigned> CacheSize("cdsort-cache-size", cl::ReallyHidden, + cl::desc("The size of a line in the cache")); + +static cl::opt<unsigned> + CDMaxChainSize("cdsort-max-chain-size", cl::ReallyHidden, + cl::desc("The maximum size of a chain to create")); + +static cl::opt<double> DistancePower( + "cdsort-distance-power", cl::ReallyHidden, + cl::desc("The power exponent for the distance-based locality")); + +static cl::opt<double> FrequencyScale( + "cdsort-frequency-scale", cl::ReallyHidden, + cl::desc("The scale factor for the frequency-based locality")); namespace { @@ -199,11 +220,14 @@ struct NodeT { NodeT &operator=(const NodeT &) = delete; NodeT &operator=(NodeT &&) = default; - explicit NodeT(size_t Index, uint64_t Size, uint64_t EC) - : Index(Index), Size(Size), ExecutionCount(EC) {} + explicit NodeT(size_t Index, uint64_t Size, uint64_t Count) + : Index(Index), Size(Size), ExecutionCount(Count) {} bool isEntry() const { return Index == 0; } + // Check if Other is a successor of the node. + bool isSuccessor(const NodeT *Other) const; + // The total execution count of outgoing jumps. uint64_t outCount() const; @@ -267,7 +291,7 @@ struct ChainT { size_t numBlocks() const { return Nodes.size(); } - double density() const { return static_cast<double>(ExecutionCount) / Size; } + double density() const { return ExecutionCount / Size; } bool isEntry() const { return Nodes[0]->Index == 0; } @@ -280,9 +304,9 @@ struct ChainT { } ChainEdge *getEdge(ChainT *Other) const { - for (auto It : Edges) { - if (It.first == Other) - return It.second; + for (const auto &[Chain, ChainEdge] : Edges) { + if (Chain == Other) + return ChainEdge; } return nullptr; } @@ -302,13 +326,13 @@ struct ChainT { Edges.push_back(std::make_pair(Other, Edge)); } - void merge(ChainT *Other, const std::vector<NodeT *> &MergedBlocks) { - Nodes = MergedBlocks; - // Update the chain's data + void merge(ChainT *Other, std::vector<NodeT *> MergedBlocks) { + Nodes = std::move(MergedBlocks); + // Update the chain's data. ExecutionCount += Other->ExecutionCount; Size += Other->Size; Id = Nodes[0]->Index; - // Update the node's data + // Update the node's data. for (size_t Idx = 0; Idx < Nodes.size(); Idx++) { Nodes[Idx]->CurChain = this; Nodes[Idx]->CurIndex = Idx; @@ -328,8 +352,9 @@ struct ChainT { uint64_t Id; // Cached ext-tsp score for the chain. double Score{0}; - // The total execution count of the chain. - uint64_t ExecutionCount{0}; + // The total execution count of the chain. Since the execution count of + // a basic block is uint64_t, using doubles here to avoid overflow. + double ExecutionCount{0}; // The total size of the chain. uint64_t Size{0}; // Nodes of the chain. @@ -340,7 +365,7 @@ struct ChainT { /// An edge in the graph representing jumps between two chains. /// When nodes are merged into chains, the edges are combined too so that -/// there is always at most one edge between a pair of chains +/// there is always at most one edge between a pair of chains. struct ChainEdge { ChainEdge(const ChainEdge &) = delete; ChainEdge(ChainEdge &&) = default; @@ -424,53 +449,57 @@ private: bool CacheValidBackward{false}; }; +bool NodeT::isSuccessor(const NodeT *Other) const { + for (JumpT *Jump : OutJumps) + if (Jump->Target == Other) + return true; + return false; +} + uint64_t NodeT::outCount() const { uint64_t Count = 0; - for (JumpT *Jump : OutJumps) { + for (JumpT *Jump : OutJumps) Count += Jump->ExecutionCount; - } return Count; } uint64_t NodeT::inCount() const { uint64_t Count = 0; - for (JumpT *Jump : InJumps) { + for (JumpT *Jump : InJumps) Count += Jump->ExecutionCount; - } return Count; } void ChainT::mergeEdges(ChainT *Other) { - // Update edges adjacent to chain Other - for (auto EdgeIt : Other->Edges) { - ChainT *DstChain = EdgeIt.first; - ChainEdge *DstEdge = EdgeIt.second; + // Update edges adjacent to chain Other. + for (const auto &[DstChain, DstEdge] : Other->Edges) { ChainT *TargetChain = DstChain == Other ? this : DstChain; ChainEdge *CurEdge = getEdge(TargetChain); if (CurEdge == nullptr) { DstEdge->changeEndpoint(Other, this); this->addEdge(TargetChain, DstEdge); - if (DstChain != this && DstChain != Other) { + if (DstChain != this && DstChain != Other) DstChain->addEdge(this, DstEdge); - } } else { CurEdge->moveJumps(DstEdge); } - // Cleanup leftover edge - if (DstChain != Other) { + // Cleanup leftover edge. + if (DstChain != Other) DstChain->removeEdge(Other); - } } } using NodeIter = std::vector<NodeT *>::const_iterator; +static std::vector<NodeT *> EmptyList; -/// A wrapper around three chains of nodes; it is used to avoid extra -/// instantiation of the vectors. -struct MergedChain { - MergedChain(NodeIter Begin1, NodeIter End1, NodeIter Begin2 = NodeIter(), - NodeIter End2 = NodeIter(), NodeIter Begin3 = NodeIter(), - NodeIter End3 = NodeIter()) +/// A wrapper around three concatenated vectors (chains) of nodes; it is used +/// to avoid extra instantiation of the vectors. +struct MergedNodesT { + MergedNodesT(NodeIter Begin1, NodeIter End1, + NodeIter Begin2 = EmptyList.begin(), + NodeIter End2 = EmptyList.end(), + NodeIter Begin3 = EmptyList.begin(), + NodeIter End3 = EmptyList.end()) : Begin1(Begin1), End1(End1), Begin2(Begin2), End2(End2), Begin3(Begin3), End3(End3) {} @@ -504,15 +533,35 @@ private: NodeIter End3; }; +/// A wrapper around two concatenated vectors (chains) of jumps. +struct MergedJumpsT { + MergedJumpsT(const std::vector<JumpT *> *Jumps1, + const std::vector<JumpT *> *Jumps2 = nullptr) { + assert(!Jumps1->empty() && "cannot merge empty jump list"); + JumpArray[0] = Jumps1; + JumpArray[1] = Jumps2; + } + + template <typename F> void forEach(const F &Func) const { + for (auto Jumps : JumpArray) + if (Jumps != nullptr) + for (JumpT *Jump : *Jumps) + Func(Jump); + } + +private: + std::array<const std::vector<JumpT *> *, 2> JumpArray{nullptr, nullptr}; +}; + /// Merge two chains of nodes respecting a given 'type' and 'offset'. /// /// If MergeType == 0, then the result is a concatenation of two chains. /// Otherwise, the first chain is cut into two sub-chains at the offset, /// and merged using all possible ways of concatenating three chains. -MergedChain mergeNodes(const std::vector<NodeT *> &X, - const std::vector<NodeT *> &Y, size_t MergeOffset, - MergeTypeT MergeType) { - // Split the first chain, X, into X1 and X2 +MergedNodesT mergeNodes(const std::vector<NodeT *> &X, + const std::vector<NodeT *> &Y, size_t MergeOffset, + MergeTypeT MergeType) { + // Split the first chain, X, into X1 and X2. NodeIter BeginX1 = X.begin(); NodeIter EndX1 = X.begin() + MergeOffset; NodeIter BeginX2 = X.begin() + MergeOffset; @@ -520,18 +569,18 @@ MergedChain mergeNodes(const std::vector<NodeT *> &X, NodeIter BeginY = Y.begin(); NodeIter EndY = Y.end(); - // Construct a new chain from the three existing ones + // Construct a new chain from the three existing ones. switch (MergeType) { case MergeTypeT::X_Y: - return MergedChain(BeginX1, EndX2, BeginY, EndY); + return MergedNodesT(BeginX1, EndX2, BeginY, EndY); case MergeTypeT::Y_X: - return MergedChain(BeginY, EndY, BeginX1, EndX2); + return MergedNodesT(BeginY, EndY, BeginX1, EndX2); case MergeTypeT::X1_Y_X2: - return MergedChain(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2); + return MergedNodesT(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2); case MergeTypeT::Y_X2_X1: - return MergedChain(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1); + return MergedNodesT(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1); case MergeTypeT::X2_X1_Y: - return MergedChain(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY); + return MergedNodesT(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY); } llvm_unreachable("unexpected chain merge type"); } @@ -539,15 +588,14 @@ MergedChain mergeNodes(const std::vector<NodeT *> &X, /// The implementation of the ExtTSP algorithm. class ExtTSPImpl { public: - ExtTSPImpl(const std::vector<uint64_t> &NodeSizes, - const std::vector<uint64_t> &NodeCounts, - const std::vector<EdgeCountT> &EdgeCounts) + ExtTSPImpl(ArrayRef<uint64_t> NodeSizes, ArrayRef<uint64_t> NodeCounts, + ArrayRef<EdgeCount> EdgeCounts) : NumNodes(NodeSizes.size()) { initialize(NodeSizes, NodeCounts, EdgeCounts); } /// Run the algorithm and return an optimized ordering of nodes. - void run(std::vector<uint64_t> &Result) { + std::vector<uint64_t> run() { // Pass 1: Merge nodes with their mutually forced successors mergeForcedPairs(); @@ -558,78 +606,80 @@ public: mergeColdChains(); // Collect nodes from all chains - concatChains(Result); + return concatChains(); } private: /// Initialize the algorithm's data structures. - void initialize(const std::vector<uint64_t> &NodeSizes, - const std::vector<uint64_t> &NodeCounts, - const std::vector<EdgeCountT> &EdgeCounts) { - // Initialize nodes + void initialize(const ArrayRef<uint64_t> &NodeSizes, + const ArrayRef<uint64_t> &NodeCounts, + const ArrayRef<EdgeCount> &EdgeCounts) { + // Initialize nodes. AllNodes.reserve(NumNodes); for (uint64_t Idx = 0; Idx < NumNodes; Idx++) { uint64_t Size = std::max<uint64_t>(NodeSizes[Idx], 1ULL); uint64_t ExecutionCount = NodeCounts[Idx]; - // The execution count of the entry node is set to at least one + // The execution count of the entry node is set to at least one. if (Idx == 0 && ExecutionCount == 0) ExecutionCount = 1; AllNodes.emplace_back(Idx, Size, ExecutionCount); } - // Initialize jumps between nodes + // Initialize jumps between the nodes. SuccNodes.resize(NumNodes); PredNodes.resize(NumNodes); std::vector<uint64_t> OutDegree(NumNodes, 0); AllJumps.reserve(EdgeCounts.size()); - for (auto It : EdgeCounts) { - uint64_t Pred = It.first.first; - uint64_t Succ = It.first.second; - OutDegree[Pred]++; - // Ignore self-edges - if (Pred == Succ) + for (auto Edge : EdgeCounts) { + ++OutDegree[Edge.src]; + // Ignore self-edges. + if (Edge.src == Edge.dst) continue; - SuccNodes[Pred].push_back(Succ); - PredNodes[Succ].push_back(Pred); - uint64_t ExecutionCount = It.second; - if (ExecutionCount > 0) { - NodeT &PredNode = AllNodes[Pred]; - NodeT &SuccNode = AllNodes[Succ]; - AllJumps.emplace_back(&PredNode, &SuccNode, ExecutionCount); + SuccNodes[Edge.src].push_back(Edge.dst); + PredNodes[Edge.dst].push_back(Edge.src); + if (Edge.count > 0) { + NodeT &PredNode = AllNodes[Edge.src]; + NodeT &SuccNode = AllNodes[Edge.dst]; + AllJumps.emplace_back(&PredNode, &SuccNode, Edge.count); SuccNode.InJumps.push_back(&AllJumps.back()); PredNode.OutJumps.push_back(&AllJumps.back()); + // Adjust execution counts. + PredNode.ExecutionCount = std::max(PredNode.ExecutionCount, Edge.count); + SuccNode.ExecutionCount = std::max(SuccNode.ExecutionCount, Edge.count); } } for (JumpT &Jump : AllJumps) { - assert(OutDegree[Jump.Source->Index] > 0); + assert(OutDegree[Jump.Source->Index] > 0 && + "incorrectly computed out-degree of the block"); Jump.IsConditional = OutDegree[Jump.Source->Index] > 1; } - // Initialize chains + // Initialize chains. AllChains.reserve(NumNodes); HotChains.reserve(NumNodes); for (NodeT &Node : AllNodes) { + // Create a chain. AllChains.emplace_back(Node.Index, &Node); Node.CurChain = &AllChains.back(); - if (Node.ExecutionCount > 0) { + if (Node.ExecutionCount > 0) HotChains.push_back(&AllChains.back()); - } } - // Initialize chain edges + // Initialize chain edges. AllEdges.reserve(AllJumps.size()); for (NodeT &PredNode : AllNodes) { for (JumpT *Jump : PredNode.OutJumps) { + assert(Jump->ExecutionCount > 0 && "incorrectly initialized jump"); NodeT *SuccNode = Jump->Target; ChainEdge *CurEdge = PredNode.CurChain->getEdge(SuccNode->CurChain); - // this edge is already present in the graph + // This edge is already present in the graph. if (CurEdge != nullptr) { assert(SuccNode->CurChain->getEdge(PredNode.CurChain) != nullptr); CurEdge->appendJump(Jump); continue; } - // this is a new edge + // This is a new edge. AllEdges.emplace_back(Jump); PredNode.CurChain->addEdge(SuccNode->CurChain, &AllEdges.back()); SuccNode->CurChain->addEdge(PredNode.CurChain, &AllEdges.back()); @@ -642,7 +692,7 @@ private: /// to B are from A. Such nodes should be adjacent in the optimal ordering; /// the method finds and merges such pairs of nodes. void mergeForcedPairs() { - // Find fallthroughs based on edge weights + // Find forced pairs of blocks. for (NodeT &Node : AllNodes) { if (SuccNodes[Node.Index].size() == 1 && PredNodes[SuccNodes[Node.Index][0]].size() == 1 && @@ -669,12 +719,12 @@ private: } if (SuccNode == nullptr) continue; - // Break the cycle + // Break the cycle. AllNodes[Node.ForcedPred->Index].ForcedSucc = nullptr; Node.ForcedPred = nullptr; } - // Merge nodes with their fallthrough successors + // Merge nodes with their fallthrough successors. for (NodeT &Node : AllNodes) { if (Node.ForcedPred == nullptr && Node.ForcedSucc != nullptr) { const NodeT *CurBlock = &Node; @@ -689,33 +739,42 @@ private: /// Merge pairs of chains while improving the ExtTSP objective. void mergeChainPairs() { - /// Deterministically compare pairs of chains + /// Deterministically compare pairs of chains. auto compareChainPairs = [](const ChainT *A1, const ChainT *B1, const ChainT *A2, const ChainT *B2) { - if (A1 != A2) - return A1->Id < A2->Id; - return B1->Id < B2->Id; + return std::make_tuple(A1->Id, B1->Id) < std::make_tuple(A2->Id, B2->Id); }; while (HotChains.size() > 1) { ChainT *BestChainPred = nullptr; ChainT *BestChainSucc = nullptr; MergeGainT BestGain; - // Iterate over all pairs of chains + // Iterate over all pairs of chains. for (ChainT *ChainPred : HotChains) { - // Get candidates for merging with the current chain - for (auto EdgeIt : ChainPred->Edges) { - ChainT *ChainSucc = EdgeIt.first; - ChainEdge *Edge = EdgeIt.second; - // Ignore loop edges - if (ChainPred == ChainSucc) + // Get candidates for merging with the current chain. + for (const auto &[ChainSucc, Edge] : ChainPred->Edges) { + // Ignore loop edges. + if (Edge->isSelfEdge()) continue; - - // Stop early if the combined chain violates the maximum allowed size + // Skip the merge if the combined chain violates the maximum specified + // size. if (ChainPred->numBlocks() + ChainSucc->numBlocks() >= MaxChainSize) continue; + // Don't merge the chains if they have vastly different densities. + // Skip the merge if the ratio between the densities exceeds + // MaxMergeDensityRatio. Smaller values of the option result in fewer + // merges, and hence, more chains. + const double ChainPredDensity = ChainPred->density(); + const double ChainSuccDensity = ChainSucc->density(); + assert(ChainPredDensity > 0.0 && ChainSuccDensity > 0.0 && + "incorrectly computed chain densities"); + auto [MinDensity, MaxDensity] = + std::minmax(ChainPredDensity, ChainSuccDensity); + const double Ratio = MaxDensity / MinDensity; + if (Ratio > MaxMergeDensityRatio) + continue; - // Compute the gain of merging the two chains + // Compute the gain of merging the two chains. MergeGainT CurGain = getBestMergeGain(ChainPred, ChainSucc, Edge); if (CurGain.score() <= EPS) continue; @@ -731,11 +790,11 @@ private: } } - // Stop merging when there is no improvement + // Stop merging when there is no improvement. if (BestGain.score() <= EPS) break; - // Merge the best pair of chains + // Merge the best pair of chains. mergeChains(BestChainPred, BestChainSucc, BestGain.mergeOffset(), BestGain.mergeType()); } @@ -743,7 +802,7 @@ private: /// Merge remaining nodes into chains w/o taking jump counts into /// consideration. This allows to maintain the original node order in the - /// absence of profile data + /// absence of profile data. void mergeColdChains() { for (size_t SrcBB = 0; SrcBB < NumNodes; SrcBB++) { // Iterating in reverse order to make sure original fallthrough jumps are @@ -764,24 +823,22 @@ private: } /// Compute the Ext-TSP score for a given node order and a list of jumps. - double extTSPScore(const MergedChain &MergedBlocks, - const std::vector<JumpT *> &Jumps) const { - if (Jumps.empty()) - return 0.0; + double extTSPScore(const MergedNodesT &Nodes, + const MergedJumpsT &Jumps) const { uint64_t CurAddr = 0; - MergedBlocks.forEach([&](const NodeT *Node) { + Nodes.forEach([&](const NodeT *Node) { Node->EstimatedAddr = CurAddr; CurAddr += Node->Size; }); double Score = 0; - for (JumpT *Jump : Jumps) { + Jumps.forEach([&](const JumpT *Jump) { const NodeT *SrcBlock = Jump->Source; const NodeT *DstBlock = Jump->Target; Score += ::extTSPScore(SrcBlock->EstimatedAddr, SrcBlock->Size, DstBlock->EstimatedAddr, Jump->ExecutionCount, Jump->IsConditional); - } + }); return Score; } @@ -793,74 +850,76 @@ private: /// element being the corresponding merging type. MergeGainT getBestMergeGain(ChainT *ChainPred, ChainT *ChainSucc, ChainEdge *Edge) const { - if (Edge->hasCachedMergeGain(ChainPred, ChainSucc)) { + if (Edge->hasCachedMergeGain(ChainPred, ChainSucc)) return Edge->getCachedMergeGain(ChainPred, ChainSucc); - } - // Precompute jumps between ChainPred and ChainSucc - auto Jumps = Edge->jumps(); + assert(!Edge->jumps().empty() && "trying to merge chains w/o jumps"); + // Precompute jumps between ChainPred and ChainSucc. ChainEdge *EdgePP = ChainPred->getEdge(ChainPred); - if (EdgePP != nullptr) { - Jumps.insert(Jumps.end(), EdgePP->jumps().begin(), EdgePP->jumps().end()); - } - assert(!Jumps.empty() && "trying to merge chains w/o jumps"); + MergedJumpsT Jumps(&Edge->jumps(), EdgePP ? &EdgePP->jumps() : nullptr); - // The object holds the best currently chosen gain of merging the two chains + // This object holds the best chosen gain of merging two chains. MergeGainT Gain = MergeGainT(); /// Given a merge offset and a list of merge types, try to merge two chains - /// and update Gain with a better alternative + /// and update Gain with a better alternative. auto tryChainMerging = [&](size_t Offset, const std::vector<MergeTypeT> &MergeTypes) { - // Skip merging corresponding to concatenation w/o splitting + // Skip merging corresponding to concatenation w/o splitting. if (Offset == 0 || Offset == ChainPred->Nodes.size()) return; - // Skip merging if it breaks Forced successors + // Skip merging if it breaks Forced successors. NodeT *Node = ChainPred->Nodes[Offset - 1]; if (Node->ForcedSucc != nullptr) return; // Apply the merge, compute the corresponding gain, and update the best - // value, if the merge is beneficial + // value, if the merge is beneficial. for (const MergeTypeT &MergeType : MergeTypes) { Gain.updateIfLessThan( computeMergeGain(ChainPred, ChainSucc, Jumps, Offset, MergeType)); } }; - // Try to concatenate two chains w/o splitting + // Try to concatenate two chains w/o splitting. Gain.updateIfLessThan( computeMergeGain(ChainPred, ChainSucc, Jumps, 0, MergeTypeT::X_Y)); - if (EnableChainSplitAlongJumps) { - // Attach (a part of) ChainPred before the first node of ChainSucc - for (JumpT *Jump : ChainSucc->Nodes.front()->InJumps) { - const NodeT *SrcBlock = Jump->Source; - if (SrcBlock->CurChain != ChainPred) - continue; - size_t Offset = SrcBlock->CurIndex + 1; - tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::X2_X1_Y}); - } + // Attach (a part of) ChainPred before the first node of ChainSucc. + for (JumpT *Jump : ChainSucc->Nodes.front()->InJumps) { + const NodeT *SrcBlock = Jump->Source; + if (SrcBlock->CurChain != ChainPred) + continue; + size_t Offset = SrcBlock->CurIndex + 1; + tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::X2_X1_Y}); + } - // Attach (a part of) ChainPred after the last node of ChainSucc - for (JumpT *Jump : ChainSucc->Nodes.back()->OutJumps) { - const NodeT *DstBlock = Jump->Source; - if (DstBlock->CurChain != ChainPred) - continue; - size_t Offset = DstBlock->CurIndex; - tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1}); - } + // Attach (a part of) ChainPred after the last node of ChainSucc. + for (JumpT *Jump : ChainSucc->Nodes.back()->OutJumps) { + const NodeT *DstBlock = Jump->Target; + if (DstBlock->CurChain != ChainPred) + continue; + size_t Offset = DstBlock->CurIndex; + tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1}); } - // Try to break ChainPred in various ways and concatenate with ChainSucc + // Try to break ChainPred in various ways and concatenate with ChainSucc. if (ChainPred->Nodes.size() <= ChainSplitThreshold) { for (size_t Offset = 1; Offset < ChainPred->Nodes.size(); Offset++) { - // Try to split the chain in different ways. In practice, applying - // X2_Y_X1 merging is almost never provides benefits; thus, we exclude - // it from consideration to reduce the search space + // Do not split the chain along a fall-through jump. One of the two + // loops above may still "break" such a jump whenever it results in a + // new fall-through. + const NodeT *BB = ChainPred->Nodes[Offset - 1]; + const NodeT *BB2 = ChainPred->Nodes[Offset]; + if (BB->isSuccessor(BB2)) + continue; + + // In practice, applying X2_Y_X1 merging almost never provides benefits; + // thus, we exclude it from consideration to reduce the search space. tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1, MergeTypeT::X2_X1_Y}); } } + Edge->setCachedMergeGain(ChainPred, ChainSucc, Gain); return Gain; } @@ -870,19 +929,20 @@ private: /// /// The two chains are not modified in the method. MergeGainT computeMergeGain(const ChainT *ChainPred, const ChainT *ChainSucc, - const std::vector<JumpT *> &Jumps, - size_t MergeOffset, MergeTypeT MergeType) const { - auto MergedBlocks = + const MergedJumpsT &Jumps, size_t MergeOffset, + MergeTypeT MergeType) const { + MergedNodesT MergedNodes = mergeNodes(ChainPred->Nodes, ChainSucc->Nodes, MergeOffset, MergeType); - // Do not allow a merge that does not preserve the original entry point + // Do not allow a merge that does not preserve the original entry point. if ((ChainPred->isEntry() || ChainSucc->isEntry()) && - !MergedBlocks.getFirstNode()->isEntry()) + !MergedNodes.getFirstNode()->isEntry()) return MergeGainT(); - // The gain for the new chain - auto NewGainScore = extTSPScore(MergedBlocks, Jumps) - ChainPred->Score; - return MergeGainT(NewGainScore, MergeOffset, MergeType); + // The gain for the new chain. + double NewScore = extTSPScore(MergedNodes, Jumps); + double CurScore = ChainPred->Score; + return MergeGainT(NewScore - CurScore, MergeOffset, MergeType); } /// Merge chain From into chain Into, update the list of active chains, @@ -891,39 +951,398 @@ private: MergeTypeT MergeType) { assert(Into != From && "a chain cannot be merged with itself"); - // Merge the nodes - MergedChain MergedNodes = + // Merge the nodes. + MergedNodesT MergedNodes = mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType); Into->merge(From, MergedNodes.getNodes()); - // Merge the edges + // Merge the edges. Into->mergeEdges(From); From->clear(); - // Update cached ext-tsp score for the new chain + // Update cached ext-tsp score for the new chain. ChainEdge *SelfEdge = Into->getEdge(Into); if (SelfEdge != nullptr) { - MergedNodes = MergedChain(Into->Nodes.begin(), Into->Nodes.end()); - Into->Score = extTSPScore(MergedNodes, SelfEdge->jumps()); + MergedNodes = MergedNodesT(Into->Nodes.begin(), Into->Nodes.end()); + MergedJumpsT MergedJumps(&SelfEdge->jumps()); + Into->Score = extTSPScore(MergedNodes, MergedJumps); } - // Remove the chain from the list of active chains - llvm::erase_value(HotChains, From); + // Remove the chain from the list of active chains. + llvm::erase(HotChains, From); - // Invalidate caches + // Invalidate caches. for (auto EdgeIt : Into->Edges) EdgeIt.second->invalidateCache(); } /// Concatenate all chains into the final order. - void concatChains(std::vector<uint64_t> &Order) { - // Collect chains and calculate density stats for their sorting + std::vector<uint64_t> concatChains() { + // Collect non-empty chains. + std::vector<const ChainT *> SortedChains; + for (ChainT &Chain : AllChains) { + if (!Chain.Nodes.empty()) + SortedChains.push_back(&Chain); + } + + // Sorting chains by density in the decreasing order. + std::sort(SortedChains.begin(), SortedChains.end(), + [&](const ChainT *L, const ChainT *R) { + // Place the entry point at the beginning of the order. + if (L->isEntry() != R->isEntry()) + return L->isEntry(); + + // Compare by density and break ties by chain identifiers. + return std::make_tuple(-L->density(), L->Id) < + std::make_tuple(-R->density(), R->Id); + }); + + // Collect the nodes in the order specified by their chains. + std::vector<uint64_t> Order; + Order.reserve(NumNodes); + for (const ChainT *Chain : SortedChains) + for (NodeT *Node : Chain->Nodes) + Order.push_back(Node->Index); + return Order; + } + +private: + /// The number of nodes in the graph. + const size_t NumNodes; + + /// Successors of each node. + std::vector<std::vector<uint64_t>> SuccNodes; + + /// Predecessors of each node. + std::vector<std::vector<uint64_t>> PredNodes; + + /// All nodes (basic blocks) in the graph. + std::vector<NodeT> AllNodes; + + /// All jumps between the nodes. + std::vector<JumpT> AllJumps; + + /// All chains of nodes. + std::vector<ChainT> AllChains; + + /// All edges between the chains. + std::vector<ChainEdge> AllEdges; + + /// Active chains. The vector gets updated at runtime when chains are merged. + std::vector<ChainT *> HotChains; +}; + +/// The implementation of the Cache-Directed Sort (CDSort) algorithm for +/// ordering functions represented by a call graph. +class CDSortImpl { +public: + CDSortImpl(const CDSortConfig &Config, ArrayRef<uint64_t> NodeSizes, + ArrayRef<uint64_t> NodeCounts, ArrayRef<EdgeCount> EdgeCounts, + ArrayRef<uint64_t> EdgeOffsets) + : Config(Config), NumNodes(NodeSizes.size()) { + initialize(NodeSizes, NodeCounts, EdgeCounts, EdgeOffsets); + } + + /// Run the algorithm and return an ordered set of function clusters. + std::vector<uint64_t> run() { + // Merge pairs of chains while improving the objective. + mergeChainPairs(); + + // Collect nodes from all the chains. + return concatChains(); + } + +private: + /// Initialize the algorithm's data structures. + void initialize(const ArrayRef<uint64_t> &NodeSizes, + const ArrayRef<uint64_t> &NodeCounts, + const ArrayRef<EdgeCount> &EdgeCounts, + const ArrayRef<uint64_t> &EdgeOffsets) { + // Initialize nodes. + AllNodes.reserve(NumNodes); + for (uint64_t Node = 0; Node < NumNodes; Node++) { + uint64_t Size = std::max<uint64_t>(NodeSizes[Node], 1ULL); + uint64_t ExecutionCount = NodeCounts[Node]; + AllNodes.emplace_back(Node, Size, ExecutionCount); + TotalSamples += ExecutionCount; + if (ExecutionCount > 0) + TotalSize += Size; + } + + // Initialize jumps between the nodes. + SuccNodes.resize(NumNodes); + PredNodes.resize(NumNodes); + AllJumps.reserve(EdgeCounts.size()); + for (size_t I = 0; I < EdgeCounts.size(); I++) { + auto [Pred, Succ, Count] = EdgeCounts[I]; + // Ignore recursive calls. + if (Pred == Succ) + continue; + + SuccNodes[Pred].push_back(Succ); + PredNodes[Succ].push_back(Pred); + if (Count > 0) { + NodeT &PredNode = AllNodes[Pred]; + NodeT &SuccNode = AllNodes[Succ]; + AllJumps.emplace_back(&PredNode, &SuccNode, Count); + AllJumps.back().Offset = EdgeOffsets[I]; + SuccNode.InJumps.push_back(&AllJumps.back()); + PredNode.OutJumps.push_back(&AllJumps.back()); + // Adjust execution counts. + PredNode.ExecutionCount = std::max(PredNode.ExecutionCount, Count); + SuccNode.ExecutionCount = std::max(SuccNode.ExecutionCount, Count); + } + } + + // Initialize chains. + AllChains.reserve(NumNodes); + for (NodeT &Node : AllNodes) { + // Adjust execution counts. + Node.ExecutionCount = std::max(Node.ExecutionCount, Node.inCount()); + Node.ExecutionCount = std::max(Node.ExecutionCount, Node.outCount()); + // Create chain. + AllChains.emplace_back(Node.Index, &Node); + Node.CurChain = &AllChains.back(); + } + + // Initialize chain edges. + AllEdges.reserve(AllJumps.size()); + for (NodeT &PredNode : AllNodes) { + for (JumpT *Jump : PredNode.OutJumps) { + NodeT *SuccNode = Jump->Target; + ChainEdge *CurEdge = PredNode.CurChain->getEdge(SuccNode->CurChain); + // This edge is already present in the graph. + if (CurEdge != nullptr) { + assert(SuccNode->CurChain->getEdge(PredNode.CurChain) != nullptr); + CurEdge->appendJump(Jump); + continue; + } + // This is a new edge. + AllEdges.emplace_back(Jump); + PredNode.CurChain->addEdge(SuccNode->CurChain, &AllEdges.back()); + SuccNode->CurChain->addEdge(PredNode.CurChain, &AllEdges.back()); + } + } + } + + /// Merge pairs of chains while there is an improvement in the objective. + void mergeChainPairs() { + // Create a priority queue containing all edges ordered by the merge gain. + auto GainComparator = [](ChainEdge *L, ChainEdge *R) { + return std::make_tuple(-L->gain(), L->srcChain()->Id, L->dstChain()->Id) < + std::make_tuple(-R->gain(), R->srcChain()->Id, R->dstChain()->Id); + }; + std::set<ChainEdge *, decltype(GainComparator)> Queue(GainComparator); + + // Insert the edges into the queue. + [[maybe_unused]] size_t NumActiveChains = 0; + for (NodeT &Node : AllNodes) { + if (Node.ExecutionCount == 0) + continue; + ++NumActiveChains; + for (const auto &[_, Edge] : Node.CurChain->Edges) { + // Ignore self-edges. + if (Edge->isSelfEdge()) + continue; + // Ignore already processed edges. + if (Edge->gain() != -1.0) + continue; + + // Compute the gain of merging the two chains. + MergeGainT Gain = getBestMergeGain(Edge); + Edge->setMergeGain(Gain); + + if (Edge->gain() > EPS) + Queue.insert(Edge); + } + } + + // Merge the chains while the gain of merging is positive. + while (!Queue.empty()) { + // Extract the best (top) edge for merging. + ChainEdge *BestEdge = *Queue.begin(); + Queue.erase(Queue.begin()); + ChainT *BestSrcChain = BestEdge->srcChain(); + ChainT *BestDstChain = BestEdge->dstChain(); + + // Remove outdated edges from the queue. + for (const auto &[_, ChainEdge] : BestSrcChain->Edges) + Queue.erase(ChainEdge); + for (const auto &[_, ChainEdge] : BestDstChain->Edges) + Queue.erase(ChainEdge); + + // Merge the best pair of chains. + MergeGainT BestGain = BestEdge->getMergeGain(); + mergeChains(BestSrcChain, BestDstChain, BestGain.mergeOffset(), + BestGain.mergeType()); + --NumActiveChains; + + // Insert newly created edges into the queue. + for (const auto &[_, Edge] : BestSrcChain->Edges) { + // Ignore loop edges. + if (Edge->isSelfEdge()) + continue; + if (Edge->srcChain()->numBlocks() + Edge->dstChain()->numBlocks() > + Config.MaxChainSize) + continue; + + // Compute the gain of merging the two chains. + MergeGainT Gain = getBestMergeGain(Edge); + Edge->setMergeGain(Gain); + + if (Edge->gain() > EPS) + Queue.insert(Edge); + } + } + + LLVM_DEBUG(dbgs() << "Cache-directed function sorting reduced the number" + << " of chains from " << NumNodes << " to " + << NumActiveChains << "\n"); + } + + /// Compute the gain of merging two chains. + /// + /// The function considers all possible ways of merging two chains and + /// computes the one having the largest increase in ExtTSP objective. The + /// result is a pair with the first element being the gain and the second + /// element being the corresponding merging type. + MergeGainT getBestMergeGain(ChainEdge *Edge) const { + assert(!Edge->jumps().empty() && "trying to merge chains w/o jumps"); + // Precompute jumps between ChainPred and ChainSucc. + MergedJumpsT Jumps(&Edge->jumps()); + ChainT *SrcChain = Edge->srcChain(); + ChainT *DstChain = Edge->dstChain(); + + // This object holds the best currently chosen gain of merging two chains. + MergeGainT Gain = MergeGainT(); + + /// Given a list of merge types, try to merge two chains and update Gain + /// with a better alternative. + auto tryChainMerging = [&](const std::vector<MergeTypeT> &MergeTypes) { + // Apply the merge, compute the corresponding gain, and update the best + // value, if the merge is beneficial. + for (const MergeTypeT &MergeType : MergeTypes) { + MergeGainT NewGain = + computeMergeGain(SrcChain, DstChain, Jumps, MergeType); + + // When forward and backward gains are the same, prioritize merging that + // preserves the original order of the functions in the binary. + if (std::abs(Gain.score() - NewGain.score()) < EPS) { + if ((MergeType == MergeTypeT::X_Y && SrcChain->Id < DstChain->Id) || + (MergeType == MergeTypeT::Y_X && SrcChain->Id > DstChain->Id)) { + Gain = NewGain; + } + } else if (NewGain.score() > Gain.score() + EPS) { + Gain = NewGain; + } + } + }; + + // Try to concatenate two chains w/o splitting. + tryChainMerging({MergeTypeT::X_Y, MergeTypeT::Y_X}); + + return Gain; + } + + /// Compute the score gain of merging two chains, respecting a given type. + /// + /// The two chains are not modified in the method. + MergeGainT computeMergeGain(ChainT *ChainPred, ChainT *ChainSucc, + const MergedJumpsT &Jumps, + MergeTypeT MergeType) const { + // This doesn't depend on the ordering of the nodes + double FreqGain = freqBasedLocalityGain(ChainPred, ChainSucc); + + // Merge offset is always 0, as the chains are not split. + size_t MergeOffset = 0; + auto MergedBlocks = + mergeNodes(ChainPred->Nodes, ChainSucc->Nodes, MergeOffset, MergeType); + double DistGain = distBasedLocalityGain(MergedBlocks, Jumps); + + double GainScore = DistGain + Config.FrequencyScale * FreqGain; + // Scale the result to increase the importance of merging short chains. + if (GainScore >= 0.0) + GainScore /= std::min(ChainPred->Size, ChainSucc->Size); + + return MergeGainT(GainScore, MergeOffset, MergeType); + } + + /// Compute the change of the frequency locality after merging the chains. + double freqBasedLocalityGain(ChainT *ChainPred, ChainT *ChainSucc) const { + auto missProbability = [&](double ChainDensity) { + double PageSamples = ChainDensity * Config.CacheSize; + if (PageSamples >= TotalSamples) + return 0.0; + double P = PageSamples / TotalSamples; + return pow(1.0 - P, static_cast<double>(Config.CacheEntries)); + }; + + // Cache misses on the chains before merging. + double CurScore = + ChainPred->ExecutionCount * missProbability(ChainPred->density()) + + ChainSucc->ExecutionCount * missProbability(ChainSucc->density()); + + // Cache misses on the merged chain + double MergedCounts = ChainPred->ExecutionCount + ChainSucc->ExecutionCount; + double MergedSize = ChainPred->Size + ChainSucc->Size; + double MergedDensity = static_cast<double>(MergedCounts) / MergedSize; + double NewScore = MergedCounts * missProbability(MergedDensity); + + return CurScore - NewScore; + } + + /// Compute the distance locality for a jump / call. + double distScore(uint64_t SrcAddr, uint64_t DstAddr, uint64_t Count) const { + uint64_t Dist = SrcAddr <= DstAddr ? DstAddr - SrcAddr : SrcAddr - DstAddr; + double D = Dist == 0 ? 0.1 : static_cast<double>(Dist); + return static_cast<double>(Count) * std::pow(D, -Config.DistancePower); + } + + /// Compute the change of the distance locality after merging the chains. + double distBasedLocalityGain(const MergedNodesT &Nodes, + const MergedJumpsT &Jumps) const { + uint64_t CurAddr = 0; + Nodes.forEach([&](const NodeT *Node) { + Node->EstimatedAddr = CurAddr; + CurAddr += Node->Size; + }); + + double CurScore = 0; + double NewScore = 0; + Jumps.forEach([&](const JumpT *Jump) { + uint64_t SrcAddr = Jump->Source->EstimatedAddr + Jump->Offset; + uint64_t DstAddr = Jump->Target->EstimatedAddr; + NewScore += distScore(SrcAddr, DstAddr, Jump->ExecutionCount); + CurScore += distScore(0, TotalSize, Jump->ExecutionCount); + }); + return NewScore - CurScore; + } + + /// Merge chain From into chain Into, update the list of active chains, + /// adjacency information, and the corresponding cached values. + void mergeChains(ChainT *Into, ChainT *From, size_t MergeOffset, + MergeTypeT MergeType) { + assert(Into != From && "a chain cannot be merged with itself"); + + // Merge the nodes. + MergedNodesT MergedNodes = + mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType); + Into->merge(From, MergedNodes.getNodes()); + + // Merge the edges. + Into->mergeEdges(From); + From->clear(); + } + + /// Concatenate all chains into the final order. + std::vector<uint64_t> concatChains() { + // Collect chains and calculate density stats for their sorting. std::vector<const ChainT *> SortedChains; DenseMap<const ChainT *, double> ChainDensity; for (ChainT &Chain : AllChains) { if (!Chain.Nodes.empty()) { SortedChains.push_back(&Chain); - // Using doubles to avoid overflow of ExecutionCounts + // Using doubles to avoid overflow of ExecutionCounts. double Size = 0; double ExecutionCount = 0; for (NodeT *Node : Chain.Nodes) { @@ -935,30 +1354,29 @@ private: } } - // Sorting chains by density in the decreasing order - std::stable_sort(SortedChains.begin(), SortedChains.end(), - [&](const ChainT *L, const ChainT *R) { - // Make sure the original entry point is at the - // beginning of the order - if (L->isEntry() != R->isEntry()) - return L->isEntry(); - - const double DL = ChainDensity[L]; - const double DR = ChainDensity[R]; - // Compare by density and break ties by chain identifiers - return (DL != DR) ? (DL > DR) : (L->Id < R->Id); - }); + // Sort chains by density in the decreasing order. + std::sort(SortedChains.begin(), SortedChains.end(), + [&](const ChainT *L, const ChainT *R) { + const double DL = ChainDensity[L]; + const double DR = ChainDensity[R]; + // Compare by density and break ties by chain identifiers. + return std::make_tuple(-DL, L->Id) < + std::make_tuple(-DR, R->Id); + }); - // Collect the nodes in the order specified by their chains + // Collect the nodes in the order specified by their chains. + std::vector<uint64_t> Order; Order.reserve(NumNodes); - for (const ChainT *Chain : SortedChains) { - for (NodeT *Node : Chain->Nodes) { + for (const ChainT *Chain : SortedChains) + for (NodeT *Node : Chain->Nodes) Order.push_back(Node->Index); - } - } + return Order; } private: + /// Config for the algorithm. + const CDSortConfig Config; + /// The number of nodes in the graph. const size_t NumNodes; @@ -968,10 +1386,10 @@ private: /// Predecessors of each node. std::vector<std::vector<uint64_t>> PredNodes; - /// All nodes (basic blocks) in the graph. + /// All nodes (functions) in the graph. std::vector<NodeT> AllNodes; - /// All jumps between the nodes. + /// All jumps (function calls) between the nodes. std::vector<JumpT> AllJumps; /// All chains of nodes. @@ -980,65 +1398,95 @@ private: /// All edges between the chains. std::vector<ChainEdge> AllEdges; - /// Active chains. The vector gets updated at runtime when chains are merged. - std::vector<ChainT *> HotChains; + /// The total number of samples in the graph. + uint64_t TotalSamples{0}; + + /// The total size of the nodes in the graph. + uint64_t TotalSize{0}; }; } // end of anonymous namespace std::vector<uint64_t> -llvm::applyExtTspLayout(const std::vector<uint64_t> &NodeSizes, - const std::vector<uint64_t> &NodeCounts, - const std::vector<EdgeCountT> &EdgeCounts) { - // Verify correctness of the input data +codelayout::computeExtTspLayout(ArrayRef<uint64_t> NodeSizes, + ArrayRef<uint64_t> NodeCounts, + ArrayRef<EdgeCount> EdgeCounts) { + // Verify correctness of the input data. assert(NodeCounts.size() == NodeSizes.size() && "Incorrect input"); assert(NodeSizes.size() > 2 && "Incorrect input"); - // Apply the reordering algorithm + // Apply the reordering algorithm. ExtTSPImpl Alg(NodeSizes, NodeCounts, EdgeCounts); - std::vector<uint64_t> Result; - Alg.run(Result); + std::vector<uint64_t> Result = Alg.run(); - // Verify correctness of the output + // Verify correctness of the output. assert(Result.front() == 0 && "Original entry point is not preserved"); assert(Result.size() == NodeSizes.size() && "Incorrect size of layout"); return Result; } -double llvm::calcExtTspScore(const std::vector<uint64_t> &Order, - const std::vector<uint64_t> &NodeSizes, - const std::vector<uint64_t> &NodeCounts, - const std::vector<EdgeCountT> &EdgeCounts) { - // Estimate addresses of the blocks in memory +double codelayout::calcExtTspScore(ArrayRef<uint64_t> Order, + ArrayRef<uint64_t> NodeSizes, + ArrayRef<uint64_t> NodeCounts, + ArrayRef<EdgeCount> EdgeCounts) { + // Estimate addresses of the blocks in memory. std::vector<uint64_t> Addr(NodeSizes.size(), 0); for (size_t Idx = 1; Idx < Order.size(); Idx++) { Addr[Order[Idx]] = Addr[Order[Idx - 1]] + NodeSizes[Order[Idx - 1]]; } std::vector<uint64_t> OutDegree(NodeSizes.size(), 0); - for (auto It : EdgeCounts) { - uint64_t Pred = It.first.first; - OutDegree[Pred]++; - } + for (auto Edge : EdgeCounts) + ++OutDegree[Edge.src]; - // Increase the score for each jump + // Increase the score for each jump. double Score = 0; - for (auto It : EdgeCounts) { - uint64_t Pred = It.first.first; - uint64_t Succ = It.first.second; - uint64_t Count = It.second; - bool IsConditional = OutDegree[Pred] > 1; - Score += ::extTSPScore(Addr[Pred], NodeSizes[Pred], Addr[Succ], Count, - IsConditional); + for (auto Edge : EdgeCounts) { + bool IsConditional = OutDegree[Edge.src] > 1; + Score += ::extTSPScore(Addr[Edge.src], NodeSizes[Edge.src], Addr[Edge.dst], + Edge.count, IsConditional); } return Score; } -double llvm::calcExtTspScore(const std::vector<uint64_t> &NodeSizes, - const std::vector<uint64_t> &NodeCounts, - const std::vector<EdgeCountT> &EdgeCounts) { +double codelayout::calcExtTspScore(ArrayRef<uint64_t> NodeSizes, + ArrayRef<uint64_t> NodeCounts, + ArrayRef<EdgeCount> EdgeCounts) { std::vector<uint64_t> Order(NodeSizes.size()); for (size_t Idx = 0; Idx < NodeSizes.size(); Idx++) { Order[Idx] = Idx; } return calcExtTspScore(Order, NodeSizes, NodeCounts, EdgeCounts); } + +std::vector<uint64_t> codelayout::computeCacheDirectedLayout( + const CDSortConfig &Config, ArrayRef<uint64_t> FuncSizes, + ArrayRef<uint64_t> FuncCounts, ArrayRef<EdgeCount> CallCounts, + ArrayRef<uint64_t> CallOffsets) { + // Verify correctness of the input data. + assert(FuncCounts.size() == FuncSizes.size() && "Incorrect input"); + + // Apply the reordering algorithm. + CDSortImpl Alg(Config, FuncSizes, FuncCounts, CallCounts, CallOffsets); + std::vector<uint64_t> Result = Alg.run(); + assert(Result.size() == FuncSizes.size() && "Incorrect size of layout"); + return Result; +} + +std::vector<uint64_t> codelayout::computeCacheDirectedLayout( + ArrayRef<uint64_t> FuncSizes, ArrayRef<uint64_t> FuncCounts, + ArrayRef<EdgeCount> CallCounts, ArrayRef<uint64_t> CallOffsets) { + CDSortConfig Config; + // Populate the config from the command-line options. + if (CacheEntries.getNumOccurrences() > 0) + Config.CacheEntries = CacheEntries; + if (CacheSize.getNumOccurrences() > 0) + Config.CacheSize = CacheSize; + if (CDMaxChainSize.getNumOccurrences() > 0) + Config.MaxChainSize = CDMaxChainSize; + if (DistancePower.getNumOccurrences() > 0) + Config.DistancePower = DistancePower; + if (FrequencyScale.getNumOccurrences() > 0) + Config.FrequencyScale = FrequencyScale; + return computeCacheDirectedLayout(Config, FuncSizes, FuncCounts, CallCounts, + CallOffsets); +} diff --git a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp index 4a6719741719..6a2dae5bab68 100644 --- a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp +++ b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp @@ -417,7 +417,7 @@ void llvm::moveInstructionsToTheBeginning(BasicBlock &FromBB, BasicBlock &ToBB, Instruction *MovePos = ToBB.getFirstNonPHIOrDbg(); if (isSafeToMoveBefore(I, *MovePos, DT, &PDT, &DI)) - I.moveBefore(MovePos); + I.moveBeforePreserving(MovePos); } } @@ -429,7 +429,7 @@ void llvm::moveInstructionsToTheEnd(BasicBlock &FromBB, BasicBlock &ToBB, while (FromBB.size() > 1) { Instruction &I = FromBB.front(); if (isSafeToMoveBefore(I, *MovePos, DT, &PDT, &DI)) - I.moveBefore(MovePos); + I.moveBeforePreserving(MovePos); } } diff --git a/llvm/lib/Transforms/Utils/CtorUtils.cpp b/llvm/lib/Transforms/Utils/CtorUtils.cpp index e07c92df2265..507729bc5ebc 100644 --- a/llvm/lib/Transforms/Utils/CtorUtils.cpp +++ b/llvm/lib/Transforms/Utils/CtorUtils.cpp @@ -52,12 +52,9 @@ static void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemov NGV->takeName(GCL); // Nuke the old list, replacing any uses with the new one. - if (!GCL->use_empty()) { - Constant *V = NGV; - if (V->getType() != GCL->getType()) - V = ConstantExpr::getBitCast(V, GCL->getType()); - GCL->replaceAllUsesWith(V); - } + if (!GCL->use_empty()) + GCL->replaceAllUsesWith(NGV); + GCL->eraseFromParent(); } diff --git a/llvm/lib/Transforms/Utils/DXILUpgrade.cpp b/llvm/lib/Transforms/Utils/DXILUpgrade.cpp new file mode 100644 index 000000000000..735686ddce38 --- /dev/null +++ b/llvm/lib/Transforms/Utils/DXILUpgrade.cpp @@ -0,0 +1,36 @@ +//===- DXILUpgrade.cpp - Upgrade DXIL metadata to LLVM constructs ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/DXILUpgrade.h" + +using namespace llvm; + +static bool handleValVerMetadata(Module &M) { + NamedMDNode *ValVer = M.getNamedMetadata("dx.valver"); + if (!ValVer) + return false; + + // We don't need the validation version internally, so we drop it. + ValVer->dropAllReferences(); + ValVer->eraseFromParent(); + return true; +} + +PreservedAnalyses DXILUpgradePass::run(Module &M, ModuleAnalysisManager &AM) { + PreservedAnalyses PA; + // We never add, remove, or change functions here. + PA.preserve<FunctionAnalysisManagerModuleProxy>(); + PA.preserveSet<AllAnalysesOn<Function>>(); + + bool Changed = false; + Changed |= handleValVerMetadata(M); + + if (!Changed) + return PreservedAnalyses::all(); + return PA; +} diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp index 93cad0888a56..d0cc603426d2 100644 --- a/llvm/lib/Transforms/Utils/Debugify.cpp +++ b/llvm/lib/Transforms/Utils/Debugify.cpp @@ -801,7 +801,15 @@ bool checkDebugifyMetadata(Module &M, /// legacy module pass manager. struct DebugifyModulePass : public ModulePass { bool runOnModule(Module &M) override { - return applyDebugify(M, Mode, DebugInfoBeforePass, NameOfWrappedPass); + bool NewDebugMode = M.IsNewDbgInfoFormat; + if (NewDebugMode) + M.convertFromNewDbgValues(); + + bool Result = applyDebugify(M, Mode, DebugInfoBeforePass, NameOfWrappedPass); + + if (NewDebugMode) + M.convertToNewDbgValues(); + return Result; } DebugifyModulePass(enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, @@ -826,7 +834,15 @@ private: /// single function, used with the legacy module pass manager. struct DebugifyFunctionPass : public FunctionPass { bool runOnFunction(Function &F) override { - return applyDebugify(F, Mode, DebugInfoBeforePass, NameOfWrappedPass); + bool NewDebugMode = F.IsNewDbgInfoFormat; + if (NewDebugMode) + F.convertFromNewDbgValues(); + + bool Result = applyDebugify(F, Mode, DebugInfoBeforePass, NameOfWrappedPass); + + if (NewDebugMode) + F.convertToNewDbgValues(); + return Result; } DebugifyFunctionPass( @@ -852,13 +868,24 @@ private: /// legacy module pass manager. struct CheckDebugifyModulePass : public ModulePass { bool runOnModule(Module &M) override { + bool NewDebugMode = M.IsNewDbgInfoFormat; + if (NewDebugMode) + M.convertFromNewDbgValues(); + + bool Result; if (Mode == DebugifyMode::SyntheticDebugInfo) - return checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass, + Result = checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass, "CheckModuleDebugify", Strip, StatsMap); - return checkDebugInfoMetadata( + else + Result = checkDebugInfoMetadata( M, M.functions(), *DebugInfoBeforePass, "CheckModuleDebugify (original debuginfo)", NameOfWrappedPass, OrigDIVerifyBugsReportFilePath); + + if (NewDebugMode) + M.convertToNewDbgValues(); + + return Result; } CheckDebugifyModulePass( @@ -891,16 +918,26 @@ private: /// with the legacy module pass manager. struct CheckDebugifyFunctionPass : public FunctionPass { bool runOnFunction(Function &F) override { + bool NewDebugMode = F.IsNewDbgInfoFormat; + if (NewDebugMode) + F.convertFromNewDbgValues(); + Module &M = *F.getParent(); auto FuncIt = F.getIterator(); + bool Result; if (Mode == DebugifyMode::SyntheticDebugInfo) - return checkDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)), + Result = checkDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)), NameOfWrappedPass, "CheckFunctionDebugify", Strip, StatsMap); - return checkDebugInfoMetadata( + else + Result = checkDebugInfoMetadata( M, make_range(FuncIt, std::next(FuncIt)), *DebugInfoBeforePass, "CheckFunctionDebugify (original debuginfo)", NameOfWrappedPass, OrigDIVerifyBugsReportFilePath); + + if (NewDebugMode) + F.convertToNewDbgValues(); + return Result; } CheckDebugifyFunctionPass( @@ -972,6 +1009,10 @@ createDebugifyFunctionPass(enum DebugifyMode Mode, } PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) { + bool NewDebugMode = M.IsNewDbgInfoFormat; + if (NewDebugMode) + M.convertFromNewDbgValues(); + if (Mode == DebugifyMode::SyntheticDebugInfo) applyDebugifyMetadata(M, M.functions(), "ModuleDebugify: ", /*ApplyToMF*/ nullptr); @@ -979,6 +1020,10 @@ PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) { collectDebugInfoMetadata(M, M.functions(), *DebugInfoBeforePass, "ModuleDebugify (original debuginfo)", NameOfWrappedPass); + + if (NewDebugMode) + M.convertToNewDbgValues(); + PreservedAnalyses PA; PA.preserveSet<CFGAnalyses>(); return PA; @@ -1010,6 +1055,10 @@ FunctionPass *createCheckDebugifyFunctionPass( PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M, ModuleAnalysisManager &) { + bool NewDebugMode = M.IsNewDbgInfoFormat; + if (NewDebugMode) + M.convertFromNewDbgValues(); + if (Mode == DebugifyMode::SyntheticDebugInfo) checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass, "CheckModuleDebugify", Strip, StatsMap); @@ -1018,6 +1067,10 @@ PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M, M, M.functions(), *DebugInfoBeforePass, "CheckModuleDebugify (original debuginfo)", NameOfWrappedPass, OrigDIVerifyBugsReportFilePath); + + if (NewDebugMode) + M.convertToNewDbgValues(); + return PreservedAnalyses::all(); } @@ -1035,13 +1088,13 @@ void DebugifyEachInstrumentation::registerCallbacks( return; PreservedAnalyses PA; PA.preserveSet<CFGAnalyses>(); - if (const auto **CF = any_cast<const Function *>(&IR)) { + if (const auto **CF = llvm::any_cast<const Function *>(&IR)) { Function &F = *const_cast<Function *>(*CF); applyDebugify(F, Mode, DebugInfoBeforePass, P); MAM.getResult<FunctionAnalysisManagerModuleProxy>(*F.getParent()) .getManager() .invalidate(F, PA); - } else if (const auto **CM = any_cast<const Module *>(&IR)) { + } else if (const auto **CM = llvm::any_cast<const Module *>(&IR)) { Module &M = *const_cast<Module *>(*CM); applyDebugify(M, Mode, DebugInfoBeforePass, P); MAM.invalidate(M, PA); @@ -1053,7 +1106,7 @@ void DebugifyEachInstrumentation::registerCallbacks( return; PreservedAnalyses PA; PA.preserveSet<CFGAnalyses>(); - if (const auto **CF = any_cast<const Function *>(&IR)) { + if (const auto **CF = llvm::any_cast<const Function *>(&IR)) { auto &F = *const_cast<Function *>(*CF); Module &M = *F.getParent(); auto It = F.getIterator(); @@ -1069,7 +1122,7 @@ void DebugifyEachInstrumentation::registerCallbacks( MAM.getResult<FunctionAnalysisManagerModuleProxy>(*F.getParent()) .getManager() .invalidate(F, PA); - } else if (const auto **CM = any_cast<const Module *>(&IR)) { + } else if (const auto **CM = llvm::any_cast<const Module *>(&IR)) { Module &M = *const_cast<Module *>(*CM); if (Mode == DebugifyMode::SyntheticDebugInfo) checkDebugifyMetadata(M, M.functions(), P, "CheckModuleDebugify", diff --git a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp index d424ebbef99d..092f1799755d 100644 --- a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp +++ b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp @@ -35,7 +35,7 @@ static void insertCall(Function &CurFn, StringRef Func, Triple TargetTriple(M.getTargetTriple()); if (TargetTriple.isOSAIX() && Func == "__mcount") { Type *SizeTy = M.getDataLayout().getIntPtrType(C); - Type *SizePtrTy = SizeTy->getPointerTo(); + Type *SizePtrTy = PointerType::getUnqual(C); GlobalVariable *GV = new GlobalVariable(M, SizeTy, /*isConstant=*/false, GlobalValue::InternalLinkage, ConstantInt::get(SizeTy, 0)); @@ -54,7 +54,7 @@ static void insertCall(Function &CurFn, StringRef Func, } if (Func == "__cyg_profile_func_enter" || Func == "__cyg_profile_func_exit") { - Type *ArgTypes[] = {Type::getInt8PtrTy(C), Type::getInt8PtrTy(C)}; + Type *ArgTypes[] = {PointerType::getUnqual(C), PointerType::getUnqual(C)}; FunctionCallee Fn = M.getOrInsertFunction( Func, FunctionType::get(Type::getVoidTy(C), ArgTypes, false)); @@ -65,9 +65,7 @@ static void insertCall(Function &CurFn, StringRef Func, InsertionPt); RetAddr->setDebugLoc(DL); - Value *Args[] = {ConstantExpr::getBitCast(&CurFn, Type::getInt8PtrTy(C)), - RetAddr}; - + Value *Args[] = {&CurFn, RetAddr}; CallInst *Call = CallInst::Create(Fn, ArrayRef<Value *>(Args), "", InsertionPt); Call->setDebugLoc(DL); diff --git a/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp b/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp index 88c838685bca..cc00106fcbfe 100644 --- a/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp +++ b/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp @@ -70,7 +70,7 @@ IRBuilder<> *EscapeEnumerator::Next() { // Create a cleanup block. LLVMContext &C = F.getContext(); BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F); - Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C)); + Type *ExnTy = StructType::get(PointerType::getUnqual(C), Type::getInt32Ty(C)); if (!F.hasPersonalityFn()) { FunctionCallee PersFn = getDefaultPersonalityFn(F.getParent()); F.setPersonalityFn(cast<Constant>(PersFn.getCallee())); diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp index dda236167363..11e24d0585be 100644 --- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp +++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp @@ -87,10 +87,8 @@ struct FixIrreducible : public FunctionPass { } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequiredID(LowerSwitchID); AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<LoopInfoWrapperPass>(); - AU.addPreservedID(LowerSwitchID); AU.addPreserved<DominatorTreeWrapperPass>(); AU.addPreserved<LoopInfoWrapperPass>(); } @@ -106,7 +104,6 @@ FunctionPass *llvm::createFixIrreduciblePass() { return new FixIrreducible(); } INITIALIZE_PASS_BEGIN(FixIrreducible, "fix-irreducible", "Convert irreducible control-flow into natural loops", false /* Only looks at CFG */, false /* Analysis Pass */) -INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(FixIrreducible, "fix-irreducible", @@ -317,6 +314,8 @@ static bool FixIrreducibleImpl(Function &F, LoopInfo &LI, DominatorTree &DT) { LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: " << F.getName() << "\n"); + assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator."); + bool Changed = false; SmallVector<Loop *, 8> WorkList; diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp index 8daeb92130ba..79ca99d1566c 100644 --- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp +++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp @@ -160,10 +160,23 @@ int FunctionComparator::cmpAttrs(const AttributeList L, int FunctionComparator::cmpMetadata(const Metadata *L, const Metadata *R) const { // TODO: the following routine coerce the metadata contents into constants - // before comparison. + // or MDStrings before comparison. // It ignores any other cases, so that the metadata nodes are considered // equal even though this is not correct. // We should structurally compare the metadata nodes to be perfect here. + + auto *MDStringL = dyn_cast<MDString>(L); + auto *MDStringR = dyn_cast<MDString>(R); + if (MDStringL && MDStringR) { + if (MDStringL == MDStringR) + return 0; + return MDStringL->getString().compare(MDStringR->getString()); + } + if (MDStringR) + return -1; + if (MDStringL) + return 1; + auto *CL = dyn_cast<ConstantAsMetadata>(L); auto *CR = dyn_cast<ConstantAsMetadata>(R); if (CL == CR) @@ -820,6 +833,21 @@ int FunctionComparator::cmpValues(const Value *L, const Value *R) const { if (ConstR) return -1; + const MetadataAsValue *MetadataValueL = dyn_cast<MetadataAsValue>(L); + const MetadataAsValue *MetadataValueR = dyn_cast<MetadataAsValue>(R); + if (MetadataValueL && MetadataValueR) { + if (MetadataValueL == MetadataValueR) + return 0; + + return cmpMetadata(MetadataValueL->getMetadata(), + MetadataValueR->getMetadata()); + } + + if (MetadataValueL) + return 1; + if (MetadataValueR) + return -1; + const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L); const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R); @@ -958,67 +986,3 @@ int FunctionComparator::compare() { } return 0; } - -namespace { - -// Accumulate the hash of a sequence of 64-bit integers. This is similar to a -// hash of a sequence of 64bit ints, but the entire input does not need to be -// available at once. This interface is necessary for functionHash because it -// needs to accumulate the hash as the structure of the function is traversed -// without saving these values to an intermediate buffer. This form of hashing -// is not often needed, as usually the object to hash is just read from a -// buffer. -class HashAccumulator64 { - uint64_t Hash; - -public: - // Initialize to random constant, so the state isn't zero. - HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; } - - void add(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); } - - // No finishing is required, because the entire hash value is used. - uint64_t getHash() { return Hash; } -}; - -} // end anonymous namespace - -// A function hash is calculated by considering only the number of arguments and -// whether a function is varargs, the order of basic blocks (given by the -// successors of each basic block in depth first order), and the order of -// opcodes of each instruction within each of these basic blocks. This mirrors -// the strategy compare() uses to compare functions by walking the BBs in depth -// first order and comparing each instruction in sequence. Because this hash -// does not look at the operands, it is insensitive to things such as the -// target of calls and the constants used in the function, which makes it useful -// when possibly merging functions which are the same modulo constants and call -// targets. -FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) { - HashAccumulator64 H; - H.add(F.isVarArg()); - H.add(F.arg_size()); - - SmallVector<const BasicBlock *, 8> BBs; - SmallPtrSet<const BasicBlock *, 16> VisitedBBs; - - // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(), - // accumulating the hash of the function "structure." (BB and opcode sequence) - BBs.push_back(&F.getEntryBlock()); - VisitedBBs.insert(BBs[0]); - while (!BBs.empty()) { - const BasicBlock *BB = BBs.pop_back_val(); - // This random value acts as a block header, as otherwise the partition of - // opcodes into BBs wouldn't affect the hash, only the order of the opcodes - H.add(45798); - for (const auto &Inst : *BB) { - H.add(Inst.getOpcode()); - } - const Instruction *Term = BB->getTerminator(); - for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { - if (!VisitedBBs.insert(Term->getSuccessor(i)).second) - continue; - BBs.push_back(Term->getSuccessor(i)); - } - } - return H.getHash(); -} diff --git a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp index dab0be3a9fde..0990c750af55 100644 --- a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp +++ b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp @@ -91,18 +91,16 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) { Mappings.end()); auto AddVariantDecl = [&](const ElementCount &VF, bool Predicate) { - const std::string TLIName = - std::string(TLI.getVectorizedFunction(ScalarName, VF, Predicate)); - if (!TLIName.empty()) { - std::string MangledName = VFABI::mangleTLIVectorName( - TLIName, ScalarName, CI.arg_size(), VF, Predicate); + const VecDesc *VD = TLI.getVectorMappingInfo(ScalarName, VF, Predicate); + if (VD && !VD->getVectorFnName().empty()) { + std::string MangledName = VD->getVectorFunctionABIVariantString(); if (!OriginalSetOfMappings.count(MangledName)) { Mappings.push_back(MangledName); ++NumCallInjected; } - Function *VariantF = M->getFunction(TLIName); + Function *VariantF = M->getFunction(VD->getVectorFnName()); if (!VariantF) - addVariantDeclaration(CI, VF, Predicate, TLIName); + addVariantDeclaration(CI, VF, Predicate, VD->getVectorFnName()); } }; diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index f7b93fc8fd06..39d5f6e53c1d 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -30,6 +30,7 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/AttributeMask.h" #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -189,20 +190,21 @@ BasicBlock *LandingPadInliningInfo::getInnerResumeDest() { const unsigned PHICapacity = 2; // Create corresponding new PHIs for all the PHIs in the outer landing pad. - Instruction *InsertPoint = &InnerResumeDest->front(); + BasicBlock::iterator InsertPoint = InnerResumeDest->begin(); BasicBlock::iterator I = OuterResumeDest->begin(); for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { PHINode *OuterPHI = cast<PHINode>(I); PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity, - OuterPHI->getName() + ".lpad-body", - InsertPoint); + OuterPHI->getName() + ".lpad-body"); + InnerPHI->insertBefore(InsertPoint); OuterPHI->replaceAllUsesWith(InnerPHI); InnerPHI->addIncoming(OuterPHI, OuterResumeDest); } // Create a PHI for the exception values. - InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity, - "eh.lpad-body", InsertPoint); + InnerEHValuesPHI = + PHINode::Create(CallerLPad->getType(), PHICapacity, "eh.lpad-body"); + InnerEHValuesPHI->insertBefore(InsertPoint); CallerLPad->replaceAllUsesWith(InnerEHValuesPHI); InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest); @@ -1331,38 +1333,51 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, } } -static bool MayContainThrowingOrExitingCall(Instruction *Begin, - Instruction *End) { +static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin, + ReturnInst *End) { assert(Begin->getParent() == End->getParent() && "Expected to be in same basic block!"); + auto BeginIt = Begin->getIterator(); + assert(BeginIt != End->getIterator() && "Non-empty BB has empty iterator"); return !llvm::isGuaranteedToTransferExecutionToSuccessor( - Begin->getIterator(), End->getIterator(), InlinerAttributeWindow + 1); + ++BeginIt, End->getIterator(), InlinerAttributeWindow + 1); } -static AttrBuilder IdentifyValidAttributes(CallBase &CB) { +// Only allow these white listed attributes to be propagated back to the +// callee. This is because other attributes may only be valid on the call +// itself, i.e. attributes such as signext and zeroext. - AttrBuilder AB(CB.getContext(), CB.getAttributes().getRetAttrs()); - if (!AB.hasAttributes()) - return AB; +// Attributes that are always okay to propagate as if they are violated its +// immediate UB. +static AttrBuilder IdentifyValidUBGeneratingAttributes(CallBase &CB) { AttrBuilder Valid(CB.getContext()); - // Only allow these white listed attributes to be propagated back to the - // callee. This is because other attributes may only be valid on the call - // itself, i.e. attributes such as signext and zeroext. - if (auto DerefBytes = AB.getDereferenceableBytes()) + if (auto DerefBytes = CB.getRetDereferenceableBytes()) Valid.addDereferenceableAttr(DerefBytes); - if (auto DerefOrNullBytes = AB.getDereferenceableOrNullBytes()) + if (auto DerefOrNullBytes = CB.getRetDereferenceableOrNullBytes()) Valid.addDereferenceableOrNullAttr(DerefOrNullBytes); - if (AB.contains(Attribute::NoAlias)) + if (CB.hasRetAttr(Attribute::NoAlias)) Valid.addAttribute(Attribute::NoAlias); - if (AB.contains(Attribute::NonNull)) + if (CB.hasRetAttr(Attribute::NoUndef)) + Valid.addAttribute(Attribute::NoUndef); + return Valid; +} + +// Attributes that need additional checks as propagating them may change +// behavior or cause new UB. +static AttrBuilder IdentifyValidPoisonGeneratingAttributes(CallBase &CB) { + AttrBuilder Valid(CB.getContext()); + if (CB.hasRetAttr(Attribute::NonNull)) Valid.addAttribute(Attribute::NonNull); + if (CB.hasRetAttr(Attribute::Alignment)) + Valid.addAlignmentAttr(CB.getRetAlign()); return Valid; } static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) { - AttrBuilder Valid = IdentifyValidAttributes(CB); - if (!Valid.hasAttributes()) + AttrBuilder ValidUB = IdentifyValidUBGeneratingAttributes(CB); + AttrBuilder ValidPG = IdentifyValidPoisonGeneratingAttributes(CB); + if (!ValidUB.hasAttributes() && !ValidPG.hasAttributes()) return; auto *CalledFunction = CB.getCalledFunction(); auto &Context = CalledFunction->getContext(); @@ -1397,7 +1412,7 @@ static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) { // limit the check to both RetVal and RI are in the same basic block and // there are no throwing/exiting instructions between these instructions. if (RI->getParent() != RetVal->getParent() || - MayContainThrowingOrExitingCall(RetVal, RI)) + MayContainThrowingOrExitingCallAfterCB(RetVal, RI)) continue; // Add to the existing attributes of NewRetVal, i.e. the cloned call // instruction. @@ -1406,7 +1421,62 @@ static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) { // existing attribute value (i.e. attributes such as dereferenceable, // dereferenceable_or_null etc). See AttrBuilder::merge for more details. AttributeList AL = NewRetVal->getAttributes(); - AttributeList NewAL = AL.addRetAttributes(Context, Valid); + if (ValidUB.getDereferenceableBytes() < AL.getRetDereferenceableBytes()) + ValidUB.removeAttribute(Attribute::Dereferenceable); + if (ValidUB.getDereferenceableOrNullBytes() < + AL.getRetDereferenceableOrNullBytes()) + ValidUB.removeAttribute(Attribute::DereferenceableOrNull); + AttributeList NewAL = AL.addRetAttributes(Context, ValidUB); + // Attributes that may generate poison returns are a bit tricky. If we + // propagate them, other uses of the callsite might have their behavior + // change or cause UB (if they have noundef) b.c of the new potential + // poison. + // Take the following three cases: + // + // 1) + // define nonnull ptr @foo() { + // %p = call ptr @bar() + // call void @use(ptr %p) willreturn nounwind + // ret ptr %p + // } + // + // 2) + // define noundef nonnull ptr @foo() { + // %p = call ptr @bar() + // call void @use(ptr %p) willreturn nounwind + // ret ptr %p + // } + // + // 3) + // define nonnull ptr @foo() { + // %p = call noundef ptr @bar() + // ret ptr %p + // } + // + // In case 1, we can't propagate nonnull because poison value in @use may + // change behavior or trigger UB. + // In case 2, we don't need to be concerned about propagating nonnull, as + // any new poison at @use will trigger UB anyways. + // In case 3, we can never propagate nonnull because it may create UB due to + // the noundef on @bar. + if (ValidPG.getAlignment().valueOrOne() < AL.getRetAlignment().valueOrOne()) + ValidPG.removeAttribute(Attribute::Alignment); + if (ValidPG.hasAttributes()) { + // Three checks. + // If the callsite has `noundef`, then a poison due to violating the + // return attribute will create UB anyways so we can always propagate. + // Otherwise, if the return value (callee to be inlined) has `noundef`, we + // can't propagate as a new poison return will cause UB. + // Finally, check if the return value has no uses whose behavior may + // change/may cause UB if we potentially return poison. At the moment this + // is implemented overly conservatively with a single-use check. + // TODO: Update the single-use check to iterate through uses and only bail + // if we have a potentially dangerous use. + + if (CB.hasRetAttr(Attribute::NoUndef) || + (RetVal->hasOneUse() && !RetVal->hasRetAttr(Attribute::NoUndef))) + NewAL = NewAL.addRetAttributes(Context, ValidPG); + } NewRetVal->setAttributes(NewAL); } } @@ -1515,10 +1585,10 @@ static Value *HandleByValArgument(Type *ByValType, Value *Arg, if (ByValAlignment) Alignment = std::max(Alignment, *ByValAlignment); - Value *NewAlloca = - new AllocaInst(ByValType, DL.getAllocaAddrSpace(), nullptr, Alignment, - Arg->getName(), &*Caller->begin()->begin()); - IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca)); + AllocaInst *NewAlloca = new AllocaInst(ByValType, DL.getAllocaAddrSpace(), + nullptr, Alignment, Arg->getName()); + NewAlloca->insertBefore(Caller->begin()->begin()); + IFI.StaticAllocas.push_back(NewAlloca); // Uses of the argument in the function should use our new alloca // instead. @@ -1538,8 +1608,8 @@ static bool isUsedByLifetimeMarker(Value *V) { // lifetime.start or lifetime.end intrinsics. static bool hasLifetimeMarkers(AllocaInst *AI) { Type *Ty = AI->getType(); - Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(), - Ty->getPointerAddressSpace()); + Type *Int8PtrTy = + PointerType::get(Ty->getContext(), Ty->getPointerAddressSpace()); if (Ty == Int8PtrTy) return isUsedByLifetimeMarker(AI); @@ -1596,48 +1666,71 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, // the call site location instead. bool NoInlineLineTables = Fn->hasFnAttribute("no-inline-line-tables"); - for (; FI != Fn->end(); ++FI) { - for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); - BI != BE; ++BI) { - // Loop metadata needs to be updated so that the start and end locs - // reference inlined-at locations. - auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode, - &IANodes](Metadata *MD) -> Metadata * { - if (auto *Loc = dyn_cast_or_null<DILocation>(MD)) - return inlineDebugLoc(Loc, InlinedAtNode, Ctx, IANodes).get(); - return MD; - }; - updateLoopMetadataDebugLocations(*BI, updateLoopInfoLoc); + // Helper-util for updating the metadata attached to an instruction. + auto UpdateInst = [&](Instruction &I) { + // Loop metadata needs to be updated so that the start and end locs + // reference inlined-at locations. + auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode, + &IANodes](Metadata *MD) -> Metadata * { + if (auto *Loc = dyn_cast_or_null<DILocation>(MD)) + return inlineDebugLoc(Loc, InlinedAtNode, Ctx, IANodes).get(); + return MD; + }; + updateLoopMetadataDebugLocations(I, updateLoopInfoLoc); - if (!NoInlineLineTables) - if (DebugLoc DL = BI->getDebugLoc()) { - DebugLoc IDL = - inlineDebugLoc(DL, InlinedAtNode, BI->getContext(), IANodes); - BI->setDebugLoc(IDL); - continue; - } + if (!NoInlineLineTables) + if (DebugLoc DL = I.getDebugLoc()) { + DebugLoc IDL = + inlineDebugLoc(DL, InlinedAtNode, I.getContext(), IANodes); + I.setDebugLoc(IDL); + return; + } - if (CalleeHasDebugInfo && !NoInlineLineTables) - continue; + if (CalleeHasDebugInfo && !NoInlineLineTables) + return; - // If the inlined instruction has no line number, or if inline info - // is not being generated, make it look as if it originates from the call - // location. This is important for ((__always_inline, __nodebug__)) - // functions which must use caller location for all instructions in their - // function body. + // If the inlined instruction has no line number, or if inline info + // is not being generated, make it look as if it originates from the call + // location. This is important for ((__always_inline, __nodebug__)) + // functions which must use caller location for all instructions in their + // function body. - // Don't update static allocas, as they may get moved later. - if (auto *AI = dyn_cast<AllocaInst>(BI)) - if (allocaWouldBeStaticInEntry(AI)) - continue; + // Don't update static allocas, as they may get moved later. + if (auto *AI = dyn_cast<AllocaInst>(&I)) + if (allocaWouldBeStaticInEntry(AI)) + return; - // Do not force a debug loc for pseudo probes, since they do not need to - // be debuggable, and also they are expected to have a zero/null dwarf - // discriminator at this point which could be violated otherwise. - if (isa<PseudoProbeInst>(BI)) - continue; + // Do not force a debug loc for pseudo probes, since they do not need to + // be debuggable, and also they are expected to have a zero/null dwarf + // discriminator at this point which could be violated otherwise. + if (isa<PseudoProbeInst>(I)) + return; - BI->setDebugLoc(TheCallDL); + I.setDebugLoc(TheCallDL); + }; + + // Helper-util for updating debug-info records attached to instructions. + auto UpdateDPV = [&](DPValue *DPV) { + assert(DPV->getDebugLoc() && "Debug Value must have debug loc"); + if (NoInlineLineTables) { + DPV->setDebugLoc(TheCallDL); + return; + } + DebugLoc DL = DPV->getDebugLoc(); + DebugLoc IDL = + inlineDebugLoc(DL, InlinedAtNode, + DPV->getMarker()->getParent()->getContext(), IANodes); + DPV->setDebugLoc(IDL); + }; + + // Iterate over all instructions, updating metadata and debug-info records. + for (; FI != Fn->end(); ++FI) { + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; + ++BI) { + UpdateInst(*BI); + for (DPValue &DPV : BI->getDbgValueRange()) { + UpdateDPV(&DPV); + } } // Remove debug info intrinsics if we're not keeping inline info. @@ -1647,11 +1740,12 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, if (isa<DbgInfoIntrinsic>(BI)) { BI = BI->eraseFromParent(); continue; + } else { + BI->dropDbgValues(); } ++BI; } } - } } @@ -1760,12 +1854,12 @@ static void updateCallerBFI(BasicBlock *CallSiteBlock, continue; auto *OrigBB = cast<BasicBlock>(Entry.first); auto *ClonedBB = cast<BasicBlock>(Entry.second); - uint64_t Freq = CalleeBFI->getBlockFreq(OrigBB).getFrequency(); + BlockFrequency Freq = CalleeBFI->getBlockFreq(OrigBB); if (!ClonedBBs.insert(ClonedBB).second) { // Multiple blocks in the callee might get mapped to one cloned block in // the caller since we prune the callee as we clone it. When that happens, // we want to use the maximum among the original blocks' frequencies. - uint64_t NewFreq = CallerBFI->getBlockFreq(ClonedBB).getFrequency(); + BlockFrequency NewFreq = CallerBFI->getBlockFreq(ClonedBB); if (NewFreq > Freq) Freq = NewFreq; } @@ -1773,8 +1867,7 @@ static void updateCallerBFI(BasicBlock *CallSiteBlock, } BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock)); CallerBFI->setBlockFreqAndScale( - EntryClone, CallerBFI->getBlockFreq(CallSiteBlock).getFrequency(), - ClonedBBs); + EntryClone, CallerBFI->getBlockFreq(CallSiteBlock), ClonedBBs); } /// Update the branch metadata for cloned call instructions. @@ -1882,8 +1975,7 @@ inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind, Builder.SetInsertPoint(II); Function *IFn = Intrinsic::getDeclaration(Mod, Intrinsic::objc_release); - Value *BC = Builder.CreateBitCast(RetOpnd, IFn->getArg(0)->getType()); - Builder.CreateCall(IFn, BC, ""); + Builder.CreateCall(IFn, RetOpnd, ""); } II->eraseFromParent(); InsertRetainCall = false; @@ -1918,8 +2010,7 @@ inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind, // to objc_retain. Builder.SetInsertPoint(RI); Function *IFn = Intrinsic::getDeclaration(Mod, Intrinsic::objc_retain); - Value *BC = Builder.CreateBitCast(RetOpnd, IFn->getArg(0)->getType()); - Builder.CreateCall(IFn, BC, ""); + Builder.CreateCall(IFn, RetOpnd, ""); } } } @@ -1953,9 +2044,11 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // The inliner does not know how to inline through calls with operand bundles // in general ... + Value *ConvergenceControlToken = nullptr; if (CB.hasOperandBundles()) { for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) { - uint32_t Tag = CB.getOperandBundleAt(i).getTagID(); + auto OBUse = CB.getOperandBundleAt(i); + uint32_t Tag = OBUse.getTagID(); // ... but it knows how to inline through "deopt" operand bundles ... if (Tag == LLVMContext::OB_deopt) continue; @@ -1966,11 +2059,37 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, continue; if (Tag == LLVMContext::OB_kcfi) continue; + if (Tag == LLVMContext::OB_convergencectrl) { + ConvergenceControlToken = OBUse.Inputs[0].get(); + continue; + } return InlineResult::failure("unsupported operand bundle"); } } + // FIXME: The check below is redundant and incomplete. According to spec, if a + // convergent call is missing a token, then the caller is using uncontrolled + // convergence. If the callee has an entry intrinsic, then the callee is using + // controlled convergence, and the call cannot be inlined. A proper + // implemenation of this check requires a whole new analysis that identifies + // convergence in every function. For now, we skip that and just do this one + // cursory check. The underlying assumption is that in a compiler flow that + // fully implements convergence control tokens, there is no mixing of + // controlled and uncontrolled convergent operations in the whole program. + if (CB.isConvergent()) { + auto *I = CalledFunc->getEntryBlock().getFirstNonPHI(); + if (auto *IntrinsicCall = dyn_cast<IntrinsicInst>(I)) { + if (IntrinsicCall->getIntrinsicID() == + Intrinsic::experimental_convergence_entry) { + if (!ConvergenceControlToken) { + return InlineResult::failure( + "convergent call needs convergencectrl operand"); + } + } + } + } + // If the call to the callee cannot throw, set the 'nounwind' flag on any // calls that we inline. bool MarkNoUnwind = CB.doesNotThrow(); @@ -2260,6 +2379,17 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, IFI.GetAssumptionCache(*Caller).registerAssumption(II); } + if (ConvergenceControlToken) { + auto *I = FirstNewBlock->getFirstNonPHI(); + if (auto *IntrinsicCall = dyn_cast<IntrinsicInst>(I)) { + if (IntrinsicCall->getIntrinsicID() == + Intrinsic::experimental_convergence_entry) { + IntrinsicCall->replaceAllUsesWith(ConvergenceControlToken); + IntrinsicCall->eraseFromParent(); + } + } + } + // If there are any alloca instructions in the block that used to be the entry // block for the callee, move them to the entry block of the caller. First // calculate which instruction they should be inserted before. We insert the @@ -2296,6 +2426,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // Transfer all of the allocas over in a block. Using splice means // that the instructions aren't removed from the symbol table, then // reinserted. + I.setTailBit(true); Caller->getEntryBlock().splice(InsertPoint, &*FirstNewBlock, AI->getIterator(), I); } @@ -2400,7 +2531,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // `Caller->isPresplitCoroutine()` would affect AlwaysInliner at O0 only. if ((InsertLifetime || Caller->isPresplitCoroutine()) && !IFI.StaticAllocas.empty()) { - IRBuilder<> builder(&FirstNewBlock->front()); + IRBuilder<> builder(&*FirstNewBlock, FirstNewBlock->begin()); for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) { AllocaInst *AI = IFI.StaticAllocas[ai]; // Don't mark swifterror allocas. They can't have bitcast uses. @@ -2454,14 +2585,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // If the inlined code contained dynamic alloca instructions, wrap the inlined // code with llvm.stacksave/llvm.stackrestore intrinsics. if (InlinedFunctionInfo.ContainsDynamicAllocas) { - Module *M = Caller->getParent(); - // Get the two intrinsics we care about. - Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); - Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); - // Insert the llvm.stacksave. CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin()) - .CreateCall(StackSave, {}, "savedstack"); + .CreateStackSave("savedstack"); // Insert a call to llvm.stackrestore before any return instructions in the // inlined function. @@ -2472,7 +2598,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, continue; if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall()) continue; - IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr); + IRBuilder<>(RI).CreateStackRestore(SavedPtr); } } @@ -2574,6 +2700,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, Builder.CreateRetVoid(); else Builder.CreateRet(NewDeoptCall); + // Since the ret type is changed, remove the incompatible attributes. + NewDeoptCall->removeRetAttrs( + AttributeFuncs::typeIncompatible(NewDeoptCall->getType())); } // Leave behind the normal returns so we can merge control flow. @@ -2704,8 +2833,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, if (IFI.CallerBFI) { // Copy original BB's block frequency to AfterCallBB - IFI.CallerBFI->setBlockFreq( - AfterCallBB, IFI.CallerBFI->getBlockFreq(OrigBB).getFrequency()); + IFI.CallerBFI->setBlockFreq(AfterCallBB, + IFI.CallerBFI->getBlockFreq(OrigBB)); } // Change the branch that used to go to AfterCallBB to branch to the first @@ -2731,8 +2860,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // The PHI node should go at the front of the new basic block to merge all // possible incoming values. if (!CB.use_empty()) { - PHI = PHINode::Create(RTy, Returns.size(), CB.getName(), - &AfterCallBB->front()); + PHI = PHINode::Create(RTy, Returns.size(), CB.getName()); + PHI->insertBefore(AfterCallBB->begin()); // Anything that used the result of the function call should now use the // PHI node as their operand. CB.replaceAllUsesWith(PHI); diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp index c36b0533580b..5e0c312fe149 100644 --- a/llvm/lib/Transforms/Utils/LCSSA.cpp +++ b/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -160,7 +160,8 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, if (SSAUpdate.HasValueForBlock(ExitBB)) continue; PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB), - I->getName() + ".lcssa", &ExitBB->front()); + I->getName() + ".lcssa"); + PN->insertBefore(ExitBB->begin()); if (InsertedPHIs) InsertedPHIs->push_back(PN); // Get the debug location from the original instruction. @@ -241,7 +242,8 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, } SmallVector<DbgValueInst *, 4> DbgValues; - llvm::findDbgValues(DbgValues, I); + SmallVector<DPValue *, 4> DPValues; + llvm::findDbgValues(DbgValues, I, &DPValues); // Update pre-existing debug value uses that reside outside the loop. for (auto *DVI : DbgValues) { @@ -257,6 +259,21 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, DVI->replaceVariableLocationOp(I, V); } + // RemoveDIs: copy-paste of block above, using non-instruction debug-info + // records. + for (DPValue *DPV : DPValues) { + BasicBlock *UserBB = DPV->getMarker()->getParent(); + if (InstBB == UserBB || L->contains(UserBB)) + continue; + // We currently only handle debug values residing in blocks that were + // traversed while rewriting the uses. If we inserted just a single PHI, + // we will handle all relevant debug values. + Value *V = AddedPHIs.size() == 1 ? AddedPHIs[0] + : SSAUpdate.FindValueForBlock(UserBB); + if (V) + DPV->replaceVariableLocationOp(I, V); + } + // SSAUpdater might have inserted phi-nodes inside other loops. We'll need // to post-process them to keep LCSSA form. for (PHINode *InsertedPN : LocalInsertedPHIs) { diff --git a/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp index cdcfb5050bff..6220f8509309 100644 --- a/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp +++ b/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp @@ -101,7 +101,7 @@ private: float Val) { Constant *V = ConstantFP::get(BBBuilder.getContext(), APFloat(Val)); if (!Arg->getType()->isFloatTy()) - V = ConstantExpr::getFPExtend(V, Arg->getType()); + V = ConstantFoldCastInstruction(Instruction::FPExt, V, Arg->getType()); if (BBBuilder.GetInsertBlock()->getParent()->hasFnAttribute(Attribute::StrictFP)) BBBuilder.setIsFPConstrained(true); return BBBuilder.CreateFCmp(Cmp, Arg, V); diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index f153ace5d3fc..51f39e0ba0cc 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -69,6 +69,7 @@ #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" @@ -86,6 +87,8 @@ using namespace llvm; using namespace llvm::PatternMatch; +extern cl::opt<bool> UseNewDbgInfoFormat; + #define DEBUG_TYPE "local" STATISTIC(NumRemoved, "Number of unreachable basic blocks removed"); @@ -227,9 +230,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, // Remove weight for this case. std::swap(Weights[Idx + 1], Weights.back()); Weights.pop_back(); - SI->setMetadata(LLVMContext::MD_prof, - MDBuilder(BB->getContext()). - createBranchWeights(Weights)); + setBranchWeights(*SI, Weights); } // Remove this entry. BasicBlock *ParentBB = SI->getParent(); @@ -414,7 +415,7 @@ bool llvm::wouldInstructionBeTriviallyDeadOnUnusedPaths( return wouldInstructionBeTriviallyDead(I, TLI); } -bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, +bool llvm::wouldInstructionBeTriviallyDead(const Instruction *I, const TargetLibraryInfo *TLI) { if (I->isTerminator()) return false; @@ -428,7 +429,7 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, if (isa<DbgVariableIntrinsic>(I)) return false; - if (DbgLabelInst *DLI = dyn_cast<DbgLabelInst>(I)) { + if (const DbgLabelInst *DLI = dyn_cast<DbgLabelInst>(I)) { if (DLI->getLabel()) return false; return true; @@ -443,9 +444,16 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, if (!II) return false; + switch (II->getIntrinsicID()) { + case Intrinsic::experimental_guard: { + // Guards on true are operationally no-ops. In the future we can + // consider more sophisticated tradeoffs for guards considering potential + // for check widening, but for now we keep things simple. + auto *Cond = dyn_cast<ConstantInt>(II->getArgOperand(0)); + return Cond && Cond->isOne(); + } // TODO: These intrinsics are not safe to remove, because this may remove // a well-defined trap. - switch (II->getIntrinsicID()) { case Intrinsic::wasm_trunc_signed: case Intrinsic::wasm_trunc_unsigned: case Intrinsic::ptrauth_auth: @@ -461,7 +469,7 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, // Special case intrinsics that "may have side effects" but can be deleted // when dead. - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { // Safe to delete llvm.stacksave and launder.invariant.group if dead. if (II->getIntrinsicID() == Intrinsic::stacksave || II->getIntrinsicID() == Intrinsic::launder_invariant_group) @@ -484,13 +492,9 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, return false; } - // Assumptions are dead if their condition is trivially true. Guards on - // true are operationally no-ops. In the future we can consider more - // sophisticated tradeoffs for guards considering potential for check - // widening, but for now we keep things simple. - if ((II->getIntrinsicID() == Intrinsic::assume && - isAssumeWithEmptyBundle(cast<AssumeInst>(*II))) || - II->getIntrinsicID() == Intrinsic::experimental_guard) { + // Assumptions are dead if their condition is trivially true. + if (II->getIntrinsicID() == Intrinsic::assume && + isAssumeWithEmptyBundle(cast<AssumeInst>(*II))) { if (ConstantInt *Cond = dyn_cast<ConstantInt>(II->getArgOperand(0))) return !Cond->isZero(); @@ -605,10 +609,13 @@ void llvm::RecursivelyDeleteTriviallyDeadInstructions( bool llvm::replaceDbgUsesWithUndef(Instruction *I) { SmallVector<DbgVariableIntrinsic *, 1> DbgUsers; - findDbgUsers(DbgUsers, I); + SmallVector<DPValue *, 1> DPUsers; + findDbgUsers(DbgUsers, I, &DPUsers); for (auto *DII : DbgUsers) DII->setKillLocation(); - return !DbgUsers.empty(); + for (auto *DPV : DPUsers) + DPV->setKillLocation(); + return !DbgUsers.empty() || !DPUsers.empty(); } /// areAllUsesEqual - Check whether the uses of a value are all the same. @@ -847,17 +854,17 @@ static bool CanMergeValues(Value *First, Value *Second) { /// branch to Succ, into Succ. /// /// Assumption: Succ is the single successor for BB. -static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { +static bool +CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ, + const SmallPtrSetImpl<BasicBlock *> &BBPreds) { assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!"); LLVM_DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into " << Succ->getName() << "\n"); // Shortcut, if there is only a single predecessor it must be BB and merging // is always safe - if (Succ->getSinglePredecessor()) return true; - - // Make a list of the predecessors of BB - SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB)); + if (Succ->getSinglePredecessor()) + return true; // Look at all the phi nodes in Succ, to see if they present a conflict when // merging these blocks @@ -997,6 +1004,35 @@ static void replaceUndefValuesInPhi(PHINode *PN, } } +// Only when they shares a single common predecessor, return true. +// Only handles cases when BB can't be merged while its predecessors can be +// redirected. +static bool +CanRedirectPredsOfEmptyBBToSucc(BasicBlock *BB, BasicBlock *Succ, + const SmallPtrSetImpl<BasicBlock *> &BBPreds, + const SmallPtrSetImpl<BasicBlock *> &SuccPreds, + BasicBlock *&CommonPred) { + + // There must be phis in BB, otherwise BB will be merged into Succ directly + if (BB->phis().empty() || Succ->phis().empty()) + return false; + + // BB must have predecessors not shared that can be redirected to Succ + if (!BB->hasNPredecessorsOrMore(2)) + return false; + + // Get single common predecessors of both BB and Succ + for (BasicBlock *SuccPred : SuccPreds) { + if (BBPreds.count(SuccPred)) { + if (CommonPred) + return false; + CommonPred = SuccPred; + } + } + + return true; +} + /// Replace a value flowing from a block to a phi with /// potentially multiple instances of that value flowing from the /// block's predecessors to the phi. @@ -1004,9 +1040,11 @@ static void replaceUndefValuesInPhi(PHINode *PN, /// \param BB The block with the value flowing into the phi. /// \param BBPreds The predecessors of BB. /// \param PN The phi that we are updating. +/// \param CommonPred The common predecessor of BB and PN's BasicBlock static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB, const PredBlockVector &BBPreds, - PHINode *PN) { + PHINode *PN, + BasicBlock *CommonPred) { Value *OldVal = PN->removeIncomingValue(BB, false); assert(OldVal && "No entry in PHI for Pred BB!"); @@ -1034,26 +1072,39 @@ static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB, // will trigger asserts if we try to clean it up now, without also // simplifying the corresponding conditional branch). BasicBlock *PredBB = OldValPN->getIncomingBlock(i); + + if (PredBB == CommonPred) + continue; + Value *PredVal = OldValPN->getIncomingValue(i); - Value *Selected = selectIncomingValueForBlock(PredVal, PredBB, - IncomingValues); + Value *Selected = + selectIncomingValueForBlock(PredVal, PredBB, IncomingValues); // And add a new incoming value for this predecessor for the // newly retargeted branch. PN->addIncoming(Selected, PredBB); } + if (CommonPred) + PN->addIncoming(OldValPN->getIncomingValueForBlock(CommonPred), BB); + } else { for (unsigned i = 0, e = BBPreds.size(); i != e; ++i) { // Update existing incoming values in PN for this // predecessor of BB. BasicBlock *PredBB = BBPreds[i]; - Value *Selected = selectIncomingValueForBlock(OldVal, PredBB, - IncomingValues); + + if (PredBB == CommonPred) + continue; + + Value *Selected = + selectIncomingValueForBlock(OldVal, PredBB, IncomingValues); // And add a new incoming value for this predecessor for the // newly retargeted branch. PN->addIncoming(Selected, PredBB); } + if (CommonPred) + PN->addIncoming(OldVal, BB); } replaceUndefValuesInPhi(PN, IncomingValues); @@ -1064,13 +1115,30 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, assert(BB != &BB->getParent()->getEntryBlock() && "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!"); - // We can't eliminate infinite loops. + // We can't simplify infinite loops. BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0); - if (BB == Succ) return false; + if (BB == Succ) + return false; + + SmallPtrSet<BasicBlock *, 16> BBPreds(pred_begin(BB), pred_end(BB)); + SmallPtrSet<BasicBlock *, 16> SuccPreds(pred_begin(Succ), pred_end(Succ)); - // Check to see if merging these blocks would cause conflicts for any of the - // phi nodes in BB or Succ. If not, we can safely merge. - if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false; + // The single common predecessor of BB and Succ when BB cannot be killed + BasicBlock *CommonPred = nullptr; + + bool BBKillable = CanPropagatePredecessorsForPHIs(BB, Succ, BBPreds); + + // Even if we can not fold bB into Succ, we may be able to redirect the + // predecessors of BB to Succ. + bool BBPhisMergeable = + BBKillable || + CanRedirectPredsOfEmptyBBToSucc(BB, Succ, BBPreds, SuccPreds, CommonPred); + + if (!BBKillable && !BBPhisMergeable) + return false; + + // Check to see if merging these blocks/phis would cause conflicts for any of + // the phi nodes in BB or Succ. If not, we can safely merge. // Check for cases where Succ has multiple predecessors and a PHI node in BB // has uses which will not disappear when the PHI nodes are merged. It is @@ -1099,6 +1167,11 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, } } + if (BBPhisMergeable && CommonPred) + LLVM_DEBUG(dbgs() << "Found Common Predecessor between: " << BB->getName() + << " and " << Succ->getName() << " : " + << CommonPred->getName() << "\n"); + // 'BB' and 'BB->Pred' are loop latches, bail out to presrve inner loop // metadata. // @@ -1171,25 +1244,37 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, if (PredTI->hasMetadata(LLVMContext::MD_loop)) return false; - LLVM_DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); + if (BBKillable) + LLVM_DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); + else if (BBPhisMergeable) + LLVM_DEBUG(dbgs() << "Merge Phis in Trivial BB: \n" << *BB); SmallVector<DominatorTree::UpdateType, 32> Updates; + if (DTU) { // To avoid processing the same predecessor more than once. SmallPtrSet<BasicBlock *, 8> SeenPreds; - // All predecessors of BB will be moved to Succ. - SmallPtrSet<BasicBlock *, 8> PredsOfSucc(pred_begin(Succ), pred_end(Succ)); + // All predecessors of BB (except the common predecessor) will be moved to + // Succ. Updates.reserve(Updates.size() + 2 * pred_size(BB) + 1); - for (auto *PredOfBB : predecessors(BB)) - // This predecessor of BB may already have Succ as a successor. - if (!PredsOfSucc.contains(PredOfBB)) + + for (auto *PredOfBB : predecessors(BB)) { + // Do not modify those common predecessors of BB and Succ + if (!SuccPreds.contains(PredOfBB)) if (SeenPreds.insert(PredOfBB).second) Updates.push_back({DominatorTree::Insert, PredOfBB, Succ}); + } + SeenPreds.clear(); + for (auto *PredOfBB : predecessors(BB)) - if (SeenPreds.insert(PredOfBB).second) + // When BB cannot be killed, do not remove the edge between BB and + // CommonPred. + if (SeenPreds.insert(PredOfBB).second && PredOfBB != CommonPred) Updates.push_back({DominatorTree::Delete, PredOfBB, BB}); - Updates.push_back({DominatorTree::Delete, BB, Succ}); + + if (BBKillable) + Updates.push_back({DominatorTree::Delete, BB, Succ}); } if (isa<PHINode>(Succ->begin())) { @@ -1201,21 +1286,19 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, // Loop over all of the PHI nodes in the successor of BB. for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); - - redirectValuesFromPredecessorsToPhi(BB, BBPreds, PN); + redirectValuesFromPredecessorsToPhi(BB, BBPreds, PN, CommonPred); } } if (Succ->getSinglePredecessor()) { // BB is the only predecessor of Succ, so Succ will end up with exactly // the same predecessors BB had. - // Copy over any phi, debug or lifetime instruction. BB->getTerminator()->eraseFromParent(); - Succ->splice(Succ->getFirstNonPHI()->getIterator(), BB); + Succ->splice(Succ->getFirstNonPHIIt(), BB); } else { while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) { - // We explicitly check for such uses in CanPropagatePredecessorsForPHIs. + // We explicitly check for such uses for merging phis. assert(PN->use_empty() && "There shouldn't be any uses here!"); PN->eraseFromParent(); } @@ -1228,26 +1311,42 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, for (BasicBlock *Pred : predecessors(BB)) Pred->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopMD); - // Everything that jumped to BB now goes to Succ. - BB->replaceAllUsesWith(Succ); - if (!Succ->hasName()) Succ->takeName(BB); + if (BBKillable) { + // Everything that jumped to BB now goes to Succ. + BB->replaceAllUsesWith(Succ); - // Clear the successor list of BB to match updates applying to DTU later. - if (BB->getTerminator()) - BB->back().eraseFromParent(); - new UnreachableInst(BB->getContext(), BB); - assert(succ_empty(BB) && "The successor list of BB isn't empty before " - "applying corresponding DTU updates."); + if (!Succ->hasName()) + Succ->takeName(BB); + + // Clear the successor list of BB to match updates applying to DTU later. + if (BB->getTerminator()) + BB->back().eraseFromParent(); + + new UnreachableInst(BB->getContext(), BB); + assert(succ_empty(BB) && "The successor list of BB isn't empty before " + "applying corresponding DTU updates."); + } else if (BBPhisMergeable) { + // Everything except CommonPred that jumped to BB now goes to Succ. + BB->replaceUsesWithIf(Succ, [BBPreds, CommonPred](Use &U) -> bool { + if (Instruction *UseInst = dyn_cast<Instruction>(U.getUser())) + return UseInst->getParent() != CommonPred && + BBPreds.contains(UseInst->getParent()); + return false; + }); + } if (DTU) DTU->applyUpdates(Updates); - DeleteDeadBlock(BB, DTU); + if (BBKillable) + DeleteDeadBlock(BB, DTU); return true; } -static bool EliminateDuplicatePHINodesNaiveImpl(BasicBlock *BB) { +static bool +EliminateDuplicatePHINodesNaiveImpl(BasicBlock *BB, + SmallPtrSetImpl<PHINode *> &ToRemove) { // This implementation doesn't currently consider undef operands // specially. Theoretically, two phis which are identical except for // one having an undef where the other doesn't could be collapsed. @@ -1263,12 +1362,14 @@ static bool EliminateDuplicatePHINodesNaiveImpl(BasicBlock *BB) { // Note that we only look in the upper square's triangle, // we already checked that the lower triangle PHI's aren't identical. for (auto J = I; PHINode *DuplicatePN = dyn_cast<PHINode>(J); ++J) { + if (ToRemove.contains(DuplicatePN)) + continue; if (!DuplicatePN->isIdenticalToWhenDefined(PN)) continue; // A duplicate. Replace this PHI with the base PHI. ++NumPHICSEs; DuplicatePN->replaceAllUsesWith(PN); - DuplicatePN->eraseFromParent(); + ToRemove.insert(DuplicatePN); Changed = true; // The RAUW can change PHIs that we already visited. @@ -1279,7 +1380,9 @@ static bool EliminateDuplicatePHINodesNaiveImpl(BasicBlock *BB) { return Changed; } -static bool EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB) { +static bool +EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB, + SmallPtrSetImpl<PHINode *> &ToRemove) { // This implementation doesn't currently consider undef operands // specially. Theoretically, two phis which are identical except for // one having an undef where the other doesn't could be collapsed. @@ -1343,12 +1446,14 @@ static bool EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB) { // Examine each PHI. bool Changed = false; for (auto I = BB->begin(); PHINode *PN = dyn_cast<PHINode>(I++);) { + if (ToRemove.contains(PN)) + continue; auto Inserted = PHISet.insert(PN); if (!Inserted.second) { // A duplicate. Replace this PHI with its duplicate. ++NumPHICSEs; PN->replaceAllUsesWith(*Inserted.first); - PN->eraseFromParent(); + ToRemove.insert(PN); Changed = true; // The RAUW can change PHIs that we already visited. Start over from the @@ -1361,25 +1466,27 @@ static bool EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB) { return Changed; } -bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { +bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB, + SmallPtrSetImpl<PHINode *> &ToRemove) { if ( #ifndef NDEBUG !PHICSEDebugHash && #endif hasNItemsOrLess(BB->phis(), PHICSENumPHISmallSize)) - return EliminateDuplicatePHINodesNaiveImpl(BB); - return EliminateDuplicatePHINodesSetBasedImpl(BB); + return EliminateDuplicatePHINodesNaiveImpl(BB, ToRemove); + return EliminateDuplicatePHINodesSetBasedImpl(BB, ToRemove); } -/// If the specified pointer points to an object that we control, try to modify -/// the object's alignment to PrefAlign. Returns a minimum known alignment of -/// the value after the operation, which may be lower than PrefAlign. -/// -/// Increating value alignment isn't often possible though. If alignment is -/// important, a more reliable approach is to simply align all global variables -/// and allocation instructions to their preferred alignment from the beginning. -static Align tryEnforceAlignment(Value *V, Align PrefAlign, - const DataLayout &DL) { +bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { + SmallPtrSet<PHINode *, 8> ToRemove; + bool Changed = EliminateDuplicatePHINodes(BB, ToRemove); + for (PHINode *PN : ToRemove) + PN->eraseFromParent(); + return Changed; +} + +Align llvm::tryEnforceAlignment(Value *V, Align PrefAlign, + const DataLayout &DL) { V = V->stripPointerCasts(); if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { @@ -1463,12 +1570,18 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar, // is removed by LowerDbgDeclare(), we need to make sure that we are // not inserting the same dbg.value intrinsic over and over. SmallVector<DbgValueInst *, 1> DbgValues; - findDbgValues(DbgValues, APN); + SmallVector<DPValue *, 1> DPValues; + findDbgValues(DbgValues, APN, &DPValues); for (auto *DVI : DbgValues) { assert(is_contained(DVI->getValues(), APN)); if ((DVI->getVariable() == DIVar) && (DVI->getExpression() == DIExpr)) return true; } + for (auto *DPV : DPValues) { + assert(is_contained(DPV->location_ops(), APN)); + if ((DPV->getVariable() == DIVar) && (DPV->getExpression() == DIExpr)) + return true; + } return false; } @@ -1504,6 +1617,67 @@ static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) { // Could not determine size of variable. Conservatively return false. return false; } +// RemoveDIs: duplicate implementation of the above, using DPValues, the +// replacement for dbg.values. +static bool valueCoversEntireFragment(Type *ValTy, DPValue *DPV) { + const DataLayout &DL = DPV->getModule()->getDataLayout(); + TypeSize ValueSize = DL.getTypeAllocSizeInBits(ValTy); + if (std::optional<uint64_t> FragmentSize = DPV->getFragmentSizeInBits()) + return TypeSize::isKnownGE(ValueSize, TypeSize::getFixed(*FragmentSize)); + + // We can't always calculate the size of the DI variable (e.g. if it is a + // VLA). Try to use the size of the alloca that the dbg intrinsic describes + // intead. + if (DPV->isAddressOfVariable()) { + // DPV should have exactly 1 location when it is an address. + assert(DPV->getNumVariableLocationOps() == 1 && + "address of variable must have exactly 1 location operand."); + if (auto *AI = + dyn_cast_or_null<AllocaInst>(DPV->getVariableLocationOp(0))) { + if (std::optional<TypeSize> FragmentSize = AI->getAllocationSizeInBits(DL)) { + return TypeSize::isKnownGE(ValueSize, *FragmentSize); + } + } + } + // Could not determine size of variable. Conservatively return false. + return false; +} + +static void insertDbgValueOrDPValue(DIBuilder &Builder, Value *DV, + DILocalVariable *DIVar, + DIExpression *DIExpr, + const DebugLoc &NewLoc, + BasicBlock::iterator Instr) { + if (!UseNewDbgInfoFormat) { + auto *DbgVal = Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, + (Instruction *)nullptr); + DbgVal->insertBefore(Instr); + } else { + // RemoveDIs: if we're using the new debug-info format, allocate a + // DPValue directly instead of a dbg.value intrinsic. + ValueAsMetadata *DVAM = ValueAsMetadata::get(DV); + DPValue *DV = new DPValue(DVAM, DIVar, DIExpr, NewLoc.get()); + Instr->getParent()->insertDPValueBefore(DV, Instr); + } +} + +static void insertDbgValueOrDPValueAfter(DIBuilder &Builder, Value *DV, + DILocalVariable *DIVar, + DIExpression *DIExpr, + const DebugLoc &NewLoc, + BasicBlock::iterator Instr) { + if (!UseNewDbgInfoFormat) { + auto *DbgVal = Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, + (Instruction *)nullptr); + DbgVal->insertAfter(&*Instr); + } else { + // RemoveDIs: if we're using the new debug-info format, allocate a + // DPValue directly instead of a dbg.value intrinsic. + ValueAsMetadata *DVAM = ValueAsMetadata::get(DV); + DPValue *DV = new DPValue(DVAM, DIVar, DIExpr, NewLoc.get()); + Instr->getParent()->insertDPValueAfter(DV, &*Instr); + } +} /// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value /// that has an associated llvm.dbg.declare intrinsic. @@ -1533,7 +1707,8 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, DIExpr->isDeref() || (!DIExpr->startsWithDeref() && valueCoversEntireFragment(DV->getType(), DII)); if (CanConvert) { - Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI); + insertDbgValueOrDPValue(Builder, DV, DIVar, DIExpr, NewLoc, + SI->getIterator()); return; } @@ -1545,7 +1720,19 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, // know which part) we insert an dbg.value intrinsic to indicate that we // know nothing about the variable's content. DV = UndefValue::get(DV->getType()); - Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI); + insertDbgValueOrDPValue(Builder, DV, DIVar, DIExpr, NewLoc, + SI->getIterator()); +} + +// RemoveDIs: duplicate the getDebugValueLoc method using DPValues instead of +// dbg.value intrinsics. +static DebugLoc getDebugValueLocDPV(DPValue *DPV) { + // Original dbg.declare must have a location. + const DebugLoc &DeclareLoc = DPV->getDebugLoc(); + MDNode *Scope = DeclareLoc.getScope(); + DILocation *InlinedAt = DeclareLoc.getInlinedAt(); + // Produce an unknown location with the correct scope / inlinedAt fields. + return DILocation::get(DPV->getContext(), 0, 0, Scope, InlinedAt); } /// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value @@ -1571,9 +1758,40 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, // future if multi-location support is added to the IR, it might be // preferable to keep tracking both the loaded value and the original // address in case the alloca can not be elided. - Instruction *DbgValue = Builder.insertDbgValueIntrinsic( - LI, DIVar, DIExpr, NewLoc, (Instruction *)nullptr); - DbgValue->insertAfter(LI); + insertDbgValueOrDPValueAfter(Builder, LI, DIVar, DIExpr, NewLoc, + LI->getIterator()); +} + +void llvm::ConvertDebugDeclareToDebugValue(DPValue *DPV, StoreInst *SI, + DIBuilder &Builder) { + assert(DPV->isAddressOfVariable()); + auto *DIVar = DPV->getVariable(); + assert(DIVar && "Missing variable"); + auto *DIExpr = DPV->getExpression(); + Value *DV = SI->getValueOperand(); + + DebugLoc NewLoc = getDebugValueLocDPV(DPV); + + if (!valueCoversEntireFragment(DV->getType(), DPV)) { + // FIXME: If storing to a part of the variable described by the dbg.declare, + // then we want to insert a DPValue.value for the corresponding fragment. + LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to DPValue: " << *DPV + << '\n'); + // For now, when there is a store to parts of the variable (but we do not + // know which part) we insert an DPValue record to indicate that we know + // nothing about the variable's content. + DV = UndefValue::get(DV->getType()); + ValueAsMetadata *DVAM = ValueAsMetadata::get(DV); + DPValue *NewDPV = new DPValue(DVAM, DIVar, DIExpr, NewLoc.get()); + SI->getParent()->insertDPValueBefore(NewDPV, SI->getIterator()); + return; + } + + assert(UseNewDbgInfoFormat); + // Create a DPValue directly and insert. + ValueAsMetadata *DVAM = ValueAsMetadata::get(DV); + DPValue *NewDPV = new DPValue(DVAM, DIVar, DIExpr, NewLoc.get()); + SI->getParent()->insertDPValueBefore(NewDPV, SI->getIterator()); } /// Inserts a llvm.dbg.value intrinsic after a phi that has an associated @@ -1604,8 +1822,38 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, // The block may be a catchswitch block, which does not have a valid // insertion point. // FIXME: Insert dbg.value markers in the successors when appropriate. - if (InsertionPt != BB->end()) - Builder.insertDbgValueIntrinsic(APN, DIVar, DIExpr, NewLoc, &*InsertionPt); + if (InsertionPt != BB->end()) { + insertDbgValueOrDPValue(Builder, APN, DIVar, DIExpr, NewLoc, InsertionPt); + } +} + +void llvm::ConvertDebugDeclareToDebugValue(DPValue *DPV, LoadInst *LI, + DIBuilder &Builder) { + auto *DIVar = DPV->getVariable(); + auto *DIExpr = DPV->getExpression(); + assert(DIVar && "Missing variable"); + + if (!valueCoversEntireFragment(LI->getType(), DPV)) { + // FIXME: If only referring to a part of the variable described by the + // dbg.declare, then we want to insert a DPValue for the corresponding + // fragment. + LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to DPValue: " << *DPV + << '\n'); + return; + } + + DebugLoc NewLoc = getDebugValueLocDPV(DPV); + + // We are now tracking the loaded value instead of the address. In the + // future if multi-location support is added to the IR, it might be + // preferable to keep tracking both the loaded value and the original + // address in case the alloca can not be elided. + assert(UseNewDbgInfoFormat); + + // Create a DPValue directly and insert. + ValueAsMetadata *LIVAM = ValueAsMetadata::get(LI); + DPValue *DV = new DPValue(LIVAM, DIVar, DIExpr, NewLoc.get()); + LI->getParent()->insertDPValueAfter(DV, LI); } /// Determine whether this alloca is either a VLA or an array. @@ -1618,6 +1866,36 @@ static bool isArray(AllocaInst *AI) { static bool isStructure(AllocaInst *AI) { return AI->getAllocatedType() && AI->getAllocatedType()->isStructTy(); } +void llvm::ConvertDebugDeclareToDebugValue(DPValue *DPV, PHINode *APN, + DIBuilder &Builder) { + auto *DIVar = DPV->getVariable(); + auto *DIExpr = DPV->getExpression(); + assert(DIVar && "Missing variable"); + + if (PhiHasDebugValue(DIVar, DIExpr, APN)) + return; + + if (!valueCoversEntireFragment(APN->getType(), DPV)) { + // FIXME: If only referring to a part of the variable described by the + // dbg.declare, then we want to insert a DPValue for the corresponding + // fragment. + LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to DPValue: " << *DPV + << '\n'); + return; + } + + BasicBlock *BB = APN->getParent(); + auto InsertionPt = BB->getFirstInsertionPt(); + + DebugLoc NewLoc = getDebugValueLocDPV(DPV); + + // The block may be a catchswitch block, which does not have a valid + // insertion point. + // FIXME: Insert DPValue markers in the successors when appropriate. + if (InsertionPt != BB->end()) { + insertDbgValueOrDPValue(Builder, APN, DIVar, DIExpr, NewLoc, InsertionPt); + } +} /// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set /// of llvm.dbg.value intrinsics. @@ -1674,8 +1952,8 @@ bool llvm::LowerDbgDeclare(Function &F) { DebugLoc NewLoc = getDebugValueLoc(DDI); auto *DerefExpr = DIExpression::append(DDI->getExpression(), dwarf::DW_OP_deref); - DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr, - NewLoc, CI); + insertDbgValueOrDPValue(DIB, AI, DDI->getVariable(), DerefExpr, + NewLoc, CI->getIterator()); } } else if (BitCastInst *BI = dyn_cast<BitCastInst>(U)) { if (BI->getType()->isPointerTy()) @@ -1694,6 +1972,69 @@ bool llvm::LowerDbgDeclare(Function &F) { return Changed; } +// RemoveDIs: re-implementation of insertDebugValuesForPHIs, but which pulls the +// debug-info out of the block's DPValues rather than dbg.value intrinsics. +static void insertDPValuesForPHIs(BasicBlock *BB, + SmallVectorImpl<PHINode *> &InsertedPHIs) { + assert(BB && "No BasicBlock to clone DPValue(s) from."); + if (InsertedPHIs.size() == 0) + return; + + // Map existing PHI nodes to their DPValues. + DenseMap<Value *, DPValue *> DbgValueMap; + for (auto &I : *BB) { + for (auto &DPV : I.getDbgValueRange()) { + for (Value *V : DPV.location_ops()) + if (auto *Loc = dyn_cast_or_null<PHINode>(V)) + DbgValueMap.insert({Loc, &DPV}); + } + } + if (DbgValueMap.size() == 0) + return; + + // Map a pair of the destination BB and old DPValue to the new DPValue, + // so that if a DPValue is being rewritten to use more than one of the + // inserted PHIs in the same destination BB, we can update the same DPValue + // with all the new PHIs instead of creating one copy for each. + MapVector<std::pair<BasicBlock *, DPValue *>, DPValue *> NewDbgValueMap; + // Then iterate through the new PHIs and look to see if they use one of the + // previously mapped PHIs. If so, create a new DPValue that will propagate + // the info through the new PHI. If we use more than one new PHI in a single + // destination BB with the same old dbg.value, merge the updates so that we + // get a single new DPValue with all the new PHIs. + for (auto PHI : InsertedPHIs) { + BasicBlock *Parent = PHI->getParent(); + // Avoid inserting a debug-info record into an EH block. + if (Parent->getFirstNonPHI()->isEHPad()) + continue; + for (auto VI : PHI->operand_values()) { + auto V = DbgValueMap.find(VI); + if (V != DbgValueMap.end()) { + DPValue *DbgII = cast<DPValue>(V->second); + auto NewDI = NewDbgValueMap.find({Parent, DbgII}); + if (NewDI == NewDbgValueMap.end()) { + DPValue *NewDbgII = DbgII->clone(); + NewDI = NewDbgValueMap.insert({{Parent, DbgII}, NewDbgII}).first; + } + DPValue *NewDbgII = NewDI->second; + // If PHI contains VI as an operand more than once, we may + // replaced it in NewDbgII; confirm that it is present. + if (is_contained(NewDbgII->location_ops(), VI)) + NewDbgII->replaceVariableLocationOp(VI, PHI); + } + } + } + // Insert the new DPValues into their destination blocks. + for (auto DI : NewDbgValueMap) { + BasicBlock *Parent = DI.first.first; + DPValue *NewDbgII = DI.second; + auto InsertionPt = Parent->getFirstInsertionPt(); + assert(InsertionPt != Parent->end() && "Ill-formed basic block"); + + InsertionPt->DbgMarker->insertDPValue(NewDbgII, true); + } +} + /// Propagate dbg.value intrinsics through the newly inserted PHIs. void llvm::insertDebugValuesForPHIs(BasicBlock *BB, SmallVectorImpl<PHINode *> &InsertedPHIs) { @@ -1701,6 +2042,8 @@ void llvm::insertDebugValuesForPHIs(BasicBlock *BB, if (InsertedPHIs.size() == 0) return; + insertDPValuesForPHIs(BB, InsertedPHIs); + // Map existing PHI nodes to their dbg.values. ValueToValueMapTy DbgValueMap; for (auto &I : *BB) { @@ -1775,44 +2118,60 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, return !DbgDeclares.empty(); } -static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress, - DIBuilder &Builder, int Offset) { - const DebugLoc &Loc = DVI->getDebugLoc(); - auto *DIVar = DVI->getVariable(); - auto *DIExpr = DVI->getExpression(); +static void updateOneDbgValueForAlloca(const DebugLoc &Loc, + DILocalVariable *DIVar, + DIExpression *DIExpr, Value *NewAddress, + DbgValueInst *DVI, DPValue *DPV, + DIBuilder &Builder, int Offset) { assert(DIVar && "Missing variable"); - // This is an alloca-based llvm.dbg.value. The first thing it should do with - // the alloca pointer is dereference it. Otherwise we don't know how to handle - // it and give up. + // This is an alloca-based dbg.value/DPValue. The first thing it should do + // with the alloca pointer is dereference it. Otherwise we don't know how to + // handle it and give up. if (!DIExpr || DIExpr->getNumElements() < 1 || DIExpr->getElement(0) != dwarf::DW_OP_deref) return; // Insert the offset before the first deref. - // We could just change the offset argument of dbg.value, but it's unsigned... if (Offset) DIExpr = DIExpression::prepend(DIExpr, 0, Offset); - Builder.insertDbgValueIntrinsic(NewAddress, DIVar, DIExpr, Loc, DVI); - DVI->eraseFromParent(); + if (DVI) { + DVI->setExpression(DIExpr); + DVI->replaceVariableLocationOp(0u, NewAddress); + } else { + assert(DPV); + DPV->setExpression(DIExpr); + DPV->replaceVariableLocationOp(0u, NewAddress); + } } void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress, DIBuilder &Builder, int Offset) { - if (auto *L = LocalAsMetadata::getIfExists(AI)) - if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L)) - for (Use &U : llvm::make_early_inc_range(MDV->uses())) - if (auto *DVI = dyn_cast<DbgValueInst>(U.getUser())) - replaceOneDbgValueForAlloca(DVI, NewAllocaAddress, Builder, Offset); + SmallVector<DbgValueInst *, 1> DbgUsers; + SmallVector<DPValue *, 1> DPUsers; + findDbgValues(DbgUsers, AI, &DPUsers); + + // Attempt to replace dbg.values that use this alloca. + for (auto *DVI : DbgUsers) + updateOneDbgValueForAlloca(DVI->getDebugLoc(), DVI->getVariable(), + DVI->getExpression(), NewAllocaAddress, DVI, + nullptr, Builder, Offset); + + // Replace any DPValues that use this alloca. + for (DPValue *DPV : DPUsers) + updateOneDbgValueForAlloca(DPV->getDebugLoc(), DPV->getVariable(), + DPV->getExpression(), NewAllocaAddress, nullptr, + DPV, Builder, Offset); } /// Where possible to salvage debug information for \p I do so. /// If not possible mark undef. void llvm::salvageDebugInfo(Instruction &I) { SmallVector<DbgVariableIntrinsic *, 1> DbgUsers; - findDbgUsers(DbgUsers, &I); - salvageDebugInfoForDbgValues(I, DbgUsers); + SmallVector<DPValue *, 1> DPUsers; + findDbgUsers(DbgUsers, &I, &DPUsers); + salvageDebugInfoForDbgValues(I, DbgUsers, DPUsers); } /// Salvage the address component of \p DAI. @@ -1850,7 +2209,8 @@ static void salvageDbgAssignAddress(DbgAssignIntrinsic *DAI) { } void llvm::salvageDebugInfoForDbgValues( - Instruction &I, ArrayRef<DbgVariableIntrinsic *> DbgUsers) { + Instruction &I, ArrayRef<DbgVariableIntrinsic *> DbgUsers, + ArrayRef<DPValue *> DPUsers) { // These are arbitrary chosen limits on the maximum number of values and the // maximum size of a debug expression we can salvage up to, used for // performance reasons. @@ -1916,12 +2276,70 @@ void llvm::salvageDebugInfoForDbgValues( LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n'); Salvaged = true; } + // Duplicate of above block for DPValues. + for (auto *DPV : DPUsers) { + // Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they + // are implicitly pointing out the value as a DWARF memory location + // description. + bool StackValue = DPV->getType() == DPValue::LocationType::Value; + auto DPVLocation = DPV->location_ops(); + assert( + is_contained(DPVLocation, &I) && + "DbgVariableIntrinsic must use salvaged instruction as its location"); + SmallVector<Value *, 4> AdditionalValues; + // 'I' may appear more than once in DPV's location ops, and each use of 'I' + // must be updated in the DIExpression and potentially have additional + // values added; thus we call salvageDebugInfoImpl for each 'I' instance in + // DPVLocation. + Value *Op0 = nullptr; + DIExpression *SalvagedExpr = DPV->getExpression(); + auto LocItr = find(DPVLocation, &I); + while (SalvagedExpr && LocItr != DPVLocation.end()) { + SmallVector<uint64_t, 16> Ops; + unsigned LocNo = std::distance(DPVLocation.begin(), LocItr); + uint64_t CurrentLocOps = SalvagedExpr->getNumLocationOperands(); + Op0 = salvageDebugInfoImpl(I, CurrentLocOps, Ops, AdditionalValues); + if (!Op0) + break; + SalvagedExpr = + DIExpression::appendOpsToArg(SalvagedExpr, Ops, LocNo, StackValue); + LocItr = std::find(++LocItr, DPVLocation.end(), &I); + } + // salvageDebugInfoImpl should fail on examining the first element of + // DbgUsers, or none of them. + if (!Op0) + break; + + DPV->replaceVariableLocationOp(&I, Op0); + bool IsValidSalvageExpr = + SalvagedExpr->getNumElements() <= MaxExpressionSize; + if (AdditionalValues.empty() && IsValidSalvageExpr) { + DPV->setExpression(SalvagedExpr); + } else if (DPV->getType() == DPValue::LocationType::Value && + IsValidSalvageExpr && + DPV->getNumVariableLocationOps() + AdditionalValues.size() <= + MaxDebugArgs) { + DPV->addVariableLocationOps(AdditionalValues, SalvagedExpr); + } else { + // Do not salvage using DIArgList for dbg.addr/dbg.declare, as it is + // currently only valid for stack value expressions. + // Also do not salvage if the resulting DIArgList would contain an + // unreasonably large number of values. + Value *Undef = UndefValue::get(I.getOperand(0)->getType()); + DPV->replaceVariableLocationOp(I.getOperand(0), Undef); + } + LLVM_DEBUG(dbgs() << "SALVAGE: " << DPV << '\n'); + Salvaged = true; + } if (Salvaged) return; for (auto *DII : DbgUsers) DII->setKillLocation(); + + for (auto *DPV : DPUsers) + DPV->setKillLocation(); } Value *getSalvageOpsForGEP(GetElementPtrInst *GEP, const DataLayout &DL, @@ -2136,16 +2554,20 @@ using DbgValReplacement = std::optional<DIExpression *>; /// changes are made. static bool rewriteDebugUsers( Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT, - function_ref<DbgValReplacement(DbgVariableIntrinsic &DII)> RewriteExpr) { + function_ref<DbgValReplacement(DbgVariableIntrinsic &DII)> RewriteExpr, + function_ref<DbgValReplacement(DPValue &DPV)> RewriteDPVExpr) { // Find debug users of From. SmallVector<DbgVariableIntrinsic *, 1> Users; - findDbgUsers(Users, &From); - if (Users.empty()) + SmallVector<DPValue *, 1> DPUsers; + findDbgUsers(Users, &From, &DPUsers); + if (Users.empty() && DPUsers.empty()) return false; // Prevent use-before-def of To. bool Changed = false; + SmallPtrSet<DbgVariableIntrinsic *, 1> UndefOrSalvage; + SmallPtrSet<DPValue *, 1> UndefOrSalvageDPV; if (isa<Instruction>(&To)) { bool DomPointAfterFrom = From.getNextNonDebugInstruction() == &DomPoint; @@ -2163,6 +2585,25 @@ static bool rewriteDebugUsers( UndefOrSalvage.insert(DII); } } + + // DPValue implementation of the above. + for (auto *DPV : DPUsers) { + Instruction *MarkedInstr = DPV->getMarker()->MarkedInstr; + Instruction *NextNonDebug = MarkedInstr; + // The next instruction might still be a dbg.declare, skip over it. + if (isa<DbgVariableIntrinsic>(NextNonDebug)) + NextNonDebug = NextNonDebug->getNextNonDebugInstruction(); + + if (DomPointAfterFrom && NextNonDebug == &DomPoint) { + LLVM_DEBUG(dbgs() << "MOVE: " << *DPV << '\n'); + DPV->removeFromParent(); + // Ensure there's a marker. + DomPoint.getParent()->insertDPValueAfter(DPV, &DomPoint); + Changed = true; + } else if (!DT.dominates(&DomPoint, MarkedInstr)) { + UndefOrSalvageDPV.insert(DPV); + } + } } // Update debug users without use-before-def risk. @@ -2179,8 +2620,21 @@ static bool rewriteDebugUsers( LLVM_DEBUG(dbgs() << "REWRITE: " << *DII << '\n'); Changed = true; } + for (auto *DPV : DPUsers) { + if (UndefOrSalvageDPV.count(DPV)) + continue; - if (!UndefOrSalvage.empty()) { + DbgValReplacement DVR = RewriteDPVExpr(*DPV); + if (!DVR) + continue; + + DPV->replaceVariableLocationOp(&From, &To); + DPV->setExpression(*DVR); + LLVM_DEBUG(dbgs() << "REWRITE: " << DPV << '\n'); + Changed = true; + } + + if (!UndefOrSalvage.empty() || !UndefOrSalvageDPV.empty()) { // Try to salvage the remaining debug users. salvageDebugInfo(From); Changed = true; @@ -2228,12 +2682,15 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To, auto Identity = [&](DbgVariableIntrinsic &DII) -> DbgValReplacement { return DII.getExpression(); }; + auto IdentityDPV = [&](DPValue &DPV) -> DbgValReplacement { + return DPV.getExpression(); + }; // Handle no-op conversions. Module &M = *From.getModule(); const DataLayout &DL = M.getDataLayout(); if (isBitCastSemanticsPreserving(DL, FromTy, ToTy)) - return rewriteDebugUsers(From, To, DomPoint, DT, Identity); + return rewriteDebugUsers(From, To, DomPoint, DT, Identity, IdentityDPV); // Handle integer-to-integer widening and narrowing. // FIXME: Use DW_OP_convert when it's available everywhere. @@ -2245,7 +2702,7 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To, // When the width of the result grows, assume that a debugger will only // access the low `FromBits` bits when inspecting the source variable. if (FromBits < ToBits) - return rewriteDebugUsers(From, To, DomPoint, DT, Identity); + return rewriteDebugUsers(From, To, DomPoint, DT, Identity, IdentityDPV); // The width of the result has shrunk. Use sign/zero extension to describe // the source variable's high bits. @@ -2261,7 +2718,22 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To, return DIExpression::appendExt(DII.getExpression(), ToBits, FromBits, Signed); }; - return rewriteDebugUsers(From, To, DomPoint, DT, SignOrZeroExt); + // RemoveDIs: duplicate implementation working on DPValues rather than on + // dbg.value intrinsics. + auto SignOrZeroExtDPV = [&](DPValue &DPV) -> DbgValReplacement { + DILocalVariable *Var = DPV.getVariable(); + + // Without knowing signedness, sign/zero extension isn't possible. + auto Signedness = Var->getSignedness(); + if (!Signedness) + return std::nullopt; + + bool Signed = *Signedness == DIBasicType::Signedness::Signed; + return DIExpression::appendExt(DPV.getExpression(), ToBits, FromBits, + Signed); + }; + return rewriteDebugUsers(From, To, DomPoint, DT, SignOrZeroExt, + SignOrZeroExtDPV); } // TODO: Floating-point conversions, vectors. @@ -2275,12 +2747,17 @@ llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) { // Delete the instructions backwards, as it has a reduced likelihood of // having to update as many def-use and use-def chains. Instruction *EndInst = BB->getTerminator(); // Last not to be deleted. + // RemoveDIs: erasing debug-info must be done manually. + EndInst->dropDbgValues(); while (EndInst != &BB->front()) { // Delete the next to last instruction. Instruction *Inst = &*--EndInst->getIterator(); if (!Inst->use_empty() && !Inst->getType()->isTokenTy()) Inst->replaceAllUsesWith(PoisonValue::get(Inst->getType())); if (Inst->isEHPad() || Inst->getType()->isTokenTy()) { + // EHPads can't have DPValues attached to them, but it might be possible + // for things with token type. + Inst->dropDbgValues(); EndInst = Inst; continue; } @@ -2288,6 +2765,8 @@ llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) { ++NumDeadDbgInst; else ++NumDeadInst; + // RemoveDIs: erasing debug-info must be done manually. + Inst->dropDbgValues(); Inst->eraseFromParent(); } return {NumDeadInst, NumDeadDbgInst}; @@ -2329,6 +2808,7 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool PreserveLCSSA, Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor}); DTU->applyUpdates(Updates); } + BB->flushTerminatorDbgValues(); return NumInstrsRemoved; } @@ -2482,9 +2962,9 @@ static bool markAliveBlocks(Function &F, // If we found a call to a no-return function, insert an unreachable // instruction after it. Make sure there isn't *already* one there // though. - if (!isa<UnreachableInst>(CI->getNextNode())) { + if (!isa<UnreachableInst>(CI->getNextNonDebugInstruction())) { // Don't insert a call to llvm.trap right before the unreachable. - changeToUnreachable(CI->getNextNode(), false, DTU); + changeToUnreachable(CI->getNextNonDebugInstruction(), false, DTU); Changed = true; } break; @@ -2896,9 +3376,10 @@ static unsigned replaceDominatedUsesWith(Value *From, Value *To, for (Use &U : llvm::make_early_inc_range(From->uses())) { if (!Dominates(Root, U)) continue; + LLVM_DEBUG(dbgs() << "Replace dominated use of '"; + From->printAsOperand(dbgs()); + dbgs() << "' with " << *To << " in " << *U.getUser() << "\n"); U.set(To); - LLVM_DEBUG(dbgs() << "Replace dominated use of '" << From->getName() - << "' as " << *To << " in " << *U << "\n"); ++Count; } return Count; @@ -3017,9 +3498,12 @@ void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, void llvm::dropDebugUsers(Instruction &I) { SmallVector<DbgVariableIntrinsic *, 1> DbgUsers; - findDbgUsers(DbgUsers, &I); + SmallVector<DPValue *, 1> DPUsers; + findDbgUsers(DbgUsers, &I, &DPUsers); for (auto *DII : DbgUsers) DII->eraseFromParent(); + for (auto *DPV : DPUsers) + DPV->eraseFromParent(); } void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, @@ -3051,6 +3535,8 @@ void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, I->dropUBImplyingAttrsAndMetadata(); if (I->isUsedByMetadata()) dropDebugUsers(*I); + // RemoveDIs: drop debug-info too as the following code does. + I->dropDbgValues(); if (I->isDebugOrPseudoInst()) { // Remove DbgInfo and pseudo probe Intrinsics. II = I->eraseFromParent(); @@ -3063,6 +3549,41 @@ void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BB->getTerminator()->getIterator()); } +DIExpression *llvm::getExpressionForConstant(DIBuilder &DIB, const Constant &C, + Type &Ty) { + // Create integer constant expression. + auto createIntegerExpression = [&DIB](const Constant &CV) -> DIExpression * { + const APInt &API = cast<ConstantInt>(&CV)->getValue(); + std::optional<int64_t> InitIntOpt = API.trySExtValue(); + return InitIntOpt ? DIB.createConstantValueExpression( + static_cast<uint64_t>(*InitIntOpt)) + : nullptr; + }; + + if (isa<ConstantInt>(C)) + return createIntegerExpression(C); + + if (Ty.isFloatTy() || Ty.isDoubleTy()) { + const APFloat &APF = cast<ConstantFP>(&C)->getValueAPF(); + return DIB.createConstantValueExpression( + APF.bitcastToAPInt().getZExtValue()); + } + + if (!Ty.isPointerTy()) + return nullptr; + + if (isa<ConstantPointerNull>(C)) + return DIB.createConstantValueExpression(0); + + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(&C)) + if (CE->getOpcode() == Instruction::IntToPtr) { + const Value *V = CE->getOperand(0); + if (auto CI = dyn_cast_or_null<ConstantInt>(V)) + return createIntegerExpression(*CI); + } + return nullptr; +} + namespace { /// A potential constituent of a bitreverse or bswap expression. See diff --git a/llvm/lib/Transforms/Utils/LoopConstrainer.cpp b/llvm/lib/Transforms/Utils/LoopConstrainer.cpp new file mode 100644 index 000000000000..ea6d952cfa7d --- /dev/null +++ b/llvm/lib/Transforms/Utils/LoopConstrainer.cpp @@ -0,0 +1,904 @@ +#include "llvm/Transforms/Utils/LoopConstrainer.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/Dominators.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" + +using namespace llvm; + +static const char *ClonedLoopTag = "loop_constrainer.loop.clone"; + +#define DEBUG_TYPE "loop-constrainer" + +/// Given a loop with an deccreasing induction variable, is it possible to +/// safely calculate the bounds of a new loop using the given Predicate. +static bool isSafeDecreasingBound(const SCEV *Start, const SCEV *BoundSCEV, + const SCEV *Step, ICmpInst::Predicate Pred, + unsigned LatchBrExitIdx, Loop *L, + ScalarEvolution &SE) { + if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_SGT && + Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_UGT) + return false; + + if (!SE.isAvailableAtLoopEntry(BoundSCEV, L)) + return false; + + assert(SE.isKnownNegative(Step) && "expecting negative step"); + + LLVM_DEBUG(dbgs() << "isSafeDecreasingBound with:\n"); + LLVM_DEBUG(dbgs() << "Start: " << *Start << "\n"); + LLVM_DEBUG(dbgs() << "Step: " << *Step << "\n"); + LLVM_DEBUG(dbgs() << "BoundSCEV: " << *BoundSCEV << "\n"); + LLVM_DEBUG(dbgs() << "Pred: " << Pred << "\n"); + LLVM_DEBUG(dbgs() << "LatchExitBrIdx: " << LatchBrExitIdx << "\n"); + + bool IsSigned = ICmpInst::isSigned(Pred); + // The predicate that we need to check that the induction variable lies + // within bounds. + ICmpInst::Predicate BoundPred = + IsSigned ? CmpInst::ICMP_SGT : CmpInst::ICMP_UGT; + + if (LatchBrExitIdx == 1) + return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, BoundSCEV); + + assert(LatchBrExitIdx == 0 && "LatchBrExitIdx should be either 0 or 1"); + + const SCEV *StepPlusOne = SE.getAddExpr(Step, SE.getOne(Step->getType())); + unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth(); + APInt Min = IsSigned ? APInt::getSignedMinValue(BitWidth) + : APInt::getMinValue(BitWidth); + const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Min), StepPlusOne); + + const SCEV *MinusOne = + SE.getMinusSCEV(BoundSCEV, SE.getOne(BoundSCEV->getType())); + + return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, MinusOne) && + SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit); +} + +/// Given a loop with an increasing induction variable, is it possible to +/// safely calculate the bounds of a new loop using the given Predicate. +static bool isSafeIncreasingBound(const SCEV *Start, const SCEV *BoundSCEV, + const SCEV *Step, ICmpInst::Predicate Pred, + unsigned LatchBrExitIdx, Loop *L, + ScalarEvolution &SE) { + if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_SGT && + Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_UGT) + return false; + + if (!SE.isAvailableAtLoopEntry(BoundSCEV, L)) + return false; + + LLVM_DEBUG(dbgs() << "isSafeIncreasingBound with:\n"); + LLVM_DEBUG(dbgs() << "Start: " << *Start << "\n"); + LLVM_DEBUG(dbgs() << "Step: " << *Step << "\n"); + LLVM_DEBUG(dbgs() << "BoundSCEV: " << *BoundSCEV << "\n"); + LLVM_DEBUG(dbgs() << "Pred: " << Pred << "\n"); + LLVM_DEBUG(dbgs() << "LatchExitBrIdx: " << LatchBrExitIdx << "\n"); + + bool IsSigned = ICmpInst::isSigned(Pred); + // The predicate that we need to check that the induction variable lies + // within bounds. + ICmpInst::Predicate BoundPred = + IsSigned ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT; + + if (LatchBrExitIdx == 1) + return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, BoundSCEV); + + assert(LatchBrExitIdx == 0 && "LatchBrExitIdx should be 0 or 1"); + + const SCEV *StepMinusOne = SE.getMinusSCEV(Step, SE.getOne(Step->getType())); + unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth(); + APInt Max = IsSigned ? APInt::getSignedMaxValue(BitWidth) + : APInt::getMaxValue(BitWidth); + const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Max), StepMinusOne); + + return (SE.isLoopEntryGuardedByCond(L, BoundPred, Start, + SE.getAddExpr(BoundSCEV, Step)) && + SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit)); +} + +/// Returns estimate for max latch taken count of the loop of the narrowest +/// available type. If the latch block has such estimate, it is returned. +/// Otherwise, we use max exit count of whole loop (that is potentially of wider +/// type than latch check itself), which is still better than no estimate. +static const SCEV *getNarrowestLatchMaxTakenCountEstimate(ScalarEvolution &SE, + const Loop &L) { + const SCEV *FromBlock = + SE.getExitCount(&L, L.getLoopLatch(), ScalarEvolution::SymbolicMaximum); + if (isa<SCEVCouldNotCompute>(FromBlock)) + return SE.getSymbolicMaxBackedgeTakenCount(&L); + return FromBlock; +} + +std::optional<LoopStructure> +LoopStructure::parseLoopStructure(ScalarEvolution &SE, Loop &L, + bool AllowUnsignedLatchCond, + const char *&FailureReason) { + if (!L.isLoopSimplifyForm()) { + FailureReason = "loop not in LoopSimplify form"; + return std::nullopt; + } + + BasicBlock *Latch = L.getLoopLatch(); + assert(Latch && "Simplified loops only have one latch!"); + + if (Latch->getTerminator()->getMetadata(ClonedLoopTag)) { + FailureReason = "loop has already been cloned"; + return std::nullopt; + } + + if (!L.isLoopExiting(Latch)) { + FailureReason = "no loop latch"; + return std::nullopt; + } + + BasicBlock *Header = L.getHeader(); + BasicBlock *Preheader = L.getLoopPreheader(); + if (!Preheader) { + FailureReason = "no preheader"; + return std::nullopt; + } + + BranchInst *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator()); + if (!LatchBr || LatchBr->isUnconditional()) { + FailureReason = "latch terminator not conditional branch"; + return std::nullopt; + } + + unsigned LatchBrExitIdx = LatchBr->getSuccessor(0) == Header ? 1 : 0; + + ICmpInst *ICI = dyn_cast<ICmpInst>(LatchBr->getCondition()); + if (!ICI || !isa<IntegerType>(ICI->getOperand(0)->getType())) { + FailureReason = "latch terminator branch not conditional on integral icmp"; + return std::nullopt; + } + + const SCEV *MaxBETakenCount = getNarrowestLatchMaxTakenCountEstimate(SE, L); + if (isa<SCEVCouldNotCompute>(MaxBETakenCount)) { + FailureReason = "could not compute latch count"; + return std::nullopt; + } + assert(SE.getLoopDisposition(MaxBETakenCount, &L) == + ScalarEvolution::LoopInvariant && + "loop variant exit count doesn't make sense!"); + + ICmpInst::Predicate Pred = ICI->getPredicate(); + Value *LeftValue = ICI->getOperand(0); + const SCEV *LeftSCEV = SE.getSCEV(LeftValue); + IntegerType *IndVarTy = cast<IntegerType>(LeftValue->getType()); + + Value *RightValue = ICI->getOperand(1); + const SCEV *RightSCEV = SE.getSCEV(RightValue); + + // We canonicalize `ICI` such that `LeftSCEV` is an add recurrence. + if (!isa<SCEVAddRecExpr>(LeftSCEV)) { + if (isa<SCEVAddRecExpr>(RightSCEV)) { + std::swap(LeftSCEV, RightSCEV); + std::swap(LeftValue, RightValue); + Pred = ICmpInst::getSwappedPredicate(Pred); + } else { + FailureReason = "no add recurrences in the icmp"; + return std::nullopt; + } + } + + auto HasNoSignedWrap = [&](const SCEVAddRecExpr *AR) { + if (AR->getNoWrapFlags(SCEV::FlagNSW)) + return true; + + IntegerType *Ty = cast<IntegerType>(AR->getType()); + IntegerType *WideTy = + IntegerType::get(Ty->getContext(), Ty->getBitWidth() * 2); + + const SCEVAddRecExpr *ExtendAfterOp = + dyn_cast<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy)); + if (ExtendAfterOp) { + const SCEV *ExtendedStart = SE.getSignExtendExpr(AR->getStart(), WideTy); + const SCEV *ExtendedStep = + SE.getSignExtendExpr(AR->getStepRecurrence(SE), WideTy); + + bool NoSignedWrap = ExtendAfterOp->getStart() == ExtendedStart && + ExtendAfterOp->getStepRecurrence(SE) == ExtendedStep; + + if (NoSignedWrap) + return true; + } + + // We may have proved this when computing the sign extension above. + return AR->getNoWrapFlags(SCEV::FlagNSW) != SCEV::FlagAnyWrap; + }; + + // `ICI` is interpreted as taking the backedge if the *next* value of the + // induction variable satisfies some constraint. + + const SCEVAddRecExpr *IndVarBase = cast<SCEVAddRecExpr>(LeftSCEV); + if (IndVarBase->getLoop() != &L) { + FailureReason = "LHS in cmp is not an AddRec for this loop"; + return std::nullopt; + } + if (!IndVarBase->isAffine()) { + FailureReason = "LHS in icmp not induction variable"; + return std::nullopt; + } + const SCEV *StepRec = IndVarBase->getStepRecurrence(SE); + if (!isa<SCEVConstant>(StepRec)) { + FailureReason = "LHS in icmp not induction variable"; + return std::nullopt; + } + ConstantInt *StepCI = cast<SCEVConstant>(StepRec)->getValue(); + + if (ICI->isEquality() && !HasNoSignedWrap(IndVarBase)) { + FailureReason = "LHS in icmp needs nsw for equality predicates"; + return std::nullopt; + } + + assert(!StepCI->isZero() && "Zero step?"); + bool IsIncreasing = !StepCI->isNegative(); + bool IsSignedPredicate; + const SCEV *StartNext = IndVarBase->getStart(); + const SCEV *Addend = SE.getNegativeSCEV(IndVarBase->getStepRecurrence(SE)); + const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend); + const SCEV *Step = SE.getSCEV(StepCI); + + const SCEV *FixedRightSCEV = nullptr; + + // If RightValue resides within loop (but still being loop invariant), + // regenerate it as preheader. + if (auto *I = dyn_cast<Instruction>(RightValue)) + if (L.contains(I->getParent())) + FixedRightSCEV = RightSCEV; + + if (IsIncreasing) { + bool DecreasedRightValueByOne = false; + if (StepCI->isOne()) { + // Try to turn eq/ne predicates to those we can work with. + if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1) + // while (++i != len) { while (++i < len) { + // ... ---> ... + // } } + // If both parts are known non-negative, it is profitable to use + // unsigned comparison in increasing loop. This allows us to make the + // comparison check against "RightSCEV + 1" more optimistic. + if (isKnownNonNegativeInLoop(IndVarStart, &L, SE) && + isKnownNonNegativeInLoop(RightSCEV, &L, SE)) + Pred = ICmpInst::ICMP_ULT; + else + Pred = ICmpInst::ICMP_SLT; + else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0) { + // while (true) { while (true) { + // if (++i == len) ---> if (++i > len - 1) + // break; break; + // ... ... + // } } + if (IndVarBase->getNoWrapFlags(SCEV::FlagNUW) && + cannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/ false)) { + Pred = ICmpInst::ICMP_UGT; + RightSCEV = + SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType())); + DecreasedRightValueByOne = true; + } else if (cannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/ true)) { + Pred = ICmpInst::ICMP_SGT; + RightSCEV = + SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType())); + DecreasedRightValueByOne = true; + } + } + } + + bool LTPred = (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT); + bool GTPred = (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT); + bool FoundExpectedPred = + (LTPred && LatchBrExitIdx == 1) || (GTPred && LatchBrExitIdx == 0); + + if (!FoundExpectedPred) { + FailureReason = "expected icmp slt semantically, found something else"; + return std::nullopt; + } + + IsSignedPredicate = ICmpInst::isSigned(Pred); + if (!IsSignedPredicate && !AllowUnsignedLatchCond) { + FailureReason = "unsigned latch conditions are explicitly prohibited"; + return std::nullopt; + } + + if (!isSafeIncreasingBound(IndVarStart, RightSCEV, Step, Pred, + LatchBrExitIdx, &L, SE)) { + FailureReason = "Unsafe loop bounds"; + return std::nullopt; + } + if (LatchBrExitIdx == 0) { + // We need to increase the right value unless we have already decreased + // it virtually when we replaced EQ with SGT. + if (!DecreasedRightValueByOne) + FixedRightSCEV = + SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType())); + } else { + assert(!DecreasedRightValueByOne && + "Right value can be decreased only for LatchBrExitIdx == 0!"); + } + } else { + bool IncreasedRightValueByOne = false; + if (StepCI->isMinusOne()) { + // Try to turn eq/ne predicates to those we can work with. + if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1) + // while (--i != len) { while (--i > len) { + // ... ---> ... + // } } + // We intentionally don't turn the predicate into UGT even if we know + // that both operands are non-negative, because it will only pessimize + // our check against "RightSCEV - 1". + Pred = ICmpInst::ICMP_SGT; + else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0) { + // while (true) { while (true) { + // if (--i == len) ---> if (--i < len + 1) + // break; break; + // ... ... + // } } + if (IndVarBase->getNoWrapFlags(SCEV::FlagNUW) && + cannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ false)) { + Pred = ICmpInst::ICMP_ULT; + RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType())); + IncreasedRightValueByOne = true; + } else if (cannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ true)) { + Pred = ICmpInst::ICMP_SLT; + RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType())); + IncreasedRightValueByOne = true; + } + } + } + + bool LTPred = (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT); + bool GTPred = (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT); + + bool FoundExpectedPred = + (GTPred && LatchBrExitIdx == 1) || (LTPred && LatchBrExitIdx == 0); + + if (!FoundExpectedPred) { + FailureReason = "expected icmp sgt semantically, found something else"; + return std::nullopt; + } + + IsSignedPredicate = + Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGT; + + if (!IsSignedPredicate && !AllowUnsignedLatchCond) { + FailureReason = "unsigned latch conditions are explicitly prohibited"; + return std::nullopt; + } + + if (!isSafeDecreasingBound(IndVarStart, RightSCEV, Step, Pred, + LatchBrExitIdx, &L, SE)) { + FailureReason = "Unsafe bounds"; + return std::nullopt; + } + + if (LatchBrExitIdx == 0) { + // We need to decrease the right value unless we have already increased + // it virtually when we replaced EQ with SLT. + if (!IncreasedRightValueByOne) + FixedRightSCEV = + SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType())); + } else { + assert(!IncreasedRightValueByOne && + "Right value can be increased only for LatchBrExitIdx == 0!"); + } + } + BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx); + + assert(!L.contains(LatchExit) && "expected an exit block!"); + const DataLayout &DL = Preheader->getModule()->getDataLayout(); + SCEVExpander Expander(SE, DL, "loop-constrainer"); + Instruction *Ins = Preheader->getTerminator(); + + if (FixedRightSCEV) + RightValue = + Expander.expandCodeFor(FixedRightSCEV, FixedRightSCEV->getType(), Ins); + + Value *IndVarStartV = Expander.expandCodeFor(IndVarStart, IndVarTy, Ins); + IndVarStartV->setName("indvar.start"); + + LoopStructure Result; + + Result.Tag = "main"; + Result.Header = Header; + Result.Latch = Latch; + Result.LatchBr = LatchBr; + Result.LatchExit = LatchExit; + Result.LatchBrExitIdx = LatchBrExitIdx; + Result.IndVarStart = IndVarStartV; + Result.IndVarStep = StepCI; + Result.IndVarBase = LeftValue; + Result.IndVarIncreasing = IsIncreasing; + Result.LoopExitAt = RightValue; + Result.IsSignedPredicate = IsSignedPredicate; + Result.ExitCountTy = cast<IntegerType>(MaxBETakenCount->getType()); + + FailureReason = nullptr; + + return Result; +} + +// Add metadata to the loop L to disable loop optimizations. Callers need to +// confirm that optimizing loop L is not beneficial. +static void DisableAllLoopOptsOnLoop(Loop &L) { + // We do not care about any existing loopID related metadata for L, since we + // are setting all loop metadata to false. + LLVMContext &Context = L.getHeader()->getContext(); + // Reserve first location for self reference to the LoopID metadata node. + MDNode *Dummy = MDNode::get(Context, {}); + MDNode *DisableUnroll = MDNode::get( + Context, {MDString::get(Context, "llvm.loop.unroll.disable")}); + Metadata *FalseVal = + ConstantAsMetadata::get(ConstantInt::get(Type::getInt1Ty(Context), 0)); + MDNode *DisableVectorize = MDNode::get( + Context, + {MDString::get(Context, "llvm.loop.vectorize.enable"), FalseVal}); + MDNode *DisableLICMVersioning = MDNode::get( + Context, {MDString::get(Context, "llvm.loop.licm_versioning.disable")}); + MDNode *DisableDistribution = MDNode::get( + Context, + {MDString::get(Context, "llvm.loop.distribute.enable"), FalseVal}); + MDNode *NewLoopID = + MDNode::get(Context, {Dummy, DisableUnroll, DisableVectorize, + DisableLICMVersioning, DisableDistribution}); + // Set operand 0 to refer to the loop id itself. + NewLoopID->replaceOperandWith(0, NewLoopID); + L.setLoopID(NewLoopID); +} + +LoopConstrainer::LoopConstrainer(Loop &L, LoopInfo &LI, + function_ref<void(Loop *, bool)> LPMAddNewLoop, + const LoopStructure &LS, ScalarEvolution &SE, + DominatorTree &DT, Type *T, SubRanges SR) + : F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()), SE(SE), + DT(DT), LI(LI), LPMAddNewLoop(LPMAddNewLoop), OriginalLoop(L), RangeTy(T), + MainLoopStructure(LS), SR(SR) {} + +void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result, + const char *Tag) const { + for (BasicBlock *BB : OriginalLoop.getBlocks()) { + BasicBlock *Clone = CloneBasicBlock(BB, Result.Map, Twine(".") + Tag, &F); + Result.Blocks.push_back(Clone); + Result.Map[BB] = Clone; + } + + auto GetClonedValue = [&Result](Value *V) { + assert(V && "null values not in domain!"); + auto It = Result.Map.find(V); + if (It == Result.Map.end()) + return V; + return static_cast<Value *>(It->second); + }; + + auto *ClonedLatch = + cast<BasicBlock>(GetClonedValue(OriginalLoop.getLoopLatch())); + ClonedLatch->getTerminator()->setMetadata(ClonedLoopTag, + MDNode::get(Ctx, {})); + + Result.Structure = MainLoopStructure.map(GetClonedValue); + Result.Structure.Tag = Tag; + + for (unsigned i = 0, e = Result.Blocks.size(); i != e; ++i) { + BasicBlock *ClonedBB = Result.Blocks[i]; + BasicBlock *OriginalBB = OriginalLoop.getBlocks()[i]; + + assert(Result.Map[OriginalBB] == ClonedBB && "invariant!"); + + for (Instruction &I : *ClonedBB) + RemapInstruction(&I, Result.Map, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + + // Exit blocks will now have one more predecessor and their PHI nodes need + // to be edited to reflect that. No phi nodes need to be introduced because + // the loop is in LCSSA. + + for (auto *SBB : successors(OriginalBB)) { + if (OriginalLoop.contains(SBB)) + continue; // not an exit block + + for (PHINode &PN : SBB->phis()) { + Value *OldIncoming = PN.getIncomingValueForBlock(OriginalBB); + PN.addIncoming(GetClonedValue(OldIncoming), ClonedBB); + SE.forgetValue(&PN); + } + } + } +} + +LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd( + const LoopStructure &LS, BasicBlock *Preheader, Value *ExitSubloopAt, + BasicBlock *ContinuationBlock) const { + // We start with a loop with a single latch: + // + // +--------------------+ + // | | + // | preheader | + // | | + // +--------+-----------+ + // | ----------------\ + // | / | + // +--------v----v------+ | + // | | | + // | header | | + // | | | + // +--------------------+ | + // | + // ..... | + // | + // +--------------------+ | + // | | | + // | latch >----------/ + // | | + // +-------v------------+ + // | + // | + // | +--------------------+ + // | | | + // +---> original exit | + // | | + // +--------------------+ + // + // We change the control flow to look like + // + // + // +--------------------+ + // | | + // | preheader >-------------------------+ + // | | | + // +--------v-----------+ | + // | /-------------+ | + // | / | | + // +--------v--v--------+ | | + // | | | | + // | header | | +--------+ | + // | | | | | | + // +--------------------+ | | +-----v-----v-----------+ + // | | | | + // | | | .pseudo.exit | + // | | | | + // | | +-----------v-----------+ + // | | | + // ..... | | | + // | | +--------v-------------+ + // +--------------------+ | | | | + // | | | | | ContinuationBlock | + // | latch >------+ | | | + // | | | +----------------------+ + // +---------v----------+ | + // | | + // | | + // | +---------------^-----+ + // | | | + // +-----> .exit.selector | + // | | + // +----------v----------+ + // | + // +--------------------+ | + // | | | + // | original exit <----+ + // | | + // +--------------------+ + + RewrittenRangeInfo RRI; + + BasicBlock *BBInsertLocation = LS.Latch->getNextNode(); + RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector", + &F, BBInsertLocation); + RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F, + BBInsertLocation); + + BranchInst *PreheaderJump = cast<BranchInst>(Preheader->getTerminator()); + bool Increasing = LS.IndVarIncreasing; + bool IsSignedPredicate = LS.IsSignedPredicate; + + IRBuilder<> B(PreheaderJump); + auto NoopOrExt = [&](Value *V) { + if (V->getType() == RangeTy) + return V; + return IsSignedPredicate ? B.CreateSExt(V, RangeTy, "wide." + V->getName()) + : B.CreateZExt(V, RangeTy, "wide." + V->getName()); + }; + + // EnterLoopCond - is it okay to start executing this `LS'? + Value *EnterLoopCond = nullptr; + auto Pred = + Increasing + ? (IsSignedPredicate ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT) + : (IsSignedPredicate ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT); + Value *IndVarStart = NoopOrExt(LS.IndVarStart); + EnterLoopCond = B.CreateICmp(Pred, IndVarStart, ExitSubloopAt); + + B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit); + PreheaderJump->eraseFromParent(); + + LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector); + B.SetInsertPoint(LS.LatchBr); + Value *IndVarBase = NoopOrExt(LS.IndVarBase); + Value *TakeBackedgeLoopCond = B.CreateICmp(Pred, IndVarBase, ExitSubloopAt); + + Value *CondForBranch = LS.LatchBrExitIdx == 1 + ? TakeBackedgeLoopCond + : B.CreateNot(TakeBackedgeLoopCond); + + LS.LatchBr->setCondition(CondForBranch); + + B.SetInsertPoint(RRI.ExitSelector); + + // IterationsLeft - are there any more iterations left, given the original + // upper bound on the induction variable? If not, we branch to the "real" + // exit. + Value *LoopExitAt = NoopOrExt(LS.LoopExitAt); + Value *IterationsLeft = B.CreateICmp(Pred, IndVarBase, LoopExitAt); + B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit); + + BranchInst *BranchToContinuation = + BranchInst::Create(ContinuationBlock, RRI.PseudoExit); + + // We emit PHI nodes into `RRI.PseudoExit' that compute the "latest" value of + // each of the PHI nodes in the loop header. This feeds into the initial + // value of the same PHI nodes if/when we continue execution. + for (PHINode &PN : LS.Header->phis()) { + PHINode *NewPHI = PHINode::Create(PN.getType(), 2, PN.getName() + ".copy", + BranchToContinuation); + + NewPHI->addIncoming(PN.getIncomingValueForBlock(Preheader), Preheader); + NewPHI->addIncoming(PN.getIncomingValueForBlock(LS.Latch), + RRI.ExitSelector); + RRI.PHIValuesAtPseudoExit.push_back(NewPHI); + } + + RRI.IndVarEnd = PHINode::Create(IndVarBase->getType(), 2, "indvar.end", + BranchToContinuation); + RRI.IndVarEnd->addIncoming(IndVarStart, Preheader); + RRI.IndVarEnd->addIncoming(IndVarBase, RRI.ExitSelector); + + // The latch exit now has a branch from `RRI.ExitSelector' instead of + // `LS.Latch'. The PHI nodes need to be updated to reflect that. + LS.LatchExit->replacePhiUsesWith(LS.Latch, RRI.ExitSelector); + + return RRI; +} + +void LoopConstrainer::rewriteIncomingValuesForPHIs( + LoopStructure &LS, BasicBlock *ContinuationBlock, + const LoopConstrainer::RewrittenRangeInfo &RRI) const { + unsigned PHIIndex = 0; + for (PHINode &PN : LS.Header->phis()) + PN.setIncomingValueForBlock(ContinuationBlock, + RRI.PHIValuesAtPseudoExit[PHIIndex++]); + + LS.IndVarStart = RRI.IndVarEnd; +} + +BasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS, + BasicBlock *OldPreheader, + const char *Tag) const { + BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, &F, LS.Header); + BranchInst::Create(LS.Header, Preheader); + + LS.Header->replacePhiUsesWith(OldPreheader, Preheader); + + return Preheader; +} + +void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs) { + Loop *ParentLoop = OriginalLoop.getParentLoop(); + if (!ParentLoop) + return; + + for (BasicBlock *BB : BBs) + ParentLoop->addBasicBlockToLoop(BB, LI); +} + +Loop *LoopConstrainer::createClonedLoopStructure(Loop *Original, Loop *Parent, + ValueToValueMapTy &VM, + bool IsSubloop) { + Loop &New = *LI.AllocateLoop(); + if (Parent) + Parent->addChildLoop(&New); + else + LI.addTopLevelLoop(&New); + LPMAddNewLoop(&New, IsSubloop); + + // Add all of the blocks in Original to the new loop. + for (auto *BB : Original->blocks()) + if (LI.getLoopFor(BB) == Original) + New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), LI); + + // Add all of the subloops to the new loop. + for (Loop *SubLoop : *Original) + createClonedLoopStructure(SubLoop, &New, VM, /* IsSubloop */ true); + + return &New; +} + +bool LoopConstrainer::run() { + BasicBlock *Preheader = OriginalLoop.getLoopPreheader(); + assert(Preheader != nullptr && "precondition!"); + + OriginalPreheader = Preheader; + MainLoopPreheader = Preheader; + bool IsSignedPredicate = MainLoopStructure.IsSignedPredicate; + bool Increasing = MainLoopStructure.IndVarIncreasing; + IntegerType *IVTy = cast<IntegerType>(RangeTy); + + SCEVExpander Expander(SE, F.getParent()->getDataLayout(), "loop-constrainer"); + Instruction *InsertPt = OriginalPreheader->getTerminator(); + + // It would have been better to make `PreLoop' and `PostLoop' + // `std::optional<ClonedLoop>'s, but `ValueToValueMapTy' does not have a copy + // constructor. + ClonedLoop PreLoop, PostLoop; + bool NeedsPreLoop = + Increasing ? SR.LowLimit.has_value() : SR.HighLimit.has_value(); + bool NeedsPostLoop = + Increasing ? SR.HighLimit.has_value() : SR.LowLimit.has_value(); + + Value *ExitPreLoopAt = nullptr; + Value *ExitMainLoopAt = nullptr; + const SCEVConstant *MinusOneS = + cast<SCEVConstant>(SE.getConstant(IVTy, -1, true /* isSigned */)); + + if (NeedsPreLoop) { + const SCEV *ExitPreLoopAtSCEV = nullptr; + + if (Increasing) + ExitPreLoopAtSCEV = *SR.LowLimit; + else if (cannotBeMinInLoop(*SR.HighLimit, &OriginalLoop, SE, + IsSignedPredicate)) + ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS); + else { + LLVM_DEBUG(dbgs() << "could not prove no-overflow when computing " + << "preloop exit limit. HighLimit = " + << *(*SR.HighLimit) << "\n"); + return false; + } + + if (!Expander.isSafeToExpandAt(ExitPreLoopAtSCEV, InsertPt)) { + LLVM_DEBUG(dbgs() << "could not prove that it is safe to expand the" + << " preloop exit limit " << *ExitPreLoopAtSCEV + << " at block " << InsertPt->getParent()->getName() + << "\n"); + return false; + } + + ExitPreLoopAt = Expander.expandCodeFor(ExitPreLoopAtSCEV, IVTy, InsertPt); + ExitPreLoopAt->setName("exit.preloop.at"); + } + + if (NeedsPostLoop) { + const SCEV *ExitMainLoopAtSCEV = nullptr; + + if (Increasing) + ExitMainLoopAtSCEV = *SR.HighLimit; + else if (cannotBeMinInLoop(*SR.LowLimit, &OriginalLoop, SE, + IsSignedPredicate)) + ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS); + else { + LLVM_DEBUG(dbgs() << "could not prove no-overflow when computing " + << "mainloop exit limit. LowLimit = " + << *(*SR.LowLimit) << "\n"); + return false; + } + + if (!Expander.isSafeToExpandAt(ExitMainLoopAtSCEV, InsertPt)) { + LLVM_DEBUG(dbgs() << "could not prove that it is safe to expand the" + << " main loop exit limit " << *ExitMainLoopAtSCEV + << " at block " << InsertPt->getParent()->getName() + << "\n"); + return false; + } + + ExitMainLoopAt = Expander.expandCodeFor(ExitMainLoopAtSCEV, IVTy, InsertPt); + ExitMainLoopAt->setName("exit.mainloop.at"); + } + + // We clone these ahead of time so that we don't have to deal with changing + // and temporarily invalid IR as we transform the loops. + if (NeedsPreLoop) + cloneLoop(PreLoop, "preloop"); + if (NeedsPostLoop) + cloneLoop(PostLoop, "postloop"); + + RewrittenRangeInfo PreLoopRRI; + + if (NeedsPreLoop) { + Preheader->getTerminator()->replaceUsesOfWith(MainLoopStructure.Header, + PreLoop.Structure.Header); + + MainLoopPreheader = + createPreheader(MainLoopStructure, Preheader, "mainloop"); + PreLoopRRI = changeIterationSpaceEnd(PreLoop.Structure, Preheader, + ExitPreLoopAt, MainLoopPreheader); + rewriteIncomingValuesForPHIs(MainLoopStructure, MainLoopPreheader, + PreLoopRRI); + } + + BasicBlock *PostLoopPreheader = nullptr; + RewrittenRangeInfo PostLoopRRI; + + if (NeedsPostLoop) { + PostLoopPreheader = + createPreheader(PostLoop.Structure, Preheader, "postloop"); + PostLoopRRI = changeIterationSpaceEnd(MainLoopStructure, MainLoopPreheader, + ExitMainLoopAt, PostLoopPreheader); + rewriteIncomingValuesForPHIs(PostLoop.Structure, PostLoopPreheader, + PostLoopRRI); + } + + BasicBlock *NewMainLoopPreheader = + MainLoopPreheader != Preheader ? MainLoopPreheader : nullptr; + BasicBlock *NewBlocks[] = {PostLoopPreheader, PreLoopRRI.PseudoExit, + PreLoopRRI.ExitSelector, PostLoopRRI.PseudoExit, + PostLoopRRI.ExitSelector, NewMainLoopPreheader}; + + // Some of the above may be nullptr, filter them out before passing to + // addToParentLoopIfNeeded. + auto NewBlocksEnd = + std::remove(std::begin(NewBlocks), std::end(NewBlocks), nullptr); + + addToParentLoopIfNeeded(ArrayRef(std::begin(NewBlocks), NewBlocksEnd)); + + DT.recalculate(F); + + // We need to first add all the pre and post loop blocks into the loop + // structures (as part of createClonedLoopStructure), and then update the + // LCSSA form and LoopSimplifyForm. This is necessary for correctly updating + // LI when LoopSimplifyForm is generated. + Loop *PreL = nullptr, *PostL = nullptr; + if (!PreLoop.Blocks.empty()) { + PreL = createClonedLoopStructure(&OriginalLoop, + OriginalLoop.getParentLoop(), PreLoop.Map, + /* IsSubLoop */ false); + } + + if (!PostLoop.Blocks.empty()) { + PostL = + createClonedLoopStructure(&OriginalLoop, OriginalLoop.getParentLoop(), + PostLoop.Map, /* IsSubLoop */ false); + } + + // This function canonicalizes the loop into Loop-Simplify and LCSSA forms. + auto CanonicalizeLoop = [&](Loop *L, bool IsOriginalLoop) { + formLCSSARecursively(*L, DT, &LI, &SE); + simplifyLoop(L, &DT, &LI, &SE, nullptr, nullptr, true); + // Pre/post loops are slow paths, we do not need to perform any loop + // optimizations on them. + if (!IsOriginalLoop) + DisableAllLoopOptsOnLoop(*L); + }; + if (PreL) + CanonicalizeLoop(PreL, false); + if (PostL) + CanonicalizeLoop(PostL, false); + CanonicalizeLoop(&OriginalLoop, true); + + /// At this point: + /// - We've broken a "main loop" out of the loop in a way that the "main loop" + /// runs with the induction variable in a subset of [Begin, End). + /// - There is no overflow when computing "main loop" exit limit. + /// - Max latch taken count of the loop is limited. + /// It guarantees that induction variable will not overflow iterating in the + /// "main loop". + if (isa<OverflowingBinaryOperator>(MainLoopStructure.IndVarBase)) + if (IsSignedPredicate) + cast<BinaryOperator>(MainLoopStructure.IndVarBase) + ->setHasNoSignedWrap(true); + /// TODO: support unsigned predicate. + /// To add NUW flag we need to prove that both operands of BO are + /// non-negative. E.g: + /// ... + /// %iv.next = add nsw i32 %iv, -1 + /// %cmp = icmp ult i32 %iv.next, %n + /// br i1 %cmp, label %loopexit, label %loop + /// + /// -1 is MAX_UINT in terms of unsigned int. Adding anything but zero will + /// overflow, therefore NUW flag is not legal here. + + return true; +} diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index d701cf110154..f76fa3bb6c61 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -351,11 +351,20 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, MaxPeelCount = std::min((unsigned)SC->getAPInt().getLimitedValue() - 1, MaxPeelCount); - auto ComputePeelCount = [&](Value *Condition) -> void { - if (!Condition->getType()->isIntegerTy()) + const unsigned MaxDepth = 4; + std::function<void(Value *, unsigned)> ComputePeelCount = + [&](Value *Condition, unsigned Depth) -> void { + if (!Condition->getType()->isIntegerTy() || Depth >= MaxDepth) return; Value *LeftVal, *RightVal; + if (match(Condition, m_And(m_Value(LeftVal), m_Value(RightVal))) || + match(Condition, m_Or(m_Value(LeftVal), m_Value(RightVal)))) { + ComputePeelCount(LeftVal, Depth + 1); + ComputePeelCount(RightVal, Depth + 1); + return; + } + CmpInst::Predicate Pred; if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal)))) return; @@ -443,7 +452,7 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, for (BasicBlock *BB : L.blocks()) { for (Instruction &I : *BB) { if (SelectInst *SI = dyn_cast<SelectInst>(&I)) - ComputePeelCount(SI->getCondition()); + ComputePeelCount(SI->getCondition(), 0); } auto *BI = dyn_cast<BranchInst>(BB->getTerminator()); @@ -454,7 +463,7 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, if (L.getLoopLatch() == BB) continue; - ComputePeelCount(BI->getCondition()); + ComputePeelCount(BI->getCondition(), 0); } return DesiredPeelCount; @@ -624,21 +633,24 @@ struct WeightInfo { /// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to /// go to exit. /// Then, Estimated ExitCount = F / E. -/// For I-th (counting from 0) peeled off iteration we set the the weights for +/// For I-th (counting from 0) peeled off iteration we set the weights for /// the peeled exit as (EC - I, 1). It gives us reasonable distribution, /// The probability to go to exit 1/(EC-I) increases. At the same time /// the estimated exit count in the remainder loop reduces by I. /// To avoid dealing with division rounding we can just multiple both part /// of weights to E and use weight as (F - I * E, E). static void updateBranchWeights(Instruction *Term, WeightInfo &Info) { - MDBuilder MDB(Term->getContext()); - Term->setMetadata(LLVMContext::MD_prof, - MDB.createBranchWeights(Info.Weights)); + setBranchWeights(*Term, Info.Weights); for (auto [Idx, SubWeight] : enumerate(Info.SubWeights)) if (SubWeight != 0) - Info.Weights[Idx] = Info.Weights[Idx] > SubWeight - ? Info.Weights[Idx] - SubWeight - : 1; + // Don't set the probability of taking the edge from latch to loop header + // to less than 1:1 ratio (meaning Weight should not be lower than + // SubWeight), as this could significantly reduce the loop's hotness, + // which would be incorrect in the case of underestimating the trip count. + Info.Weights[Idx] = + Info.Weights[Idx] > SubWeight + ? std::max(Info.Weights[Idx] - SubWeight, SubWeight) + : SubWeight; } /// Initialize the weights for all exiting blocks. @@ -685,14 +697,6 @@ static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos, } } -/// Update the weights of original exiting block after peeling off all -/// iterations. -static void fixupBranchWeights(Instruction *Term, const WeightInfo &Info) { - MDBuilder MDB(Term->getContext()); - Term->setMetadata(LLVMContext::MD_prof, - MDB.createBranchWeights(Info.Weights)); -} - /// Clones the body of the loop L, putting it between \p InsertTop and \p /// InsertBot. /// \param IterNumber The serial number of the iteration currently being @@ -1028,8 +1032,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, PHI->setIncomingValueForBlock(NewPreHeader, NewVal); } - for (const auto &[Term, Info] : Weights) - fixupBranchWeights(Term, Info); + for (const auto &[Term, Info] : Weights) { + setBranchWeights(*Term, Info.Weights); + } // Update Metadata for count of peeled off iterations. unsigned AlreadyPeeled = 0; diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp index d81db5647c60..76280ed492b3 100644 --- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -25,6 +25,8 @@ #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -50,6 +52,9 @@ static cl::opt<bool> cl::desc("Allow loop rotation multiple times in order to reach " "a better latch exit")); +// Probability that a rotated loop has zero trip count / is never entered. +static constexpr uint32_t ZeroTripCountWeights[] = {1, 127}; + namespace { /// A simple loop rotation transformation. class LoopRotate { @@ -154,7 +159,8 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, // Replace MetadataAsValue(ValueAsMetadata(OrigHeaderVal)) uses in debug // intrinsics. SmallVector<DbgValueInst *, 1> DbgValues; - llvm::findDbgValues(DbgValues, OrigHeaderVal); + SmallVector<DPValue *, 1> DPValues; + llvm::findDbgValues(DbgValues, OrigHeaderVal, &DPValues); for (auto &DbgValue : DbgValues) { // The original users in the OrigHeader are already using the original // definitions. @@ -175,6 +181,29 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, NewVal = UndefValue::get(OrigHeaderVal->getType()); DbgValue->replaceVariableLocationOp(OrigHeaderVal, NewVal); } + + // RemoveDIs: duplicate implementation for non-instruction debug-info + // storage in DPValues. + for (DPValue *DPV : DPValues) { + // The original users in the OrigHeader are already using the original + // definitions. + BasicBlock *UserBB = DPV->getMarker()->getParent(); + if (UserBB == OrigHeader) + continue; + + // Users in the OrigPreHeader need to use the value to which the + // original definitions are mapped and anything else can be handled by + // the SSAUpdater. To avoid adding PHINodes, check if the value is + // available in UserBB, if not substitute undef. + Value *NewVal; + if (UserBB == OrigPreheader) + NewVal = OrigPreHeaderVal; + else if (SSA.HasValueForBlock(UserBB)) + NewVal = SSA.GetValueInMiddleOfBlock(UserBB); + else + NewVal = UndefValue::get(OrigHeaderVal->getType()); + DPV->replaceVariableLocationOp(OrigHeaderVal, NewVal); + } } } @@ -244,6 +273,123 @@ static bool canRotateDeoptimizingLatchExit(Loop *L) { return false; } +static void updateBranchWeights(BranchInst &PreHeaderBI, BranchInst &LoopBI, + bool HasConditionalPreHeader, + bool SuccsSwapped) { + MDNode *WeightMD = getBranchWeightMDNode(PreHeaderBI); + if (WeightMD == nullptr) + return; + + // LoopBI should currently be a clone of PreHeaderBI with the same + // metadata. But we double check to make sure we don't have a degenerate case + // where instsimplify changed the instructions. + if (WeightMD != getBranchWeightMDNode(LoopBI)) + return; + + SmallVector<uint32_t, 2> Weights; + extractFromBranchWeightMD(WeightMD, Weights); + if (Weights.size() != 2) + return; + uint32_t OrigLoopExitWeight = Weights[0]; + uint32_t OrigLoopBackedgeWeight = Weights[1]; + + if (SuccsSwapped) + std::swap(OrigLoopExitWeight, OrigLoopBackedgeWeight); + + // Update branch weights. Consider the following edge-counts: + // + // | |-------- | + // V V | V + // Br i1 ... | Br i1 ... + // | | | | | + // x| y| | becomes: | y0| |----- + // V V | | V V | + // Exit Loop | | Loop | + // | | | Br i1 ... | + // ----- | | | | + // x0| x1| y1 | | + // V V ---- + // Exit + // + // The following must hold: + // - x == x0 + x1 # counts to "exit" must stay the same. + // - y0 == x - x0 == x1 # how often loop was entered at all. + // - y1 == y - y0 # How often loop was repeated (after first iter.). + // + // We cannot generally deduce how often we had a zero-trip count loop so we + // have to make a guess for how to distribute x among the new x0 and x1. + + uint32_t ExitWeight0; // aka x0 + uint32_t ExitWeight1; // aka x1 + uint32_t EnterWeight; // aka y0 + uint32_t LoopBackWeight; // aka y1 + if (OrigLoopExitWeight > 0 && OrigLoopBackedgeWeight > 0) { + ExitWeight0 = 0; + if (HasConditionalPreHeader) { + // Here we cannot know how many 0-trip count loops we have, so we guess: + if (OrigLoopBackedgeWeight >= OrigLoopExitWeight) { + // If the loop count is bigger than the exit count then we set + // probabilities as if 0-trip count nearly never happens. + ExitWeight0 = ZeroTripCountWeights[0]; + // Scale up counts if necessary so we can match `ZeroTripCountWeights` + // for the `ExitWeight0`:`ExitWeight1` (aka `x0`:`x1` ratio`) ratio. + while (OrigLoopExitWeight < ZeroTripCountWeights[1] + ExitWeight0) { + // ... but don't overflow. + uint32_t const HighBit = uint32_t{1} << (sizeof(uint32_t) * 8 - 1); + if ((OrigLoopBackedgeWeight & HighBit) != 0 || + (OrigLoopExitWeight & HighBit) != 0) + break; + OrigLoopBackedgeWeight <<= 1; + OrigLoopExitWeight <<= 1; + } + } else { + // If there's a higher exit-count than backedge-count then we set + // probabilities as if there are only 0-trip and 1-trip cases. + ExitWeight0 = OrigLoopExitWeight - OrigLoopBackedgeWeight; + } + } + ExitWeight1 = OrigLoopExitWeight - ExitWeight0; + EnterWeight = ExitWeight1; + LoopBackWeight = OrigLoopBackedgeWeight - EnterWeight; + } else if (OrigLoopExitWeight == 0) { + if (OrigLoopBackedgeWeight == 0) { + // degenerate case... keep everything zero... + ExitWeight0 = 0; + ExitWeight1 = 0; + EnterWeight = 0; + LoopBackWeight = 0; + } else { + // Special case "LoopExitWeight == 0" weights which behaves like an + // endless where we don't want loop-enttry (y0) to be the same as + // loop-exit (x1). + ExitWeight0 = 0; + ExitWeight1 = 0; + EnterWeight = 1; + LoopBackWeight = OrigLoopBackedgeWeight; + } + } else { + // loop is never entered. + assert(OrigLoopBackedgeWeight == 0 && "remaining case is backedge zero"); + ExitWeight0 = 1; + ExitWeight1 = 1; + EnterWeight = 0; + LoopBackWeight = 0; + } + + const uint32_t LoopBIWeights[] = { + SuccsSwapped ? LoopBackWeight : ExitWeight1, + SuccsSwapped ? ExitWeight1 : LoopBackWeight, + }; + setBranchWeights(LoopBI, LoopBIWeights); + if (HasConditionalPreHeader) { + const uint32_t PreHeaderBIWeights[] = { + SuccsSwapped ? EnterWeight : ExitWeight0, + SuccsSwapped ? ExitWeight0 : EnterWeight, + }; + setBranchWeights(PreHeaderBI, PreHeaderBIWeights); + } +} + /// Rotate loop LP. Return true if the loop is rotated. /// /// \param SimplifiedLatch is true if the latch was just folded into the final @@ -363,7 +509,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // loop. Otherwise loop is not suitable for rotation. BasicBlock *Exit = BI->getSuccessor(0); BasicBlock *NewHeader = BI->getSuccessor(1); - if (L->contains(Exit)) + bool BISuccsSwapped = L->contains(Exit); + if (BISuccsSwapped) std::swap(Exit, NewHeader); assert(NewHeader && "Unable to determine new loop header"); assert(L->contains(NewHeader) && !L->contains(Exit) && @@ -394,20 +541,32 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // duplication. using DbgIntrinsicHash = std::pair<std::pair<hash_code, DILocalVariable *>, DIExpression *>; - auto makeHash = [](DbgVariableIntrinsic *D) -> DbgIntrinsicHash { + auto makeHash = [](auto *D) -> DbgIntrinsicHash { auto VarLocOps = D->location_ops(); return {{hash_combine_range(VarLocOps.begin(), VarLocOps.end()), D->getVariable()}, D->getExpression()}; }; + SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics; for (Instruction &I : llvm::drop_begin(llvm::reverse(*OrigPreheader))) { - if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I)) + if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I)) { DbgIntrinsics.insert(makeHash(DII)); - else + // Until RemoveDIs supports dbg.declares in DPValue format, we'll need + // to collect DPValues attached to any other debug intrinsics. + for (const DPValue &DPV : DII->getDbgValueRange()) + DbgIntrinsics.insert(makeHash(&DPV)); + } else { break; + } } + // Build DPValue hashes for DPValues attached to the terminator, which isn't + // considered in the loop above. + for (const DPValue &DPV : + OrigPreheader->getTerminator()->getDbgValueRange()) + DbgIntrinsics.insert(makeHash(&DPV)); + // Remember the local noalias scope declarations in the header. After the // rotation, they must be duplicated and the scope must be cloned. This // avoids unwanted interaction across iterations. @@ -416,6 +575,29 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) NoAliasDeclInstructions.push_back(Decl); + Module *M = OrigHeader->getModule(); + + // Track the next DPValue to clone. If we have a sequence where an + // instruction is hoisted instead of being cloned: + // DPValue blah + // %foo = add i32 0, 0 + // DPValue xyzzy + // %bar = call i32 @foobar() + // where %foo is hoisted, then the DPValue "blah" will be seen twice, once + // attached to %foo, then when %foo his hoisted it will "fall down" onto the + // function call: + // DPValue blah + // DPValue xyzzy + // %bar = call i32 @foobar() + // causing it to appear attached to the call too. + // + // To avoid this, cloneDebugInfoFrom takes an optional "start cloning from + // here" position to account for this behaviour. We point it at any DPValues + // on the next instruction, here labelled xyzzy, before we hoist %foo. + // Later, we only only clone DPValues from that position (xyzzy) onwards, + // which avoids cloning DPValue "blah" multiple times. + std::optional<DPValue::self_iterator> NextDbgInst = std::nullopt; + while (I != E) { Instruction *Inst = &*I++; @@ -428,7 +610,21 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() && !Inst->isTerminator() && !isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) { + + if (LoopEntryBranch->getParent()->IsNewDbgInfoFormat) { + auto DbgValueRange = + LoopEntryBranch->cloneDebugInfoFrom(Inst, NextDbgInst); + RemapDPValueRange(M, DbgValueRange, ValueMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + // Erase anything we've seen before. + for (DPValue &DPV : make_early_inc_range(DbgValueRange)) + if (DbgIntrinsics.count(makeHash(&DPV))) + DPV.eraseFromParent(); + } + + NextDbgInst = I->getDbgValueRange().begin(); Inst->moveBefore(LoopEntryBranch); + ++NumInstrsHoisted; continue; } @@ -439,6 +635,17 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { ++NumInstrsDuplicated; + if (LoopEntryBranch->getParent()->IsNewDbgInfoFormat) { + auto Range = C->cloneDebugInfoFrom(Inst, NextDbgInst); + RemapDPValueRange(M, Range, ValueMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + NextDbgInst = std::nullopt; + // Erase anything we've seen before. + for (DPValue &DPV : make_early_inc_range(Range)) + if (DbgIntrinsics.count(makeHash(&DPV))) + DPV.eraseFromParent(); + } + // Eagerly remap the operands of the instruction. RemapInstruction(C, ValueMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); @@ -553,6 +760,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // OrigPreHeader's old terminator (the original branch into the loop), and // remove the corresponding incoming values from the PHI nodes in OrigHeader. LoopEntryBranch->eraseFromParent(); + OrigPreheader->flushTerminatorDbgValues(); // Update MemorySSA before the rewrite call below changes the 1:1 // instruction:cloned_instruction_or_value mapping. @@ -605,9 +813,14 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // to split as many edges. BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator()); assert(PHBI->isConditional() && "Should be clone of BI condbr!"); - if (!isa<ConstantInt>(PHBI->getCondition()) || - PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) != - NewHeader) { + const Value *Cond = PHBI->getCondition(); + const bool HasConditionalPreHeader = + !isa<ConstantInt>(Cond) || + PHBI->getSuccessor(cast<ConstantInt>(Cond)->isZero()) != NewHeader; + + updateBranchWeights(*PHBI, *BI, HasConditionalPreHeader, BISuccsSwapped); + + if (HasConditionalPreHeader) { // The conditional branch can't be folded, handle the general case. // Split edges as necessary to preserve LoopSimplify form. diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp index 3e604fdf2e11..07e622b1577f 100644 --- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -429,8 +429,8 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx)); } // Nuke all entries except the zero'th. - for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i) - PN->removeIncomingValue(e-i, false); + PN->removeIncomingValueIf([](unsigned Idx) { return Idx != 0; }, + /* DeletePHIIfEmpty */ false); // Finally, add the newly constructed PHI node as the entry for the BEBlock. PN->addIncoming(NewPN, BEBlock); diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 511dd61308f9..ee6f7b35750a 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -24,7 +24,6 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/ilist_iterator.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -838,7 +837,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, DTUToUse ? nullptr : DT)) { // Dest has been folded into Fold. Update our worklists accordingly. std::replace(Latches.begin(), Latches.end(), Dest, Fold); - llvm::erase_value(UnrolledLoopBlocks, Dest); + llvm::erase(UnrolledLoopBlocks, Dest); } } } diff --git a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp index 31b8cd34eb24..3c06a6e47a30 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/DomTreeUpdater.h" diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 1e22eca30d2d..612f69970881 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -56,6 +56,17 @@ static cl::opt<bool> UnrollRuntimeOtherExitPredictable( "unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden, cl::desc("Assume the non latch exit block to be predictable")); +// Probability that the loop trip count is so small that after the prolog +// we do not enter the unrolled loop at all. +// It is unlikely that the loop trip count is smaller than the unroll factor; +// other than that, the choice of constant is not tuned yet. +static const uint32_t UnrolledLoopHeaderWeights[] = {1, 127}; +// Probability that the loop trip count is so small that we skip the unrolled +// loop completely and immediately enter the epilogue loop. +// It is unlikely that the loop trip count is smaller than the unroll factor; +// other than that, the choice of constant is not tuned yet. +static const uint32_t EpilogHeaderWeights[] = {1, 127}; + /// Connect the unrolling prolog code to the original loop. /// The unrolling prolog code contains code to execute the /// 'extra' iterations if the run-time trip count modulo the @@ -105,8 +116,8 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, // PrologLatch. When supporting multiple-exiting block loops, we can have // two or more blocks that have the LatchExit as the target in the // original loop. - PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr", - PrologExit->getFirstNonPHI()); + PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr"); + NewPN->insertBefore(PrologExit->getFirstNonPHIIt()); // Adding a value to the new PHI node from the original loop preheader. // This is the value that skips all the prolog code. if (L->contains(&PN)) { @@ -169,7 +180,14 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI, nullptr, PreserveLCSSA); // Add the branch to the exit block (around the unrolled loop) - B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader); + MDNode *BranchWeights = nullptr; + if (hasBranchWeightMD(*Latch->getTerminator())) { + // Assume loop is nearly always entered. + MDBuilder MDB(B.getContext()); + BranchWeights = MDB.createBranchWeights(UnrolledLoopHeaderWeights); + } + B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader, + BranchWeights); InsertPt->eraseFromParent(); if (DT) { auto *NewDom = DT->findNearestCommonDominator(OriginalLoopLatchExit, @@ -194,8 +212,8 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, BasicBlock *Exit, BasicBlock *PreHeader, BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, - LoopInfo *LI, bool PreserveLCSSA, - ScalarEvolution &SE) { + LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE, + unsigned Count) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]); @@ -269,8 +287,8 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, for (PHINode &PN : Succ->phis()) { // Add new PHI nodes to the loop exit block and update epilog // PHIs with the new PHI values. - PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr", - NewExit->getFirstNonPHI()); + PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr"); + NewPN->insertBefore(NewExit->getFirstNonPHIIt()); // Adding a value to the new PHI node from the unrolling loop preheader. NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader); // Adding a value to the new PHI node from the unrolling loop latch. @@ -292,7 +310,13 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr, PreserveLCSSA); // Add the branch to the exit block (around the unrolling loop) - B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit); + MDNode *BranchWeights = nullptr; + if (hasBranchWeightMD(*Latch->getTerminator())) { + // Assume equal distribution in interval [0, Count). + MDBuilder MDB(B.getContext()); + BranchWeights = MDB.createBranchWeights(1, Count - 1); + } + B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit, BranchWeights); InsertPt->eraseFromParent(); if (DT) { auto *NewDom = DT->findNearestCommonDominator(Exit, NewExit); @@ -316,8 +340,9 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, - std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, - ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { + std::vector<BasicBlock *> &NewBlocks, + LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, + DominatorTree *DT, LoopInfo *LI, unsigned Count) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); @@ -363,14 +388,34 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder, BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]); BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator()); IRBuilder<> Builder(LatchBR); - PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, - suffix + ".iter", - FirstLoopBB->getFirstNonPHI()); + PHINode *NewIdx = + PHINode::Create(NewIter->getType(), 2, suffix + ".iter"); + NewIdx->insertBefore(FirstLoopBB->getFirstNonPHIIt()); auto *Zero = ConstantInt::get(NewIdx->getType(), 0); auto *One = ConstantInt::get(NewIdx->getType(), 1); - Value *IdxNext = Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next"); + Value *IdxNext = + Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next"); Value *IdxCmp = Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp"); - Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); + MDNode *BranchWeights = nullptr; + if (hasBranchWeightMD(*LatchBR)) { + uint32_t ExitWeight; + uint32_t BackEdgeWeight; + if (Count >= 3) { + // Note: We do not enter this loop for zero-remainders. The check + // is at the end of the loop. We assume equal distribution between + // possible remainders in [1, Count). + ExitWeight = 1; + BackEdgeWeight = (Count - 2) / 2; + } else { + // Unnecessary backedge, should never be taken. The conditional + // jump should be optimized away later. + ExitWeight = 1; + BackEdgeWeight = 0; + } + MDBuilder MDB(Builder.getContext()); + BranchWeights = MDB.createBranchWeights(BackEdgeWeight, ExitWeight); + } + Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot, BranchWeights); NewIdx->addIncoming(Zero, InsertTop); NewIdx->addIncoming(IdxNext, NewBB); LatchBR->eraseFromParent(); @@ -464,32 +509,6 @@ static bool canProfitablyUnrollMultiExitLoop( // know of kinds of multiexit loops that would benefit from unrolling. } -// Assign the maximum possible trip count as the back edge weight for the -// remainder loop if the original loop comes with a branch weight. -static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop, - Loop *RemainderLoop, - uint64_t UnrollFactor) { - uint64_t TrueWeight, FalseWeight; - BranchInst *LatchBR = - cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator()); - if (!extractBranchWeights(*LatchBR, TrueWeight, FalseWeight)) - return; - uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader() - ? FalseWeight - : TrueWeight; - assert(UnrollFactor > 1); - uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight; - BasicBlock *Header = RemainderLoop->getHeader(); - BasicBlock *Latch = RemainderLoop->getLoopLatch(); - auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator()); - unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1); - MDBuilder MDB(RemainderLatchBR->getContext()); - MDNode *WeightNode = - HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight) - : MDB.createBranchWeights(BackEdgeWeight, ExitWeight); - RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); -} - /// Calculate ModVal = (BECount + 1) % Count on the abstract integer domain /// accounting for the possibility of unsigned overflow in the 2s complement /// domain. Preconditions: @@ -775,7 +794,13 @@ bool llvm::UnrollRuntimeLoopRemainder( BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader; BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit; // Branch to either remainder (extra iterations) loop or unrolling loop. - B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop); + MDNode *BranchWeights = nullptr; + if (hasBranchWeightMD(*Latch->getTerminator())) { + // Assume loop is nearly always entered. + MDBuilder MDB(B.getContext()); + BranchWeights = MDB.createBranchWeights(EpilogHeaderWeights); + } + B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop, BranchWeights); PreHeaderBR->eraseFromParent(); if (DT) { if (UseEpilogRemainder) @@ -804,12 +829,7 @@ bool llvm::UnrollRuntimeLoopRemainder( BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; Loop *remainderLoop = CloneLoopBlocks( L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot, - NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); - - // Assign the maximum possible trip count as the back edge weight for the - // remainder loop if the original loop comes with a branch weight. - if (remainderLoop && !UnrollRemainder) - updateLatchBranchWeightsForRemainderLoop(L, remainderLoop, Count); + NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI, Count); // Insert the cloned blocks into the function. F->splice(InsertBot->getIterator(), F, NewBlocks[0]->getIterator(), F->end()); @@ -893,9 +913,12 @@ bool llvm::UnrollRuntimeLoopRemainder( // Rewrite the cloned instruction operands to use the values created when the // clone is created. for (BasicBlock *BB : NewBlocks) { + Module *M = BB->getModule(); for (Instruction &I : *BB) { RemapInstruction(&I, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + RemapDPValueRange(M, I.getDbgValueRange(), VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); } } @@ -903,7 +926,7 @@ bool llvm::UnrollRuntimeLoopRemainder( // Connect the epilog code to the original loop and update the // PHI functions. ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader, - NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE); + NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count); // Update counter in loop for unrolling. // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count. @@ -912,8 +935,8 @@ bool llvm::UnrollRuntimeLoopRemainder( IRBuilder<> B2(NewPreHeader->getTerminator()); Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter"); BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); - PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter", - Header->getFirstNonPHI()); + PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter"); + NewIdx->insertBefore(Header->getFirstNonPHIIt()); B2.SetInsertPoint(LatchBR); auto *Zero = ConstantInt::get(NewIdx->getType(), 0); auto *One = ConstantInt::get(NewIdx->getType(), 1); diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 7d6662c44f07..59485126b280 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -296,7 +296,7 @@ std::optional<MDNode *> llvm::makeFollowupLoopID( StringRef AttrName = cast<MDString>(NameMD)->getString(); // Do not inherit excluded attributes. - return !AttrName.startswith(InheritOptionsExceptPrefix); + return !AttrName.starts_with(InheritOptionsExceptPrefix); }; if (InheritThisAttribute(Op)) @@ -556,12 +556,8 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, // Removes all incoming values from all other exiting blocks (including // duplicate values from an exiting block). // Nuke all entries except the zero'th entry which is the preheader entry. - // NOTE! We need to remove Incoming Values in the reverse order as done - // below, to keep the indices valid for deletion (removeIncomingValues - // updates getNumIncomingValues and shifts all values down into the - // operand being deleted). - for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i) - P.removeIncomingValue(e - i, false); + P.removeIncomingValueIf([](unsigned Idx) { return Idx != 0; }, + /* DeletePHIIfEmpty */ false); assert((P.getNumIncomingValues() == 1 && P.getIncomingBlock(PredIndex) == Preheader) && @@ -608,6 +604,7 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, // Use a map to unique and a vector to guarantee deterministic ordering. llvm::SmallDenseSet<DebugVariable, 4> DeadDebugSet; llvm::SmallVector<DbgVariableIntrinsic *, 4> DeadDebugInst; + llvm::SmallVector<DPValue *, 4> DeadDPValues; if (ExitBlock) { // Given LCSSA form is satisfied, we should not have users of instructions @@ -632,6 +629,24 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, "Unexpected user in reachable block"); U.set(Poison); } + + // RemoveDIs: do the same as below for DPValues. + if (Block->IsNewDbgInfoFormat) { + for (DPValue &DPV : + llvm::make_early_inc_range(I.getDbgValueRange())) { + DebugVariable Key(DPV.getVariable(), DPV.getExpression(), + DPV.getDebugLoc().get()); + if (!DeadDebugSet.insert(Key).second) + continue; + // Unlinks the DPV from it's container, for later insertion. + DPV.removeFromParent(); + DeadDPValues.push_back(&DPV); + } + } + + // For one of each variable encountered, preserve a debug intrinsic (set + // to Poison) and transfer it to the loop exit. This terminates any + // variable locations that were set during the loop. auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I); if (!DVI) continue; @@ -646,12 +661,22 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, // be be replaced with undef. Loop invariant values will still be available. // Move dbg.values out the loop so that earlier location ranges are still // terminated and loop invariant assignments are preserved. - Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI(); - assert(InsertDbgValueBefore && + DIBuilder DIB(*ExitBlock->getModule()); + BasicBlock::iterator InsertDbgValueBefore = + ExitBlock->getFirstInsertionPt(); + assert(InsertDbgValueBefore != ExitBlock->end() && "There should be a non-PHI instruction in exit block, else these " "instructions will have no parent."); + for (auto *DVI : DeadDebugInst) - DVI->moveBefore(InsertDbgValueBefore); + DVI->moveBefore(*ExitBlock, InsertDbgValueBefore); + + // Due to the "head" bit in BasicBlock::iterator, we're going to insert + // each DPValue right at the start of the block, wheras dbg.values would be + // repeatedly inserted before the first instruction. To replicate this + // behaviour, do it backwards. + for (DPValue *DPV : llvm::reverse(DeadDPValues)) + ExitBlock->insertDPValueBefore(DPV, InsertDbgValueBefore); } // Remove the block from the reference counting scheme, so that we can @@ -937,8 +962,8 @@ CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) { } } -Value *llvm::createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal, - RecurKind RK, Value *Left, Value *Right) { +Value *llvm::createAnyOfOp(IRBuilderBase &Builder, Value *StartVal, + RecurKind RK, Value *Left, Value *Right) { if (auto VTy = dyn_cast<VectorType>(Left->getType())) StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal); Value *Cmp = @@ -1028,14 +1053,12 @@ Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); } -Value *llvm::createSelectCmpTargetReduction(IRBuilderBase &Builder, - const TargetTransformInfo *TTI, - Value *Src, - const RecurrenceDescriptor &Desc, - PHINode *OrigPhi) { - assert(RecurrenceDescriptor::isSelectCmpRecurrenceKind( - Desc.getRecurrenceKind()) && - "Unexpected reduction kind"); +Value *llvm::createAnyOfTargetReduction(IRBuilderBase &Builder, Value *Src, + const RecurrenceDescriptor &Desc, + PHINode *OrigPhi) { + assert( + RecurrenceDescriptor::isAnyOfRecurrenceKind(Desc.getRecurrenceKind()) && + "Unexpected reduction kind"); Value *InitVal = Desc.getRecurrenceStartValue(); Value *NewVal = nullptr; @@ -1068,9 +1091,8 @@ Value *llvm::createSelectCmpTargetReduction(IRBuilderBase &Builder, return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select"); } -Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, - const TargetTransformInfo *TTI, - Value *Src, RecurKind RdxKind) { +Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, Value *Src, + RecurKind RdxKind) { auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType(); switch (RdxKind) { case RecurKind::Add: @@ -1111,7 +1133,6 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, } Value *llvm::createTargetReduction(IRBuilderBase &B, - const TargetTransformInfo *TTI, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi) { // TODO: Support in-order reductions based on the recurrence descriptor. @@ -1121,10 +1142,10 @@ Value *llvm::createTargetReduction(IRBuilderBase &B, B.setFastMathFlags(Desc.getFastMathFlags()); RecurKind RK = Desc.getRecurrenceKind(); - if (RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) - return createSelectCmpTargetReduction(B, TTI, Src, Desc, OrigPhi); + if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) + return createAnyOfTargetReduction(B, Src, Desc, OrigPhi); - return createSimpleTargetReduction(B, TTI, Src, RK); + return createSimpleTargetReduction(B, Src, RK); } Value *llvm::createOrderedReduction(IRBuilderBase &B, @@ -1453,7 +1474,7 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, // Note that we must not perform expansions until after // we query *all* the costs, because if we perform temporary expansion // inbetween, one that we might not intend to keep, said expansion - // *may* affect cost calculation of the the next SCEV's we'll query, + // *may* affect cost calculation of the next SCEV's we'll query, // and next SCEV may errneously get smaller cost. // Collect all the candidate PHINodes to be rewritten. @@ -1632,42 +1653,92 @@ Loop *llvm::cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, struct PointerBounds { TrackingVH<Value> Start; TrackingVH<Value> End; + Value *StrideToCheck; }; /// Expand code for the lower and upper bound of the pointer group \p CG /// in \p TheLoop. \return the values for the bounds. static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG, Loop *TheLoop, Instruction *Loc, - SCEVExpander &Exp) { + SCEVExpander &Exp, bool HoistRuntimeChecks) { LLVMContext &Ctx = Loc->getContext(); - Type *PtrArithTy = Type::getInt8PtrTy(Ctx, CG->AddressSpace); + Type *PtrArithTy = PointerType::get(Ctx, CG->AddressSpace); Value *Start = nullptr, *End = nullptr; LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); - Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc); - End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc); + const SCEV *Low = CG->Low, *High = CG->High, *Stride = nullptr; + + // If the Low and High values are themselves loop-variant, then we may want + // to expand the range to include those covered by the outer loop as well. + // There is a trade-off here with the advantage being that creating checks + // using the expanded range permits the runtime memory checks to be hoisted + // out of the outer loop. This reduces the cost of entering the inner loop, + // which can be significant for low trip counts. The disadvantage is that + // there is a chance we may now never enter the vectorized inner loop, + // whereas using a restricted range check could have allowed us to enter at + // least once. This is why the behaviour is not currently the default and is + // controlled by the parameter 'HoistRuntimeChecks'. + if (HoistRuntimeChecks && TheLoop->getParentLoop() && + isa<SCEVAddRecExpr>(High) && isa<SCEVAddRecExpr>(Low)) { + auto *HighAR = cast<SCEVAddRecExpr>(High); + auto *LowAR = cast<SCEVAddRecExpr>(Low); + const Loop *OuterLoop = TheLoop->getParentLoop(); + const SCEV *Recur = LowAR->getStepRecurrence(*Exp.getSE()); + if (Recur == HighAR->getStepRecurrence(*Exp.getSE()) && + HighAR->getLoop() == OuterLoop && LowAR->getLoop() == OuterLoop) { + BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); + const SCEV *OuterExitCount = + Exp.getSE()->getExitCount(OuterLoop, OuterLoopLatch); + if (!isa<SCEVCouldNotCompute>(OuterExitCount) && + OuterExitCount->getType()->isIntegerTy()) { + const SCEV *NewHigh = cast<SCEVAddRecExpr>(High)->evaluateAtIteration( + OuterExitCount, *Exp.getSE()); + if (!isa<SCEVCouldNotCompute>(NewHigh)) { + LLVM_DEBUG(dbgs() << "LAA: Expanded RT check for range to include " + "outer loop in order to permit hoisting\n"); + High = NewHigh; + Low = cast<SCEVAddRecExpr>(Low)->getStart(); + // If there is a possibility that the stride is negative then we have + // to generate extra checks to ensure the stride is positive. + if (!Exp.getSE()->isKnownNonNegative(Recur)) { + Stride = Recur; + LLVM_DEBUG(dbgs() << "LAA: ... but need to check stride is " + "positive: " + << *Stride << '\n'); + } + } + } + } + } + + Start = Exp.expandCodeFor(Low, PtrArithTy, Loc); + End = Exp.expandCodeFor(High, PtrArithTy, Loc); if (CG->NeedsFreeze) { IRBuilder<> Builder(Loc); Start = Builder.CreateFreeze(Start, Start->getName() + ".fr"); End = Builder.CreateFreeze(End, End->getName() + ".fr"); } - LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n"); - return {Start, End}; + Value *StrideVal = + Stride ? Exp.expandCodeFor(Stride, Stride->getType(), Loc) : nullptr; + LLVM_DEBUG(dbgs() << "Start: " << *Low << " End: " << *High << "\n"); + return {Start, End, StrideVal}; } /// Turns a collection of checks into a collection of expanded upper and /// lower bounds for both pointers in the check. static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L, - Instruction *Loc, SCEVExpander &Exp) { + Instruction *Loc, SCEVExpander &Exp, bool HoistRuntimeChecks) { SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds; // Here we're relying on the SCEV Expander's cache to only emit code for the // same bounds once. transform(PointerChecks, std::back_inserter(ChecksWithBounds), [&](const RuntimePointerCheck &Check) { - PointerBounds First = expandBounds(Check.first, L, Loc, Exp), - Second = expandBounds(Check.second, L, Loc, Exp); + PointerBounds First = expandBounds(Check.first, L, Loc, Exp, + HoistRuntimeChecks), + Second = expandBounds(Check.second, L, Loc, Exp, + HoistRuntimeChecks); return std::make_pair(First, Second); }); @@ -1677,10 +1748,11 @@ expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L, Value *llvm::addRuntimeChecks( Instruction *Loc, Loop *TheLoop, const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, - SCEVExpander &Exp) { + SCEVExpander &Exp, bool HoistRuntimeChecks) { // TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible. // TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible - auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, Exp); + auto ExpandedChecks = + expandBounds(PointerChecks, TheLoop, Loc, Exp, HoistRuntimeChecks); LLVMContext &Ctx = Loc->getContext(); IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx, @@ -1693,21 +1765,13 @@ Value *llvm::addRuntimeChecks( const PointerBounds &A = Check.first, &B = Check.second; // Check if two pointers (A and B) conflict where conflict is computed as: // start(A) <= end(B) && start(B) <= end(A) - unsigned AS0 = A.Start->getType()->getPointerAddressSpace(); - unsigned AS1 = B.Start->getType()->getPointerAddressSpace(); - assert((AS0 == B.End->getType()->getPointerAddressSpace()) && - (AS1 == A.End->getType()->getPointerAddressSpace()) && + assert((A.Start->getType()->getPointerAddressSpace() == + B.End->getType()->getPointerAddressSpace()) && + (B.Start->getType()->getPointerAddressSpace() == + A.End->getType()->getPointerAddressSpace()) && "Trying to bounds check pointers with different address spaces"); - Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0); - Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1); - - Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc"); - Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc"); - Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc"); - Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc"); - // [A|B].Start points to the first accessed byte under base [A|B]. // [A|B].End points to the last accessed byte, plus one. // There is no conflict when the intervals are disjoint: @@ -1716,9 +1780,21 @@ Value *llvm::addRuntimeChecks( // bound0 = (B.Start < A.End) // bound1 = (A.Start < B.End) // IsConflict = bound0 & bound1 - Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0"); - Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1"); + Value *Cmp0 = ChkBuilder.CreateICmpULT(A.Start, B.End, "bound0"); + Value *Cmp1 = ChkBuilder.CreateICmpULT(B.Start, A.End, "bound1"); Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); + if (A.StrideToCheck) { + Value *IsNegativeStride = ChkBuilder.CreateICmpSLT( + A.StrideToCheck, ConstantInt::get(A.StrideToCheck->getType(), 0), + "stride.check"); + IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride); + } + if (B.StrideToCheck) { + Value *IsNegativeStride = ChkBuilder.CreateICmpSLT( + B.StrideToCheck, ConstantInt::get(B.StrideToCheck->getType(), 0), + "stride.check"); + IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride); + } if (MemoryRuntimeCheck) { IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx"); @@ -1740,23 +1816,31 @@ Value *llvm::addDiffRuntimeChecks( // Our instructions might fold to a constant. Value *MemoryRuntimeCheck = nullptr; + auto &SE = *Expander.getSE(); + // Map to keep track of created compares, The key is the pair of operands for + // the compare, to allow detecting and re-using redundant compares. + DenseMap<std::pair<Value *, Value *>, Value *> SeenCompares; for (const auto &C : Checks) { Type *Ty = C.SinkStart->getType(); // Compute VF * IC * AccessSize. auto *VFTimesUFTimesSize = ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()), ConstantInt::get(Ty, IC * C.AccessSize)); - Value *Sink = Expander.expandCodeFor(C.SinkStart, Ty, Loc); - Value *Src = Expander.expandCodeFor(C.SrcStart, Ty, Loc); - if (C.NeedsFreeze) { - IRBuilder<> Builder(Loc); - Sink = Builder.CreateFreeze(Sink, Sink->getName() + ".fr"); - Src = Builder.CreateFreeze(Src, Src->getName() + ".fr"); - } - Value *Diff = ChkBuilder.CreateSub(Sink, Src); - Value *IsConflict = - ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check"); + Value *Diff = Expander.expandCodeFor( + SE.getMinusSCEV(C.SinkStart, C.SrcStart), Ty, Loc); + + // Check if the same compare has already been created earlier. In that case, + // there is no need to check it again. + Value *IsConflict = SeenCompares.lookup({Diff, VFTimesUFTimesSize}); + if (IsConflict) + continue; + IsConflict = + ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check"); + SeenCompares.insert({{Diff, VFTimesUFTimesSize}, IsConflict}); + if (C.NeedsFreeze) + IsConflict = + ChkBuilder.CreateFreeze(IsConflict, IsConflict->getName() + ".fr"); if (MemoryRuntimeCheck) { IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx"); diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp index 78ebe75c121b..548b0f3c55f0 100644 --- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp +++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp @@ -145,8 +145,8 @@ void LoopVersioning::addPHINodes( } // If not create it. if (!PN) { - PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver", - &PHIBlock->front()); + PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver"); + PN->insertBefore(PHIBlock->begin()); SmallVector<User*, 8> UsersToUpdate; for (User *U : Inst->users()) if (!VersionedLoop->contains(cast<Instruction>(U)->getParent())) diff --git a/llvm/lib/Transforms/Utils/LowerGlobalDtors.cpp b/llvm/lib/Transforms/Utils/LowerGlobalDtors.cpp index 195c274ff18e..4908535cba54 100644 --- a/llvm/lib/Transforms/Utils/LowerGlobalDtors.cpp +++ b/llvm/lib/Transforms/Utils/LowerGlobalDtors.cpp @@ -128,7 +128,7 @@ static bool runImpl(Module &M) { // extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d); LLVMContext &C = M.getContext(); - PointerType *VoidStar = Type::getInt8PtrTy(C); + PointerType *VoidStar = PointerType::getUnqual(C); Type *AtExitFuncArgs[] = {VoidStar}; FunctionType *AtExitFuncTy = FunctionType::get(Type::getVoidTy(C), AtExitFuncArgs, @@ -140,6 +140,17 @@ static bool runImpl(Module &M) { {PointerType::get(AtExitFuncTy, 0), VoidStar, VoidStar}, /*isVarArg=*/false)); + // If __cxa_atexit is defined (e.g. in the case of LTO) and arg0 is not + // actually used (i.e. it's dummy/stub function as used in emscripten when + // the program never exits) we can simply return early and clear out + // @llvm.global_dtors. + if (auto F = dyn_cast<Function>(AtExit.getCallee())) { + if (F && F->hasExactDefinition() && F->getArg(0)->getNumUses() == 0) { + GV->eraseFromParent(); + return true; + } + } + // Declare __dso_local. Type *DsoHandleTy = Type::getInt8Ty(C); Constant *DsoHandle = M.getOrInsertGlobal("__dso_handle", DsoHandleTy, [&] { diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp index 906eb71fc2d9..c75de8687879 100644 --- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp +++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp @@ -64,17 +64,6 @@ void llvm::createMemCpyLoopKnownSize( IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); - // Cast the Src and Dst pointers to pointers to the loop operand type (if - // needed). - PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); - PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); - if (SrcAddr->getType() != SrcOpType) { - SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); - } - if (DstAddr->getType() != DstOpType) { - DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); - } - Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize)); Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize)); @@ -137,13 +126,9 @@ void llvm::createMemCpyLoopKnownSize( uint64_t GepIndex = BytesCopied / OperandSize; assert(GepIndex * OperandSize == BytesCopied && "Division should have no Remainder!"); - // Cast source to operand type and load - PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS); - Value *CastedSrc = SrcAddr->getType() == SrcPtrType - ? SrcAddr - : RBuilder.CreateBitCast(SrcAddr, SrcPtrType); + Value *SrcGEP = RBuilder.CreateInBoundsGEP( - OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex)); + OpTy, SrcAddr, ConstantInt::get(TypeOfCopyLen, GepIndex)); LoadInst *Load = RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile); if (!CanOverlap) { @@ -151,13 +136,8 @@ void llvm::createMemCpyLoopKnownSize( Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope)); } - // Cast destination to operand type and store. - PointerType *DstPtrType = PointerType::get(OpTy, DstAS); - Value *CastedDst = DstAddr->getType() == DstPtrType - ? DstAddr - : RBuilder.CreateBitCast(DstAddr, DstPtrType); Value *DstGEP = RBuilder.CreateInBoundsGEP( - OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex)); + OpTy, DstAddr, ConstantInt::get(TypeOfCopyLen, GepIndex)); StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile); if (!CanOverlap) { @@ -206,15 +186,6 @@ void llvm::createMemCpyLoopUnknownSize( IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); - PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); - PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); - if (SrcAddr->getType() != SrcOpType) { - SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); - } - if (DstAddr->getType() != DstOpType) { - DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); - } - // Calculate the loop trip count, and remaining bytes to copy after the loop. Type *CopyLenType = CopyLen->getType(); IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType); @@ -305,13 +276,9 @@ void llvm::createMemCpyLoopUnknownSize( ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index"); ResidualIndex->addIncoming(Zero, ResHeaderBB); - Value *SrcAsResLoopOpType = ResBuilder.CreateBitCast( - SrcAddr, PointerType::get(ResLoopOpType, SrcAS)); - Value *DstAsResLoopOpType = ResBuilder.CreateBitCast( - DstAddr, PointerType::get(ResLoopOpType, DstAS)); Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex); - Value *SrcGEP = ResBuilder.CreateInBoundsGEP( - ResLoopOpType, SrcAsResLoopOpType, FullOffset); + Value *SrcGEP = + ResBuilder.CreateInBoundsGEP(ResLoopOpType, SrcAddr, FullOffset); LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP, PartSrcAlign, SrcIsVolatile); if (!CanOverlap) { @@ -319,8 +286,8 @@ void llvm::createMemCpyLoopUnknownSize( Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope)); } - Value *DstGEP = ResBuilder.CreateInBoundsGEP( - ResLoopOpType, DstAsResLoopOpType, FullOffset); + Value *DstGEP = + ResBuilder.CreateInBoundsGEP(ResLoopOpType, DstAddr, FullOffset); StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile); if (!CanOverlap) { @@ -479,11 +446,6 @@ static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr, IRBuilder<> Builder(OrigBB->getTerminator()); - // Cast pointer to the type of value getting stored - unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); - DstAddr = Builder.CreateBitCast(DstAddr, - PointerType::get(SetValue->getType(), dstAS)); - Builder.CreateCondBr( Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB, LoopBB); diff --git a/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/llvm/lib/Transforms/Utils/MetaRenamer.cpp index 44ac65f265f0..fd0112ae529c 100644 --- a/llvm/lib/Transforms/Utils/MetaRenamer.cpp +++ b/llvm/lib/Transforms/Utils/MetaRenamer.cpp @@ -151,7 +151,7 @@ void MetaRename(Module &M, auto IsNameExcluded = [](StringRef &Name, SmallVectorImpl<StringRef> &ExcludedPrefixes) { return any_of(ExcludedPrefixes, - [&Name](auto &Prefix) { return Name.startswith(Prefix); }); + [&Name](auto &Prefix) { return Name.starts_with(Prefix); }); }; // Leave library functions alone because their presence or absence could @@ -159,7 +159,7 @@ void MetaRename(Module &M, auto ExcludeLibFuncs = [&](Function &F) { LibFunc Tmp; StringRef Name = F.getName(); - return Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) || + return Name.starts_with("llvm.") || (!Name.empty() && Name[0] == 1) || GetTLI(F).getLibFunc(F, Tmp) || IsNameExcluded(Name, ExcludedFuncPrefixes); }; @@ -177,7 +177,7 @@ void MetaRename(Module &M, // Rename all aliases for (GlobalAlias &GA : M.aliases()) { StringRef Name = GA.getName(); - if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) || + if (Name.starts_with("llvm.") || (!Name.empty() && Name[0] == 1) || IsNameExcluded(Name, ExcludedAliasesPrefixes)) continue; @@ -187,7 +187,7 @@ void MetaRename(Module &M, // Rename all global variables for (GlobalVariable &GV : M.globals()) { StringRef Name = GV.getName(); - if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) || + if (Name.starts_with("llvm.") || (!Name.empty() && Name[0] == 1) || IsNameExcluded(Name, ExcludedGlobalsPrefixes)) continue; diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp index 1e243ef74df7..7de0959ca57e 100644 --- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -44,17 +44,17 @@ static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F, } GVCtor->eraseFromParent(); } else { - EltTy = StructType::get( - IRB.getInt32Ty(), PointerType::get(FnTy, F->getAddressSpace()), - IRB.getInt8PtrTy()); + EltTy = StructType::get(IRB.getInt32Ty(), + PointerType::get(FnTy, F->getAddressSpace()), + IRB.getPtrTy()); } // Build a 3 field global_ctor entry. We don't take a comdat key. Constant *CSVals[3]; CSVals[0] = IRB.getInt32(Priority); CSVals[1] = F; - CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy()) - : Constant::getNullValue(IRB.getInt8PtrTy()); + CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getPtrTy()) + : Constant::getNullValue(IRB.getPtrTy()); Constant *RuntimeCtorInit = ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements())); @@ -96,7 +96,7 @@ static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> if (GV) GV->eraseFromParent(); - Type *ArrayEltTy = llvm::Type::getInt8PtrTy(M.getContext()); + Type *ArrayEltTy = llvm::PointerType::getUnqual(M.getContext()); for (auto *V : Values) Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy)); @@ -301,7 +301,7 @@ std::string llvm::getUniqueModuleId(Module *M) { MD5 Md5; bool ExportsSymbols = false; auto AddGlobal = [&](GlobalValue &GV) { - if (GV.isDeclaration() || GV.getName().startswith("llvm.") || + if (GV.isDeclaration() || GV.getName().starts_with("llvm.") || !GV.hasExternalLinkage() || GV.hasComdat()) return; ExportsSymbols = true; @@ -346,7 +346,8 @@ void VFABI::setVectorVariantNames(CallInst *CI, #ifndef NDEBUG for (const std::string &VariantMapping : VariantMappings) { LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << VariantMapping << "'\n"); - std::optional<VFInfo> VI = VFABI::tryDemangleForVFABI(VariantMapping, *M); + std::optional<VFInfo> VI = + VFABI::tryDemangleForVFABI(VariantMapping, CI->getFunctionType()); assert(VI && "Cannot add an invalid VFABI name."); assert(M->getNamedValue(VI->VectorName) && "Cannot add variant to attribute: " diff --git a/llvm/lib/Transforms/Utils/MoveAutoInit.cpp b/llvm/lib/Transforms/Utils/MoveAutoInit.cpp index b0ca0b15c08e..a977ad87b79f 100644 --- a/llvm/lib/Transforms/Utils/MoveAutoInit.cpp +++ b/llvm/lib/Transforms/Utils/MoveAutoInit.cpp @@ -14,7 +14,6 @@ #include "llvm/Transforms/Utils/MoveAutoInit.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringSet.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ValueTracking.h" @@ -50,7 +49,7 @@ static std::optional<MemoryLocation> writeToAlloca(const Instruction &I) { else if (auto *SI = dyn_cast<StoreInst>(&I)) ML = MemoryLocation::get(SI); else - assert(false && "memory location set"); + return std::nullopt; if (isa<AllocaInst>(getUnderlyingObject(ML.Ptr))) return ML; @@ -202,7 +201,7 @@ static bool runMoveAutoInit(Function &F, DominatorTree &DT, MemorySSA &MSSA) { // if two instructions are moved from the same BB to the same BB, we insert // the second one in the front, then the first on top of it. for (auto &Job : reverse(JobList)) { - Job.first->moveBefore(&*Job.second->getFirstInsertionPt()); + Job.first->moveBefore(*Job.second, Job.second->getFirstInsertionPt()); MSSAU.moveToPlace(MSSA.getMemoryAccess(Job.first), Job.first->getParent(), MemorySSA::InsertionPlace::Beginning); } diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp index 1f16ba78bdb0..902977b08d15 100644 --- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp +++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp @@ -23,7 +23,6 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DebugCounter.h" @@ -33,12 +32,6 @@ using namespace llvm; using namespace PatternMatch; -INITIALIZE_PASS_BEGIN(PredicateInfoPrinterLegacyPass, "print-predicateinfo", - "PredicateInfo Printer", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_END(PredicateInfoPrinterLegacyPass, "print-predicateinfo", - "PredicateInfo Printer", false, false) static cl::opt<bool> VerifyPredicateInfo( "verify-predicateinfo", cl::init(false), cl::Hidden, cl::desc("Verify PredicateInfo in legacy printer pass.")); @@ -835,20 +828,6 @@ std::optional<PredicateConstraint> PredicateBase::getConstraint() const { void PredicateInfo::verifyPredicateInfo() const {} -char PredicateInfoPrinterLegacyPass::ID = 0; - -PredicateInfoPrinterLegacyPass::PredicateInfoPrinterLegacyPass() - : FunctionPass(ID) { - initializePredicateInfoPrinterLegacyPassPass( - *PassRegistry::getPassRegistry()); -} - -void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequiredTransitive<DominatorTreeWrapperPass>(); - AU.addRequired<AssumptionCacheTracker>(); -} - // Replace ssa_copy calls created by PredicateInfo with their operand. static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) { for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) { @@ -862,18 +841,6 @@ static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) { } } -bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) { - auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - auto PredInfo = std::make_unique<PredicateInfo>(F, DT, AC); - PredInfo->print(dbgs()); - if (VerifyPredicateInfo) - PredInfo->verifyPredicateInfo(); - - replaceCreatedSSACopys(*PredInfo, F); - return false; -} - PreservedAnalyses PredicateInfoPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { auto &DT = AM.getResult<DominatorTreeAnalysis>(F); diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 2e5f40d39912..717b6d301c8c 100644 --- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -31,6 +31,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugProgramInstruction.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" @@ -172,6 +173,7 @@ public: struct AllocaInfo { using DbgUserVec = SmallVector<DbgVariableIntrinsic *, 1>; + using DPUserVec = SmallVector<DPValue *, 1>; SmallVector<BasicBlock *, 32> DefiningBlocks; SmallVector<BasicBlock *, 32> UsingBlocks; @@ -182,6 +184,7 @@ struct AllocaInfo { /// Debug users of the alloca - does not include dbg.assign intrinsics. DbgUserVec DbgUsers; + DPUserVec DPUsers; /// Helper to update assignment tracking debug info. AssignmentTrackingInfo AssignmentTracking; @@ -192,6 +195,7 @@ struct AllocaInfo { OnlyBlock = nullptr; OnlyUsedInOneBlock = true; DbgUsers.clear(); + DPUsers.clear(); AssignmentTracking.clear(); } @@ -225,7 +229,7 @@ struct AllocaInfo { } } DbgUserVec AllDbgUsers; - findDbgUsers(AllDbgUsers, AI); + findDbgUsers(AllDbgUsers, AI, &DPUsers); std::copy_if(AllDbgUsers.begin(), AllDbgUsers.end(), std::back_inserter(DbgUsers), [](DbgVariableIntrinsic *DII) { return !isa<DbgAssignIntrinsic>(DII); @@ -329,6 +333,7 @@ struct PromoteMem2Reg { /// describes it, if any, so that we can convert it to a dbg.value /// intrinsic if the alloca gets promoted. SmallVector<AllocaInfo::DbgUserVec, 8> AllocaDbgUsers; + SmallVector<AllocaInfo::DPUserVec, 8> AllocaDPUsers; /// For each alloca, keep an instance of a helper class that gives us an easy /// way to update assignment tracking debug info if the alloca is promoted. @@ -525,14 +530,18 @@ static bool rewriteSingleStoreAlloca( // Record debuginfo for the store and remove the declaration's // debuginfo. - for (DbgVariableIntrinsic *DII : Info.DbgUsers) { - if (DII->isAddressOfVariable()) { - ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB); - DII->eraseFromParent(); - } else if (DII->getExpression()->startsWithDeref()) { - DII->eraseFromParent(); + auto ConvertDebugInfoForStore = [&](auto &Container) { + for (auto *DbgItem : Container) { + if (DbgItem->isAddressOfVariable()) { + ConvertDebugDeclareToDebugValue(DbgItem, Info.OnlyStore, DIB); + DbgItem->eraseFromParent(); + } else if (DbgItem->getExpression()->startsWithDeref()) { + DbgItem->eraseFromParent(); + } } - } + }; + ConvertDebugInfoForStore(Info.DbgUsers); + ConvertDebugInfoForStore(Info.DPUsers); // Remove dbg.assigns linked to the alloca as these are now redundant. at::deleteAssignmentMarkers(AI); @@ -629,12 +638,18 @@ static bool promoteSingleBlockAlloca( StoreInst *SI = cast<StoreInst>(AI->user_back()); // Update assignment tracking info for the store we're going to delete. Info.AssignmentTracking.updateForDeletedStore(SI, DIB, DbgAssignsToDelete); + // Record debuginfo for the store before removing it. - for (DbgVariableIntrinsic *DII : Info.DbgUsers) { - if (DII->isAddressOfVariable()) { - ConvertDebugDeclareToDebugValue(DII, SI, DIB); + auto DbgUpdateForStore = [&](auto &Container) { + for (auto *DbgItem : Container) { + if (DbgItem->isAddressOfVariable()) { + ConvertDebugDeclareToDebugValue(DbgItem, SI, DIB); + } } - } + }; + DbgUpdateForStore(Info.DbgUsers); + DbgUpdateForStore(Info.DPUsers); + SI->eraseFromParent(); LBI.deleteValue(SI); } @@ -644,9 +659,14 @@ static bool promoteSingleBlockAlloca( AI->eraseFromParent(); // The alloca's debuginfo can be removed as well. - for (DbgVariableIntrinsic *DII : Info.DbgUsers) - if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref()) - DII->eraseFromParent(); + auto DbgUpdateForAlloca = [&](auto &Container) { + for (auto *DbgItem : Container) + if (DbgItem->isAddressOfVariable() || + DbgItem->getExpression()->startsWithDeref()) + DbgItem->eraseFromParent(); + }; + DbgUpdateForAlloca(Info.DbgUsers); + DbgUpdateForAlloca(Info.DPUsers); ++NumLocalPromoted; return true; @@ -657,6 +677,7 @@ void PromoteMem2Reg::run() { AllocaDbgUsers.resize(Allocas.size()); AllocaATInfo.resize(Allocas.size()); + AllocaDPUsers.resize(Allocas.size()); AllocaInfo Info; LargeBlockInfo LBI; @@ -720,6 +741,8 @@ void PromoteMem2Reg::run() { AllocaDbgUsers[AllocaNum] = Info.DbgUsers; if (!Info.AssignmentTracking.empty()) AllocaATInfo[AllocaNum] = Info.AssignmentTracking; + if (!Info.DPUsers.empty()) + AllocaDPUsers[AllocaNum] = Info.DPUsers; // Keep the reverse mapping of the 'Allocas' array for the rename pass. AllocaLookup[Allocas[AllocaNum]] = AllocaNum; @@ -795,11 +818,16 @@ void PromoteMem2Reg::run() { } // Remove alloca's dbg.declare intrinsics from the function. - for (auto &DbgUsers : AllocaDbgUsers) { - for (auto *DII : DbgUsers) - if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref()) - DII->eraseFromParent(); - } + auto RemoveDbgDeclares = [&](auto &Container) { + for (auto &DbgUsers : Container) { + for (auto *DbgItem : DbgUsers) + if (DbgItem->isAddressOfVariable() || + DbgItem->getExpression()->startsWithDeref()) + DbgItem->eraseFromParent(); + } + }; + RemoveDbgDeclares(AllocaDbgUsers); + RemoveDbgDeclares(AllocaDPUsers); // Loop over all of the PHI nodes and see if there are any that we can get // rid of because they merge all of the same incoming values. This can @@ -981,8 +1009,8 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, // Create a PhiNode using the dereferenced type... and add the phi-node to the // BasicBlock. PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB), - Allocas[AllocaNo]->getName() + "." + Twine(Version++), - &BB->front()); + Allocas[AllocaNo]->getName() + "." + Twine(Version++)); + PN->insertBefore(BB->begin()); ++NumPHIInsert; PhiToAllocaMap[PN] = AllocaNo; return true; @@ -1041,9 +1069,13 @@ NextIteration: // The currently active variable for this block is now the PHI. IncomingVals[AllocaNo] = APN; AllocaATInfo[AllocaNo].updateForNewPhi(APN, DIB); - for (DbgVariableIntrinsic *DII : AllocaDbgUsers[AllocaNo]) - if (DII->isAddressOfVariable()) - ConvertDebugDeclareToDebugValue(DII, APN, DIB); + auto ConvertDbgDeclares = [&](auto &Container) { + for (auto *DbgItem : Container) + if (DbgItem->isAddressOfVariable()) + ConvertDebugDeclareToDebugValue(DbgItem, APN, DIB); + }; + ConvertDbgDeclares(AllocaDbgUsers[AllocaNo]); + ConvertDbgDeclares(AllocaDPUsers[AllocaNo]); // Get the next phi node. ++PNI; @@ -1098,9 +1130,13 @@ NextIteration: IncomingLocs[AllocaNo] = SI->getDebugLoc(); AllocaATInfo[AllocaNo].updateForDeletedStore(SI, DIB, &DbgAssignsToDelete); - for (DbgVariableIntrinsic *DII : AllocaDbgUsers[ai->second]) - if (DII->isAddressOfVariable()) - ConvertDebugDeclareToDebugValue(DII, SI, DIB); + auto ConvertDbgDeclares = [&](auto &Container) { + for (auto *DbgItem : Container) + if (DbgItem->isAddressOfVariable()) + ConvertDebugDeclareToDebugValue(DbgItem, SI, DIB); + }; + ConvertDbgDeclares(AllocaDbgUsers[ai->second]); + ConvertDbgDeclares(AllocaDPUsers[ai->second]); SI->eraseFromParent(); } } diff --git a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp index c9ff94dc9744..ea628d7c3d7d 100644 --- a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp +++ b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp @@ -153,17 +153,12 @@ static void convertToRelLookupTable(GlobalVariable &LookupTable) { Builder.SetInsertPoint(Load); Function *LoadRelIntrinsic = llvm::Intrinsic::getDeclaration( &M, Intrinsic::load_relative, {Index->getType()}); - Value *Base = Builder.CreateBitCast(RelLookupTable, Builder.getInt8PtrTy()); // Create a call to load.relative intrinsic that computes the target address // by adding base address (lookup table address) and relative offset. - Value *Result = Builder.CreateCall(LoadRelIntrinsic, {Base, Offset}, + Value *Result = Builder.CreateCall(LoadRelIntrinsic, {RelLookupTable, Offset}, "reltable.intrinsic"); - // Create a bitcast instruction if necessary. - if (Load->getType() != Builder.getInt8PtrTy()) - Result = Builder.CreateBitCast(Result, Load->getType(), "reltable.bitcast"); - // Replace load instruction with the new generated instruction sequence. Load->replaceAllUsesWith(Result); // Remove Load and GEP instructions. diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp index de3626a24212..ab95698abc43 100644 --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -107,9 +107,7 @@ bool SCCPSolver::tryToReplaceWithConstant(Value *V) { static bool refineInstruction(SCCPSolver &Solver, const SmallPtrSetImpl<Value *> &InsertedValues, Instruction &Inst) { - if (!isa<OverflowingBinaryOperator>(Inst)) - return false; - + bool Changed = false; auto GetRange = [&Solver, &InsertedValues](Value *Op) { if (auto *Const = dyn_cast<ConstantInt>(Op)) return ConstantRange(Const->getValue()); @@ -120,23 +118,32 @@ static bool refineInstruction(SCCPSolver &Solver, return getConstantRange(Solver.getLatticeValueFor(Op), Op->getType(), /*UndefAllowed=*/false); }; - auto RangeA = GetRange(Inst.getOperand(0)); - auto RangeB = GetRange(Inst.getOperand(1)); - bool Changed = false; - if (!Inst.hasNoUnsignedWrap()) { - auto NUWRange = ConstantRange::makeGuaranteedNoWrapRegion( - Instruction::BinaryOps(Inst.getOpcode()), RangeB, - OverflowingBinaryOperator::NoUnsignedWrap); - if (NUWRange.contains(RangeA)) { - Inst.setHasNoUnsignedWrap(); - Changed = true; + + if (isa<OverflowingBinaryOperator>(Inst)) { + auto RangeA = GetRange(Inst.getOperand(0)); + auto RangeB = GetRange(Inst.getOperand(1)); + if (!Inst.hasNoUnsignedWrap()) { + auto NUWRange = ConstantRange::makeGuaranteedNoWrapRegion( + Instruction::BinaryOps(Inst.getOpcode()), RangeB, + OverflowingBinaryOperator::NoUnsignedWrap); + if (NUWRange.contains(RangeA)) { + Inst.setHasNoUnsignedWrap(); + Changed = true; + } } - } - if (!Inst.hasNoSignedWrap()) { - auto NSWRange = ConstantRange::makeGuaranteedNoWrapRegion( - Instruction::BinaryOps(Inst.getOpcode()), RangeB, OverflowingBinaryOperator::NoSignedWrap); - if (NSWRange.contains(RangeA)) { - Inst.setHasNoSignedWrap(); + if (!Inst.hasNoSignedWrap()) { + auto NSWRange = ConstantRange::makeGuaranteedNoWrapRegion( + Instruction::BinaryOps(Inst.getOpcode()), RangeB, + OverflowingBinaryOperator::NoSignedWrap); + if (NSWRange.contains(RangeA)) { + Inst.setHasNoSignedWrap(); + Changed = true; + } + } + } else if (isa<ZExtInst>(Inst) && !Inst.hasNonNeg()) { + auto Range = GetRange(Inst.getOperand(0)); + if (Range.isAllNonNegative()) { + Inst.setNonNeg(); Changed = true; } } @@ -171,6 +178,7 @@ static bool replaceSignedInst(SCCPSolver &Solver, if (InsertedValues.count(Op0) || !isNonNegative(Op0)) return false; NewInst = new ZExtInst(Op0, Inst.getType(), "", &Inst); + NewInst->setNonNeg(); break; } case Instruction::AShr: { @@ -179,6 +187,7 @@ static bool replaceSignedInst(SCCPSolver &Solver, if (InsertedValues.count(Op0) || !isNonNegative(Op0)) return false; NewInst = BinaryOperator::CreateLShr(Op0, Inst.getOperand(1), "", &Inst); + NewInst->setIsExact(Inst.isExact()); break; } case Instruction::SDiv: @@ -191,6 +200,8 @@ static bool replaceSignedInst(SCCPSolver &Solver, auto NewOpcode = Inst.getOpcode() == Instruction::SDiv ? Instruction::UDiv : Instruction::URem; NewInst = BinaryOperator::Create(NewOpcode, Op0, Op1, "", &Inst); + if (Inst.getOpcode() == Instruction::SDiv) + NewInst->setIsExact(Inst.isExact()); break; } default: @@ -1029,8 +1040,9 @@ void SCCPInstVisitor::getFeasibleSuccessors(Instruction &TI, return; } - // Unwinding instructions successors are always executable. - if (TI.isExceptionalTerminator()) { + // We cannot analyze special terminators, so consider all successors + // executable. + if (TI.isSpecialTerminator()) { Succs.assign(TI.getNumSuccessors(), true); return; } @@ -1098,13 +1110,6 @@ void SCCPInstVisitor::getFeasibleSuccessors(Instruction &TI, return; } - // In case of callbr, we pessimistically assume that all successors are - // feasible. - if (isa<CallBrInst>(&TI)) { - Succs.assign(TI.getNumSuccessors(), true); - return; - } - LLVM_DEBUG(dbgs() << "Unknown terminator instruction: " << TI << '\n'); llvm_unreachable("SCCP: Don't know how to handle this terminator!"); } @@ -1231,10 +1236,12 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) { if (Constant *OpC = getConstant(OpSt, I.getOperand(0)->getType())) { // Fold the constant as we build. - Constant *C = ConstantFoldCastOperand(I.getOpcode(), OpC, I.getType(), DL); - markConstant(&I, C); - } else if (I.getDestTy()->isIntegerTy() && - I.getSrcTy()->isIntOrIntVectorTy()) { + if (Constant *C = + ConstantFoldCastOperand(I.getOpcode(), OpC, I.getType(), DL)) + return (void)markConstant(&I, C); + } + + if (I.getDestTy()->isIntegerTy() && I.getSrcTy()->isIntOrIntVectorTy()) { auto &LV = getValueState(&I); ConstantRange OpRange = getConstantRange(OpSt, I.getSrcTy()); @@ -1539,11 +1546,8 @@ void SCCPInstVisitor::visitGetElementPtrInst(GetElementPtrInst &I) { return (void)markOverdefined(&I); } - Constant *Ptr = Operands[0]; - auto Indices = ArrayRef(Operands.begin() + 1, Operands.end()); - Constant *C = - ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr, Indices); - markConstant(&I, C); + if (Constant *C = ConstantFoldInstOperands(&I, Operands, DL)) + markConstant(&I, C); } void SCCPInstVisitor::visitStoreInst(StoreInst &SI) { diff --git a/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/llvm/lib/Transforms/Utils/SSAUpdater.cpp index ebe9cb27f5ab..fc21fb552137 100644 --- a/llvm/lib/Transforms/Utils/SSAUpdater.cpp +++ b/llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -156,8 +156,9 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { } // Ok, we have no way out, insert a new one now. - PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(), - ProtoName, &BB->front()); + PHINode *InsertedPHI = + PHINode::Create(ProtoType, PredValues.size(), ProtoName); + InsertedPHI->insertBefore(BB->begin()); // Fill in all the predecessors of the PHI. for (const auto &PredValue : PredValues) @@ -198,12 +199,18 @@ void SSAUpdater::RewriteUse(Use &U) { void SSAUpdater::UpdateDebugValues(Instruction *I) { SmallVector<DbgValueInst *, 4> DbgValues; - llvm::findDbgValues(DbgValues, I); + SmallVector<DPValue *, 4> DPValues; + llvm::findDbgValues(DbgValues, I, &DPValues); for (auto &DbgValue : DbgValues) { if (DbgValue->getParent() == I->getParent()) continue; UpdateDebugValue(I, DbgValue); } + for (auto &DPV : DPValues) { + if (DPV->getParent() == I->getParent()) + continue; + UpdateDebugValue(I, DPV); + } } void SSAUpdater::UpdateDebugValues(Instruction *I, @@ -213,16 +220,31 @@ void SSAUpdater::UpdateDebugValues(Instruction *I, } } +void SSAUpdater::UpdateDebugValues(Instruction *I, + SmallVectorImpl<DPValue *> &DPValues) { + for (auto &DPV : DPValues) { + UpdateDebugValue(I, DPV); + } +} + void SSAUpdater::UpdateDebugValue(Instruction *I, DbgValueInst *DbgValue) { BasicBlock *UserBB = DbgValue->getParent(); if (HasValueForBlock(UserBB)) { Value *NewVal = GetValueAtEndOfBlock(UserBB); DbgValue->replaceVariableLocationOp(I, NewVal); - } - else + } else DbgValue->setKillLocation(); } +void SSAUpdater::UpdateDebugValue(Instruction *I, DPValue *DPV) { + BasicBlock *UserBB = DPV->getParent(); + if (HasValueForBlock(UserBB)) { + Value *NewVal = GetValueAtEndOfBlock(UserBB); + DPV->replaceVariableLocationOp(I, NewVal); + } else + DPV->setKillLocation(); +} + void SSAUpdater::RewriteUseAfterInsertions(Use &U) { Instruction *User = cast<Instruction>(U.getUser()); @@ -295,8 +317,9 @@ public: /// Reserve space for the operands but do not fill them in yet. static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds, SSAUpdater *Updater) { - PHINode *PHI = PHINode::Create(Updater->ProtoType, NumPreds, - Updater->ProtoName, &BB->front()); + PHINode *PHI = + PHINode::Create(Updater->ProtoType, NumPreds, Updater->ProtoName); + PHI->insertBefore(BB->begin()); return PHI; } diff --git a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp index 31d62fbf0618..101b70d8def4 100644 --- a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp +++ b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp @@ -159,7 +159,7 @@ public: /// Get the total flow from a given source node. /// Returns a list of pairs (target node, amount of flow to the target). - const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const { + std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const { std::vector<std::pair<uint64_t, int64_t>> Flow; for (const auto &Edge : Edges[Src]) { if (Edge.Flow > 0) diff --git a/llvm/lib/Transforms/Utils/SanitizerStats.cpp b/llvm/lib/Transforms/Utils/SanitizerStats.cpp index fd21ee4cc408..b80c5a6f9d68 100644 --- a/llvm/lib/Transforms/Utils/SanitizerStats.cpp +++ b/llvm/lib/Transforms/Utils/SanitizerStats.cpp @@ -21,7 +21,7 @@ using namespace llvm; SanitizerStatReport::SanitizerStatReport(Module *M) : M(M) { - StatTy = ArrayType::get(Type::getInt8PtrTy(M->getContext()), 2); + StatTy = ArrayType::get(PointerType::getUnqual(M->getContext()), 2); EmptyModuleStatsTy = makeModuleStatsTy(); ModuleStatsGV = new GlobalVariable(*M, EmptyModuleStatsTy, false, @@ -33,28 +33,28 @@ ArrayType *SanitizerStatReport::makeModuleStatsArrayTy() { } StructType *SanitizerStatReport::makeModuleStatsTy() { - return StructType::get(M->getContext(), {Type::getInt8PtrTy(M->getContext()), - Type::getInt32Ty(M->getContext()), - makeModuleStatsArrayTy()}); + return StructType::get(M->getContext(), + {PointerType::getUnqual(M->getContext()), + Type::getInt32Ty(M->getContext()), + makeModuleStatsArrayTy()}); } void SanitizerStatReport::create(IRBuilder<> &B, SanitizerStatKind SK) { Function *F = B.GetInsertBlock()->getParent(); Module *M = F->getParent(); - PointerType *Int8PtrTy = B.getInt8PtrTy(); + PointerType *PtrTy = B.getPtrTy(); IntegerType *IntPtrTy = B.getIntPtrTy(M->getDataLayout()); - ArrayType *StatTy = ArrayType::get(Int8PtrTy, 2); + ArrayType *StatTy = ArrayType::get(PtrTy, 2); Inits.push_back(ConstantArray::get( StatTy, - {Constant::getNullValue(Int8PtrTy), + {Constant::getNullValue(PtrTy), ConstantExpr::getIntToPtr( ConstantInt::get(IntPtrTy, uint64_t(SK) << (IntPtrTy->getBitWidth() - kSanitizerStatKindBits)), - Int8PtrTy)})); + PtrTy)})); - FunctionType *StatReportTy = - FunctionType::get(B.getVoidTy(), Int8PtrTy, false); + FunctionType *StatReportTy = FunctionType::get(B.getVoidTy(), PtrTy, false); FunctionCallee StatReport = M->getOrInsertFunction("__sanitizer_stat_report", StatReportTy); @@ -64,7 +64,7 @@ void SanitizerStatReport::create(IRBuilder<> &B, SanitizerStatKind SK) { ConstantInt::get(IntPtrTy, 0), ConstantInt::get(B.getInt32Ty(), 2), ConstantInt::get(IntPtrTy, Inits.size() - 1), }); - B.CreateCall(StatReport, ConstantExpr::getBitCast(InitAddr, Int8PtrTy)); + B.CreateCall(StatReport, InitAddr); } void SanitizerStatReport::finish() { @@ -73,7 +73,7 @@ void SanitizerStatReport::finish() { return; } - PointerType *Int8PtrTy = Type::getInt8PtrTy(M->getContext()); + PointerType *Int8PtrTy = PointerType::getUnqual(M->getContext()); IntegerType *Int32Ty = Type::getInt32Ty(M->getContext()); Type *VoidTy = Type::getVoidTy(M->getContext()); @@ -85,8 +85,7 @@ void SanitizerStatReport::finish() { {Constant::getNullValue(Int8PtrTy), ConstantInt::get(Int32Ty, Inits.size()), ConstantArray::get(makeModuleStatsArrayTy(), Inits)})); - ModuleStatsGV->replaceAllUsesWith( - ConstantExpr::getBitCast(NewModuleStatsGV, ModuleStatsGV->getType())); + ModuleStatsGV->replaceAllUsesWith(NewModuleStatsGV); ModuleStatsGV->eraseFromParent(); // Create a global constructor to register NewModuleStatsGV. @@ -99,7 +98,7 @@ void SanitizerStatReport::finish() { FunctionCallee StatInit = M->getOrInsertFunction("__sanitizer_stat_init", StatInitTy); - B.CreateCall(StatInit, ConstantExpr::getBitCast(NewModuleStatsGV, Int8PtrTy)); + B.CreateCall(StatInit, NewModuleStatsGV); B.CreateRetVoid(); appendToGlobalCtors(*M, F, 0); diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 20844271b943..cd3ac317cd23 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -170,11 +170,10 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { if (Op == Instruction::IntToPtr) { auto *PtrTy = cast<PointerType>(Ty); if (DL.isNonIntegralPointerType(PtrTy)) { - auto *Int8PtrTy = Builder.getInt8PtrTy(PtrTy->getAddressSpace()); assert(DL.getTypeAllocSize(Builder.getInt8Ty()) == 1 && "alloc size of i8 must by 1 byte for the GEP to be correct"); return Builder.CreateGEP( - Builder.getInt8Ty(), Constant::getNullValue(Int8PtrTy), V, "scevgep"); + Builder.getInt8Ty(), Constant::getNullValue(PtrTy), V, "scevgep"); } } // Short-circuit unnecessary bitcasts. @@ -313,11 +312,11 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, /// loop-invariant portions of expressions, after considering what /// can be folded using target addressing modes. /// -Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Type *Ty, Value *V) { +Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V) { assert(!isa<Instruction>(V) || SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint())); - Value *Idx = expandCodeForImpl(Offset, Ty); + Value *Idx = expand(Offset); // Fold a GEP with constant operands. if (Constant *CLHS = dyn_cast<Constant>(V)) @@ -339,7 +338,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Type *Ty, Value *V) { if (IP->getOpcode() == Instruction::GetElementPtr && IP->getOperand(0) == V && IP->getOperand(1) == Idx && cast<GEPOperator>(&*IP)->getSourceElementType() == - Type::getInt8Ty(Ty->getContext())) + Builder.getInt8Ty()) return &*IP; if (IP == BlockBegin) break; } @@ -457,8 +456,6 @@ public: } Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { - Type *Ty = SE.getEffectiveSCEVType(S->getType()); - // Collect all the add operands in a loop, along with their associated loops. // Iterate in reverse so that constants are emitted last, all else equal, and // so that pointer operands are inserted first, which the code below relies on @@ -498,20 +495,19 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { X = SE.getSCEV(U->getValue()); NewOps.push_back(X); } - Sum = expandAddToGEP(SE.getAddExpr(NewOps), Ty, Sum); + Sum = expandAddToGEP(SE.getAddExpr(NewOps), Sum); } else if (Op->isNonConstantNegative()) { // Instead of doing a negate and add, just do a subtract. - Value *W = expandCodeForImpl(SE.getNegativeSCEV(Op), Ty); - Sum = InsertNoopCastOfTo(Sum, Ty); + Value *W = expand(SE.getNegativeSCEV(Op)); Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true); ++I; } else { // A simple add. - Value *W = expandCodeForImpl(Op, Ty); - Sum = InsertNoopCastOfTo(Sum, Ty); + Value *W = expand(Op); // Canonicalize a constant to the RHS. - if (isa<Constant>(Sum)) std::swap(Sum, W); + if (isa<Constant>(Sum)) + std::swap(Sum, W); Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(), /*IsSafeToHoist*/ true); ++I; @@ -522,7 +518,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { } Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { - Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Type *Ty = S->getType(); // Collect all the mul operands in a loop, along with their associated loops. // Iterate in reverse so that constants are emitted last, all else equal. @@ -541,7 +537,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { // Expand the calculation of X pow N in the following manner: // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then: // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK). - const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops, &Ty]() { + const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops]() { auto E = I; // Calculate how many times the same operand from the same loop is included // into this power. @@ -559,7 +555,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them // that are needed into the result. - Value *P = expandCodeForImpl(I->second, Ty); + Value *P = expand(I->second); Value *Result = nullptr; if (Exponent & 1) Result = P; @@ -584,14 +580,12 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { Prod = ExpandOpBinPowN(); } else if (I->second->isAllOnesValue()) { // Instead of doing a multiply by negative one, just do a negate. - Prod = InsertNoopCastOfTo(Prod, Ty); Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod, SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true); ++I; } else { // A simple mul. Value *W = ExpandOpBinPowN(); - Prod = InsertNoopCastOfTo(Prod, Ty); // Canonicalize a constant to the RHS. if (isa<Constant>(Prod)) std::swap(Prod, W); const APInt *RHS; @@ -616,18 +610,16 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { } Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { - Type *Ty = SE.getEffectiveSCEVType(S->getType()); - - Value *LHS = expandCodeForImpl(S->getLHS(), Ty); + Value *LHS = expand(S->getLHS()); if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) { const APInt &RHS = SC->getAPInt(); if (RHS.isPowerOf2()) return InsertBinop(Instruction::LShr, LHS, - ConstantInt::get(Ty, RHS.logBase2()), + ConstantInt::get(SC->getType(), RHS.logBase2()), SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true); } - Value *RHS = expandCodeForImpl(S->getRHS(), Ty); + Value *RHS = expand(S->getRHS()); return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap, /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS())); } @@ -803,12 +795,11 @@ bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, /// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may /// need to materialize IV increments elsewhere to handle difficult situations. Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L, - Type *ExpandTy, Type *IntTy, bool useSubtract) { Value *IncV; // If the PHI is a pointer, use a GEP, otherwise use an add or sub. - if (ExpandTy->isPointerTy()) { - IncV = expandAddToGEP(SE.getSCEV(StepV), IntTy, PN); + if (PN->getType()->isPointerTy()) { + IncV = expandAddToGEP(SE.getSCEV(StepV), PN); } else { IncV = useSubtract ? Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") : @@ -824,12 +815,11 @@ static bool canBeCheaplyTransformed(ScalarEvolution &SE, const SCEVAddRecExpr *Requested, bool &InvertStep) { // We can't transform to match a pointer PHI. - if (Phi->getType()->isPointerTy()) + Type *PhiTy = Phi->getType(); + Type *RequestedTy = Requested->getType(); + if (PhiTy->isPointerTy() || RequestedTy->isPointerTy()) return false; - Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType()); - Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType()); - if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth()) return false; @@ -886,12 +876,10 @@ static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) { /// values, and return the PHI. PHINode * SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, - const Loop *L, - Type *ExpandTy, - Type *IntTy, - Type *&TruncTy, + const Loop *L, Type *&TruncTy, bool &InvertStep) { - assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position"); + assert((!IVIncInsertLoop || IVIncInsertPos) && + "Uninitialized insert position"); // Reuse a previously-inserted PHI, if present. BasicBlock *LatchBlock = L->getLoopLatch(); @@ -962,7 +950,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // later. AddRecPhiMatch = &PN; IncV = TempIncV; - TruncTy = SE.getEffectiveSCEVType(Normalized->getType()); + TruncTy = Normalized->getType(); } } @@ -996,8 +984,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, assert(L->getLoopPreheader() && "Can't expand add recurrences without a loop preheader!"); Value *StartV = - expandCodeForImpl(Normalized->getStart(), ExpandTy, - L->getLoopPreheader()->getTerminator()); + expand(Normalized->getStart(), L->getLoopPreheader()->getTerminator()); // StartV must have been be inserted into L's preheader to dominate the new // phi. @@ -1008,6 +995,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // Expand code for the step value. Do this before creating the PHI so that PHI // reuse code doesn't see an incomplete PHI. const SCEV *Step = Normalized->getStepRecurrence(SE); + Type *ExpandTy = Normalized->getType(); // If the stride is negative, insert a sub instead of an add for the increment // (unless it's a constant, because subtracts of constants are canonicalized // to adds). @@ -1015,8 +1003,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, if (useSubtract) Step = SE.getNegativeSCEV(Step); // Expand the step somewhere that dominates the loop header. - Value *StepV = expandCodeForImpl( - Step, IntTy, &*L->getHeader()->getFirstInsertionPt()); + Value *StepV = expand(Step, L->getHeader()->getFirstInsertionPt()); // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if // we actually do emit an addition. It does not apply if we emit a @@ -1047,7 +1034,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, Instruction *InsertPos = L == IVIncInsertLoop ? IVIncInsertPos : Pred->getTerminator(); Builder.SetInsertPoint(InsertPos); - Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); + Value *IncV = expandIVInc(PN, StepV, L, useSubtract); if (isa<OverflowingBinaryOperator>(IncV)) { if (IncrementIsNUW) @@ -1070,8 +1057,6 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, } Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { - Type *STy = S->getType(); - Type *IntTy = SE.getEffectiveSCEVType(STy); const Loop *L = S->getLoop(); // Determine a normalized form of this expression, which is the expression @@ -1084,51 +1069,17 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { normalizeForPostIncUse(S, Loops, SE, /*CheckInvertible=*/false)); } - // Strip off any non-loop-dominating component from the addrec start. - const SCEV *Start = Normalized->getStart(); - const SCEV *PostLoopOffset = nullptr; - if (!SE.properlyDominates(Start, L->getHeader())) { - PostLoopOffset = Start; - Start = SE.getConstant(Normalized->getType(), 0); - Normalized = cast<SCEVAddRecExpr>( - SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE), - Normalized->getLoop(), - Normalized->getNoWrapFlags(SCEV::FlagNW))); - } - - // Strip off any non-loop-dominating component from the addrec step. + [[maybe_unused]] const SCEV *Start = Normalized->getStart(); const SCEV *Step = Normalized->getStepRecurrence(SE); - const SCEV *PostLoopScale = nullptr; - if (!SE.dominates(Step, L->getHeader())) { - PostLoopScale = Step; - Step = SE.getConstant(Normalized->getType(), 1); - if (!Start->isZero()) { - // The normalization below assumes that Start is constant zero, so if - // it isn't re-associate Start to PostLoopOffset. - assert(!PostLoopOffset && "Start not-null but PostLoopOffset set?"); - PostLoopOffset = Start; - Start = SE.getConstant(Normalized->getType(), 0); - } - Normalized = - cast<SCEVAddRecExpr>(SE.getAddRecExpr( - Start, Step, Normalized->getLoop(), - Normalized->getNoWrapFlags(SCEV::FlagNW))); - } - - // Expand the core addrec. If we need post-loop scaling, force it to - // expand to an integer type to avoid the need for additional casting. - Type *ExpandTy = PostLoopScale ? IntTy : STy; - // We can't use a pointer type for the addrec if the pointer type is - // non-integral. - Type *AddRecPHIExpandTy = - DL.isNonIntegralPointerType(STy) ? Normalized->getType() : ExpandTy; + assert(SE.properlyDominates(Start, L->getHeader()) && + "Start does not properly dominate loop header"); + assert(SE.dominates(Step, L->getHeader()) && "Step not dominate loop header"); // In some cases, we decide to reuse an existing phi node but need to truncate // it and/or invert the step. Type *TruncTy = nullptr; bool InvertStep = false; - PHINode *PN = getAddRecExprPHILiterally(Normalized, L, AddRecPHIExpandTy, - IntTy, TruncTy, InvertStep); + PHINode *PN = getAddRecExprPHILiterally(Normalized, L, TruncTy, InvertStep); // Accommodate post-inc mode, if necessary. Value *Result; @@ -1167,59 +1118,29 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // inserting an extra IV increment. StepV might fold into PostLoopOffset, // but hopefully expandCodeFor handles that. bool useSubtract = - !ExpandTy->isPointerTy() && Step->isNonConstantNegative(); + !S->getType()->isPointerTy() && Step->isNonConstantNegative(); if (useSubtract) Step = SE.getNegativeSCEV(Step); Value *StepV; { // Expand the step somewhere that dominates the loop header. SCEVInsertPointGuard Guard(Builder, this); - StepV = expandCodeForImpl( - Step, IntTy, &*L->getHeader()->getFirstInsertionPt()); + StepV = expand(Step, L->getHeader()->getFirstInsertionPt()); } - Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); + Result = expandIVInc(PN, StepV, L, useSubtract); } } // We have decided to reuse an induction variable of a dominating loop. Apply // truncation and/or inversion of the step. if (TruncTy) { - Type *ResTy = Result->getType(); - // Normalize the result type. - if (ResTy != SE.getEffectiveSCEVType(ResTy)) - Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy)); // Truncate the result. if (TruncTy != Result->getType()) Result = Builder.CreateTrunc(Result, TruncTy); // Invert the result. if (InvertStep) - Result = Builder.CreateSub( - expandCodeForImpl(Normalized->getStart(), TruncTy), Result); - } - - // Re-apply any non-loop-dominating scale. - if (PostLoopScale) { - assert(S->isAffine() && "Can't linearly scale non-affine recurrences."); - Result = InsertNoopCastOfTo(Result, IntTy); - Result = Builder.CreateMul(Result, - expandCodeForImpl(PostLoopScale, IntTy)); - } - - // Re-apply any non-loop-dominating offset. - if (PostLoopOffset) { - if (isa<PointerType>(ExpandTy)) { - if (Result->getType()->isIntegerTy()) { - Value *Base = expandCodeForImpl(PostLoopOffset, ExpandTy); - Result = expandAddToGEP(SE.getUnknown(Result), IntTy, Base); - } else { - Result = expandAddToGEP(PostLoopOffset, IntTy, Result); - } - } else { - Result = InsertNoopCastOfTo(Result, IntTy); - Result = Builder.CreateAdd( - Result, expandCodeForImpl(PostLoopOffset, IntTy)); - } + Result = Builder.CreateSub(expand(Normalized->getStart()), Result); } return Result; @@ -1260,8 +1181,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { S->getNoWrapFlags(SCEV::FlagNW))); BasicBlock::iterator NewInsertPt = findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint()); - V = expandCodeForImpl(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr, - &*NewInsertPt); + V = expand(SE.getTruncateExpr(SE.getUnknown(V), Ty), NewInsertPt); return V; } @@ -1269,7 +1189,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { if (!S->getStart()->isZero()) { if (isa<PointerType>(S->getType())) { Value *StartV = expand(SE.getPointerBase(S)); - return expandAddToGEP(SE.removePointerBase(S), Ty, StartV); + return expandAddToGEP(SE.removePointerBase(S), StartV); } SmallVector<const SCEV *, 4> NewOps(S->operands()); @@ -1292,8 +1212,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // specified loop. BasicBlock *Header = L->getHeader(); pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); - CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar", - &Header->front()); + CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar"); + CanonicalIV->insertBefore(Header->begin()); rememberInstruction(CanonicalIV); SmallSet<BasicBlock *, 4> PredSeen; @@ -1361,34 +1281,25 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { } Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) { - Value *V = - expandCodeForImpl(S->getOperand(), S->getOperand()->getType()); + Value *V = expand(S->getOperand()); return ReuseOrCreateCast(V, S->getType(), CastInst::PtrToInt, GetOptimalInsertionPointForCastOf(V)); } Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) { - Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *V = expandCodeForImpl( - S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()) - ); - return Builder.CreateTrunc(V, Ty); + Value *V = expand(S->getOperand()); + return Builder.CreateTrunc(V, S->getType()); } Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) { - Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *V = expandCodeForImpl( - S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()) - ); - return Builder.CreateZExt(V, Ty); + Value *V = expand(S->getOperand()); + return Builder.CreateZExt(V, S->getType(), "", + SE.isKnownNonNegative(S->getOperand())); } Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { - Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *V = expandCodeForImpl( - S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()) - ); - return Builder.CreateSExt(V, Ty); + Value *V = expand(S->getOperand()); + return Builder.CreateSExt(V, S->getType()); } Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S, @@ -1399,7 +1310,7 @@ Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S, if (IsSequential) LHS = Builder.CreateFreeze(LHS); for (int i = S->getNumOperands() - 2; i >= 0; --i) { - Value *RHS = expandCodeForImpl(S->getOperand(i), Ty); + Value *RHS = expand(S->getOperand(i)); if (IsSequential && i != 0) RHS = Builder.CreateFreeze(RHS); Value *Sel; @@ -1440,14 +1351,14 @@ Value *SCEVExpander::visitVScale(const SCEVVScale *S) { return Builder.CreateVScale(ConstantInt::get(S->getType(), 1)); } -Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, - Instruction *IP) { +Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty, + BasicBlock::iterator IP) { setInsertPoint(IP); - Value *V = expandCodeForImpl(SH, Ty); + Value *V = expandCodeFor(SH, Ty); return V; } -Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty) { +Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) { // Expand the code for this SCEV. Value *V = expand(SH); @@ -1459,8 +1370,64 @@ Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty) { return V; } -Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S, - const Instruction *InsertPt) { +static bool +canReuseInstruction(ScalarEvolution &SE, const SCEV *S, Instruction *I, + SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) { + // If the instruction cannot be poison, it's always safe to reuse. + if (programUndefinedIfPoison(I)) + return true; + + // Otherwise, it is possible that I is more poisonous that S. Collect the + // poison-contributors of S, and then check whether I has any additional + // poison-contributors. Poison that is contributed through poison-generating + // flags is handled by dropping those flags instead. + SmallPtrSet<const Value *, 8> PoisonVals; + SE.getPoisonGeneratingValues(PoisonVals, S); + + SmallVector<Value *> Worklist; + SmallPtrSet<Value *, 8> Visited; + Worklist.push_back(I); + while (!Worklist.empty()) { + Value *V = Worklist.pop_back_val(); + if (!Visited.insert(V).second) + continue; + + // Avoid walking large instruction graphs. + if (Visited.size() > 16) + return false; + + // Either the value can't be poison, or the S would also be poison if it + // is. + if (PoisonVals.contains(V) || isGuaranteedNotToBePoison(V)) + continue; + + auto *I = dyn_cast<Instruction>(V); + if (!I) + return false; + + // FIXME: Ignore vscale, even though it technically could be poison. Do this + // because SCEV currently assumes it can't be poison. Remove this special + // case once we proper model when vscale can be poison. + if (auto *II = dyn_cast<IntrinsicInst>(I); + II && II->getIntrinsicID() == Intrinsic::vscale) + continue; + + if (canCreatePoison(cast<Operator>(I), /*ConsiderFlagsAndMetadata*/ false)) + return false; + + // If the instruction can't create poison, we can recurse to its operands. + if (I->hasPoisonGeneratingFlagsOrMetadata()) + DropPoisonGeneratingInsts.push_back(I); + + for (Value *Op : I->operands()) + Worklist.push_back(Op); + } + return true; +} + +Value *SCEVExpander::FindValueInExprValueMap( + const SCEV *S, const Instruction *InsertPt, + SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) { // If the expansion is not in CanonicalMode, and the SCEV contains any // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally. if (!CanonicalMode && SE.containsAddRecurrence(S)) @@ -1470,20 +1437,24 @@ Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S, if (isa<SCEVConstant>(S)) return nullptr; - // Choose a Value from the set which dominates the InsertPt. - // InsertPt should be inside the Value's parent loop so as not to break - // the LCSSA form. for (Value *V : SE.getSCEVValues(S)) { Instruction *EntInst = dyn_cast<Instruction>(V); if (!EntInst) continue; + // Choose a Value from the set which dominates the InsertPt. + // InsertPt should be inside the Value's parent loop so as not to break + // the LCSSA form. assert(EntInst->getFunction() == InsertPt->getFunction()); - if (S->getType() == V->getType() && - SE.DT.dominates(EntInst, InsertPt) && - (SE.LI.getLoopFor(EntInst->getParent()) == nullptr || - SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) + if (S->getType() != V->getType() || !SE.DT.dominates(EntInst, InsertPt) || + !(SE.LI.getLoopFor(EntInst->getParent()) == nullptr || + SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) + continue; + + // Make sure reusing the instruction is poison-safe. + if (canReuseInstruction(SE, S, EntInst, DropPoisonGeneratingInsts)) return V; + DropPoisonGeneratingInsts.clear(); } return nullptr; } @@ -1497,7 +1468,7 @@ Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S, Value *SCEVExpander::expand(const SCEV *S) { // Compute an insertion point for this SCEV object. Hoist the instructions // as far out in the loop nest as possible. - Instruction *InsertPt = &*Builder.GetInsertPoint(); + BasicBlock::iterator InsertPt = Builder.GetInsertPoint(); // We can move insertion point only if there is no div or rem operations // otherwise we are risky to move it over the check for zero denominator. @@ -1521,24 +1492,25 @@ Value *SCEVExpander::expand(const SCEV *S) { L = L->getParentLoop()) { if (SE.isLoopInvariant(S, L)) { if (!L) break; - if (BasicBlock *Preheader = L->getLoopPreheader()) - InsertPt = Preheader->getTerminator(); - else + if (BasicBlock *Preheader = L->getLoopPreheader()) { + InsertPt = Preheader->getTerminator()->getIterator(); + } else { // LSR sets the insertion point for AddRec start/step values to the // block start to simplify value reuse, even though it's an invalid // position. SCEVExpander must correct for this in all cases. - InsertPt = &*L->getHeader()->getFirstInsertionPt(); + InsertPt = L->getHeader()->getFirstInsertionPt(); + } } else { // If the SCEV is computable at this level, insert it into the header // after the PHIs (and after any other instructions that we've inserted // there) so that it is guaranteed to dominate any user inside the loop. if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) - InsertPt = &*L->getHeader()->getFirstInsertionPt(); + InsertPt = L->getHeader()->getFirstInsertionPt(); - while (InsertPt->getIterator() != Builder.GetInsertPoint() && - (isInsertedInstruction(InsertPt) || - isa<DbgInfoIntrinsic>(InsertPt))) { - InsertPt = &*std::next(InsertPt->getIterator()); + while (InsertPt != Builder.GetInsertPoint() && + (isInsertedInstruction(&*InsertPt) || + isa<DbgInfoIntrinsic>(&*InsertPt))) { + InsertPt = std::next(InsertPt); } break; } @@ -1546,26 +1518,40 @@ Value *SCEVExpander::expand(const SCEV *S) { } // Check to see if we already expanded this here. - auto I = InsertedExpressions.find(std::make_pair(S, InsertPt)); + auto I = InsertedExpressions.find(std::make_pair(S, &*InsertPt)); if (I != InsertedExpressions.end()) return I->second; SCEVInsertPointGuard Guard(Builder, this); - Builder.SetInsertPoint(InsertPt); + Builder.SetInsertPoint(InsertPt->getParent(), InsertPt); // Expand the expression into instructions. - Value *V = FindValueInExprValueMap(S, InsertPt); + SmallVector<Instruction *> DropPoisonGeneratingInsts; + Value *V = FindValueInExprValueMap(S, &*InsertPt, DropPoisonGeneratingInsts); if (!V) { V = visit(S); V = fixupLCSSAFormFor(V); } else { - // If we're reusing an existing instruction, we are effectively CSEing two - // copies of the instruction (with potentially different flags). As such, - // we need to drop any poison generating flags unless we can prove that - // said flags must be valid for all new users. - if (auto *I = dyn_cast<Instruction>(V)) - if (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)) - I->dropPoisonGeneratingFlags(); + for (Instruction *I : DropPoisonGeneratingInsts) { + I->dropPoisonGeneratingFlagsAndMetadata(); + // See if we can re-infer from first principles any of the flags we just + // dropped. + if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) + if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) { + auto *BO = cast<BinaryOperator>(I); + BO->setHasNoUnsignedWrap( + ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW); + BO->setHasNoSignedWrap( + ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW); + } + if (auto *NNI = dyn_cast<PossiblyNonNegInst>(I)) { + auto *Src = NNI->getOperand(0); + if (isImpliedByDomCondition(ICmpInst::ICMP_SGE, Src, + Constant::getNullValue(Src->getType()), I, + DL).value_or(false)) + NNI->setNonNeg(true); + } + } } // Remember the expanded value for this SCEV at this location. // @@ -1573,7 +1559,7 @@ Value *SCEVExpander::expand(const SCEV *S) { // the expression at this insertion point. If the mapped value happened to be // a postinc expansion, it could be reused by a non-postinc user, but only if // its insertion point was already at the head of the loop. - InsertedExpressions[std::make_pair(S, InsertPt)] = V; + InsertedExpressions[std::make_pair(S, &*InsertPt)] = V; return V; } @@ -1710,13 +1696,13 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, << *IsomorphicInc << '\n'); Value *NewInc = OrigInc; if (OrigInc->getType() != IsomorphicInc->getType()) { - Instruction *IP = nullptr; + BasicBlock::iterator IP; if (PHINode *PN = dyn_cast<PHINode>(OrigInc)) - IP = &*PN->getParent()->getFirstInsertionPt(); + IP = PN->getParent()->getFirstInsertionPt(); else - IP = OrigInc->getNextNode(); + IP = OrigInc->getNextNonDebugInstruction()->getIterator(); - IRBuilder<> Builder(IP); + IRBuilder<> Builder(IP->getParent(), IP); Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc()); NewInc = Builder.CreateTruncOrBitCast( OrigInc, IsomorphicInc->getType(), IVName); @@ -1734,7 +1720,8 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, ++NumElim; Value *NewIV = OrigPhiRef; if (OrigPhiRef->getType() != Phi->getType()) { - IRBuilder<> Builder(&*L->getHeader()->getFirstInsertionPt()); + IRBuilder<> Builder(L->getHeader(), + L->getHeader()->getFirstInsertionPt()); Builder.SetCurrentDebugLocation(Phi->getDebugLoc()); NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName); } @@ -1744,9 +1731,9 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, return NumElim; } -Value *SCEVExpander::getRelatedExistingExpansion(const SCEV *S, - const Instruction *At, - Loop *L) { +bool SCEVExpander::hasRelatedExistingExpansion(const SCEV *S, + const Instruction *At, + Loop *L) { using namespace llvm::PatternMatch; SmallVector<BasicBlock *, 4> ExitingBlocks; @@ -1763,17 +1750,18 @@ Value *SCEVExpander::getRelatedExistingExpansion(const SCEV *S, continue; if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At)) - return LHS; + return true; if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At)) - return RHS; + return true; } // Use expand's logic which is used for reusing a previous Value in // ExprValueMap. Note that we don't currently model the cost of // needing to drop poison generating flags on the instruction if we // want to reuse it. We effectively assume that has zero cost. - return FindValueInExprValueMap(S, At); + SmallVector<Instruction *> DropPoisonGeneratingInsts; + return FindValueInExprValueMap(S, At, DropPoisonGeneratingInsts) != nullptr; } template<typename T> static InstructionCost costAndCollectOperands( @@ -1951,7 +1939,7 @@ bool SCEVExpander::isHighCostExpansionHelper( // If we can find an existing value for this scev available at the point "At" // then consider the expression cheap. - if (getRelatedExistingExpansion(S, &At, L)) + if (hasRelatedExistingExpansion(S, &At, L)) return false; // Consider the expression to be free. TargetTransformInfo::TargetCostKind CostKind = @@ -1993,7 +1981,7 @@ bool SCEVExpander::isHighCostExpansionHelper( // At the beginning of this function we already tried to find existing // value for plain 'S'. Now try to lookup 'S + 1' since it is common // pattern involving division. This is just a simple search heuristic. - if (getRelatedExistingExpansion( + if (hasRelatedExistingExpansion( SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L)) return false; // Consider it to be free. @@ -2045,10 +2033,8 @@ Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred, Value *SCEVExpander::expandComparePredicate(const SCEVComparePredicate *Pred, Instruction *IP) { - Value *Expr0 = - expandCodeForImpl(Pred->getLHS(), Pred->getLHS()->getType(), IP); - Value *Expr1 = - expandCodeForImpl(Pred->getRHS(), Pred->getRHS()->getType(), IP); + Value *Expr0 = expand(Pred->getLHS(), IP); + Value *Expr1 = expand(Pred->getRHS(), IP); Builder.SetInsertPoint(IP); auto InvPred = ICmpInst::getInversePredicate(Pred->getPredicate()); @@ -2080,17 +2066,15 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, // Step >= 0, Start + |Step| * Backedge > Start // and |Step| * Backedge doesn't unsigned overflow. - IntegerType *CountTy = IntegerType::get(Loc->getContext(), SrcBits); Builder.SetInsertPoint(Loc); - Value *TripCountVal = expandCodeForImpl(ExitCount, CountTy, Loc); + Value *TripCountVal = expand(ExitCount, Loc); IntegerType *Ty = IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy)); - Value *StepValue = expandCodeForImpl(Step, Ty, Loc); - Value *NegStepValue = - expandCodeForImpl(SE.getNegativeSCEV(Step), Ty, Loc); - Value *StartValue = expandCodeForImpl(Start, ARTy, Loc); + Value *StepValue = expand(Step, Loc); + Value *NegStepValue = expand(SE.getNegativeSCEV(Step), Loc); + Value *StartValue = expand(Start, Loc); ConstantInt *Zero = ConstantInt::get(Loc->getContext(), APInt::getZero(DstBits)); @@ -2136,9 +2120,7 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, bool NeedPosCheck = !SE.isKnownNegative(Step); bool NeedNegCheck = !SE.isKnownPositive(Step); - if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) { - StartValue = InsertNoopCastOfTo( - StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace())); + if (isa<PointerType>(ARTy)) { Value *NegMulV = Builder.CreateNeg(MulV); if (NeedPosCheck) Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV); @@ -2171,7 +2153,7 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, // If the backedge taken count type is larger than the AR type, // check that we don't drop any bits by truncating it. If we are // dropping bits, then we have overflow (unless the step is zero). - if (SE.getTypeSizeInBits(CountTy) > SE.getTypeSizeInBits(Ty)) { + if (SrcBits > DstBits) { auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits); auto *BackedgeCheck = Builder.CreateICmp(ICmpInst::ICMP_UGT, TripCountVal, @@ -2244,7 +2226,7 @@ Value *SCEVExpander::fixupLCSSAFormFor(Value *V) { // instruction. Type *ToTy; if (DefI->getType()->isIntegerTy()) - ToTy = DefI->getType()->getPointerTo(); + ToTy = PointerType::get(DefI->getContext(), 0); else ToTy = Type::getInt32Ty(DefI->getContext()); Instruction *User = @@ -2306,12 +2288,6 @@ struct SCEVFindUnsafe { } } if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { - const SCEV *Step = AR->getStepRecurrence(SE); - if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) { - IsUnsafe = true; - return false; - } - // For non-affine addrecs or in non-canonical mode we need a preheader // to insert into. if (!AR->getLoop()->getLoopPreheader() && diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index d3a9a41aef15..c09cf9c2325c 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -271,7 +271,10 @@ class SimplifyCFGOpt { bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, IRBuilder<> &Builder); - bool HoistThenElseCodeToIf(BranchInst *BI, bool EqTermsOnly); + bool hoistCommonCodeFromSuccessors(BasicBlock *BB, bool EqTermsOnly); + bool hoistSuccIdenticalTerminatorToSwitchOrIf( + Instruction *TI, Instruction *I1, + SmallVectorImpl<Instruction *> &OtherSuccTIs); bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB); bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond, BasicBlock *TrueBB, BasicBlock *FalseBB, @@ -499,7 +502,7 @@ static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) { return CI; else return cast<ConstantInt>( - ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false)); + ConstantFoldIntegerCast(CI, PtrTy, /*isSigned=*/false, DL)); } return nullptr; } @@ -819,7 +822,7 @@ BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases( static void EliminateBlockCases(BasicBlock *BB, std::vector<ValueEqualityComparisonCase> &Cases) { - llvm::erase_value(Cases, BB); + llvm::erase(Cases, BB); } /// Return true if there are any keys in C1 that exist in C2 as well. @@ -1098,12 +1101,13 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses( // Note that there may be multiple predecessor blocks, so we cannot move // bonus instructions to a predecessor block. for (Instruction &BonusInst : *BB) { - if (isa<DbgInfoIntrinsic>(BonusInst) || BonusInst.isTerminator()) + if (BonusInst.isTerminator()) continue; Instruction *NewBonusInst = BonusInst.clone(); - if (PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) { + if (!isa<DbgInfoIntrinsic>(BonusInst) && + PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) { // Unless the instruction has the same !dbg location as the original // branch, drop it. When we fold the bonus instructions we want to make // sure we reset their debug locations in order to avoid stepping on @@ -1113,7 +1117,6 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses( RemapInstruction(NewBonusInst, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); - VMap[&BonusInst] = NewBonusInst; // If we speculated an instruction, we need to drop any metadata that may // result in undefined behavior, as the metadata might have been valid @@ -1123,8 +1126,16 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses( NewBonusInst->dropUBImplyingAttrsAndMetadata(); NewBonusInst->insertInto(PredBlock, PTI->getIterator()); + auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst); + RemapDPValueRange(NewBonusInst->getModule(), Range, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + + if (isa<DbgInfoIntrinsic>(BonusInst)) + continue; + NewBonusInst->takeName(&BonusInst); BonusInst.setName(NewBonusInst->getName() + ".old"); + VMap[&BonusInst] = NewBonusInst; // Update (liveout) uses of bonus instructions, // now that the bonus instruction has been cloned into predecessor. @@ -1303,7 +1314,7 @@ bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding( } for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor : NewSuccessors) { - for (auto I : seq(0, NewSuccessor.second)) { + for (auto I : seq(NewSuccessor.second)) { (void)I; AddPredecessorToBlock(NewSuccessor.first, Pred, BB); } @@ -1408,8 +1419,9 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI, } // If we would need to insert a select that uses the value of this invoke -// (comments in HoistThenElseCodeToIf explain why we would need to do this), we -// can't hoist the invoke, as there is nowhere to put the select in this case. +// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would +// need to do this), we can't hoist the invoke, as there is nowhere to put the +// select in this case. static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2) { for (BasicBlock *Succ : successors(BB1)) { @@ -1424,9 +1436,9 @@ static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, return true; } -// Get interesting characteristics of instructions that `HoistThenElseCodeToIf` -// didn't hoist. They restrict what kind of instructions can be reordered -// across. +// Get interesting characteristics of instructions that +// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of +// instructions can be reordered across. enum SkipFlags { SkipReadMem = 1, SkipSideEffect = 2, @@ -1484,7 +1496,7 @@ static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) { static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false); -/// Helper function for HoistThenElseCodeToIf. Return true if identical +/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical /// instructions \p I1 and \p I2 can and should be hoisted. static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI) { @@ -1515,62 +1527,51 @@ static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, return true; } -/// Given a conditional branch that goes to BB1 and BB2, hoist any common code -/// in the two blocks up into the branch block. The caller of this function -/// guarantees that BI's block dominates BB1 and BB2. If EqTermsOnly is given, -/// only perform hoisting in case both blocks only contain a terminator. In that -/// case, only the original BI will be replaced and selects for PHIs are added. -bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, bool EqTermsOnly) { +/// Hoist any common code in the successor blocks up into the block. This +/// function guarantees that BB dominates all successors. If EqTermsOnly is +/// given, only perform hoisting in case both blocks only contain a terminator. +/// In that case, only the original BI will be replaced and selects for PHIs are +/// added. +bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB, + bool EqTermsOnly) { // This does very trivial matching, with limited scanning, to find identical - // instructions in the two blocks. In particular, we don't want to get into - // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As + // instructions in the two blocks. In particular, we don't want to get into + // O(N1*N2*...) situations here where Ni are the sizes of these successors. As // such, we currently just scan for obviously identical instructions in an // identical order, possibly separated by the same number of non-identical // instructions. - BasicBlock *BB1 = BI->getSuccessor(0); // The true destination. - BasicBlock *BB2 = BI->getSuccessor(1); // The false destination + unsigned int SuccSize = succ_size(BB); + if (SuccSize < 2) + return false; // If either of the blocks has it's address taken, then we can't do this fold, // because the code we'd hoist would no longer run when we jump into the block // by it's address. - if (BB1->hasAddressTaken() || BB2->hasAddressTaken()) - return false; + for (auto *Succ : successors(BB)) + if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor()) + return false; - BasicBlock::iterator BB1_Itr = BB1->begin(); - BasicBlock::iterator BB2_Itr = BB2->begin(); + auto *TI = BB->getTerminator(); - Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++; - // Skip debug info if it is not identical. - DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1); - DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2); - if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { - while (isa<DbgInfoIntrinsic>(I1)) - I1 = &*BB1_Itr++; - while (isa<DbgInfoIntrinsic>(I2)) - I2 = &*BB2_Itr++; + // The second of pair is a SkipFlags bitmask. + using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>; + SmallVector<SuccIterPair, 8> SuccIterPairs; + for (auto *Succ : successors(BB)) { + BasicBlock::iterator SuccItr = Succ->begin(); + if (isa<PHINode>(*SuccItr)) + return false; + SuccIterPairs.push_back(SuccIterPair(SuccItr, 0)); } - if (isa<PHINode>(I1)) - return false; - - BasicBlock *BIParent = BI->getParent(); - - bool Changed = false; - - auto _ = make_scope_exit([&]() { - if (Changed) - ++NumHoistCommonCode; - }); // Check if only hoisting terminators is allowed. This does not add new // instructions to the hoist location. if (EqTermsOnly) { // Skip any debug intrinsics, as they are free to hoist. - auto *I1NonDbg = &*skipDebugIntrinsics(I1->getIterator()); - auto *I2NonDbg = &*skipDebugIntrinsics(I2->getIterator()); - if (!I1NonDbg->isIdenticalToWhenDefined(I2NonDbg)) - return false; - if (!I1NonDbg->isTerminator()) - return false; + for (auto &SuccIter : make_first_range(SuccIterPairs)) { + auto *INonDbg = &*skipDebugIntrinsics(SuccIter); + if (!INonDbg->isTerminator()) + return false; + } // Now we know that we only need to hoist debug intrinsics and the // terminator. Let the loop below handle those 2 cases. } @@ -1579,153 +1580,235 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, bool EqTermsOnly) { // many instructions we skip, serving as a compilation time control as well as // preventing excessive increase of life ranges. unsigned NumSkipped = 0; + // If we find an unreachable instruction at the beginning of a basic block, we + // can still hoist instructions from the rest of the basic blocks. + if (SuccIterPairs.size() > 2) { + erase_if(SuccIterPairs, + [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); }); + if (SuccIterPairs.size() < 2) + return false; + } - // Record any skipped instuctions that may read memory, write memory or have - // side effects, or have implicit control flow. - unsigned SkipFlagsBB1 = 0; - unsigned SkipFlagsBB2 = 0; + bool Changed = false; for (;;) { + auto *SuccIterPairBegin = SuccIterPairs.begin(); + auto &BB1ItrPair = *SuccIterPairBegin++; + auto OtherSuccIterPairRange = + iterator_range(SuccIterPairBegin, SuccIterPairs.end()); + auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange); + + Instruction *I1 = &*BB1ItrPair.first; + auto *BB1 = I1->getParent(); + + // Skip debug info if it is not identical. + bool AllDbgInstsAreIdentical = all_of(OtherSuccIterRange, [I1](auto &Iter) { + Instruction *I2 = &*Iter; + return I1->isIdenticalToWhenDefined(I2); + }); + if (!AllDbgInstsAreIdentical) { + while (isa<DbgInfoIntrinsic>(I1)) + I1 = &*++BB1ItrPair.first; + for (auto &SuccIter : OtherSuccIterRange) { + Instruction *I2 = &*SuccIter; + while (isa<DbgInfoIntrinsic>(I2)) + I2 = &*++SuccIter; + } + } + + bool AllInstsAreIdentical = true; + bool HasTerminator = I1->isTerminator(); + for (auto &SuccIter : OtherSuccIterRange) { + Instruction *I2 = &*SuccIter; + HasTerminator |= I2->isTerminator(); + if (AllInstsAreIdentical && !I1->isIdenticalToWhenDefined(I2)) + AllInstsAreIdentical = false; + } + // If we are hoisting the terminator instruction, don't move one (making a // broken BB), instead clone it, and remove BI. - if (I1->isTerminator() || I2->isTerminator()) { + if (HasTerminator) { + // Even if BB, which contains only one unreachable instruction, is ignored + // at the beginning of the loop, we can hoist the terminator instruction. // If any instructions remain in the block, we cannot hoist terminators. - if (NumSkipped || !I1->isIdenticalToWhenDefined(I2)) + if (NumSkipped || !AllInstsAreIdentical) return Changed; - goto HoistTerminator; + SmallVector<Instruction *, 8> Insts; + for (auto &SuccIter : OtherSuccIterRange) + Insts.push_back(&*SuccIter); + return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, Insts) || Changed; } - if (I1->isIdenticalToWhenDefined(I2) && - // Even if the instructions are identical, it may not be safe to hoist - // them if we have skipped over instructions with side effects or their - // operands weren't hoisted. - isSafeToHoistInstr(I1, SkipFlagsBB1) && - isSafeToHoistInstr(I2, SkipFlagsBB2) && - shouldHoistCommonInstructions(I1, I2, TTI)) { - if (isa<DbgInfoIntrinsic>(I1) || isa<DbgInfoIntrinsic>(I2)) { - assert(isa<DbgInfoIntrinsic>(I1) && isa<DbgInfoIntrinsic>(I2)); + if (AllInstsAreIdentical) { + unsigned SkipFlagsBB1 = BB1ItrPair.second; + AllInstsAreIdentical = + isSafeToHoistInstr(I1, SkipFlagsBB1) && + all_of(OtherSuccIterPairRange, [=](const auto &Pair) { + Instruction *I2 = &*Pair.first; + unsigned SkipFlagsBB2 = Pair.second; + // Even if the instructions are identical, it may not + // be safe to hoist them if we have skipped over + // instructions with side effects or their operands + // weren't hoisted. + return isSafeToHoistInstr(I2, SkipFlagsBB2) && + shouldHoistCommonInstructions(I1, I2, TTI); + }); + } + + if (AllInstsAreIdentical) { + BB1ItrPair.first++; + if (isa<DbgInfoIntrinsic>(I1)) { // The debug location is an integral part of a debug info intrinsic // and can't be separated from it or replaced. Instead of attempting // to merge locations, simply hoist both copies of the intrinsic. - BIParent->splice(BI->getIterator(), BB1, I1->getIterator()); - BIParent->splice(BI->getIterator(), BB2, I2->getIterator()); + I1->moveBeforePreserving(TI); + for (auto &SuccIter : OtherSuccIterRange) { + auto *I2 = &*SuccIter++; + assert(isa<DbgInfoIntrinsic>(I2)); + I2->moveBeforePreserving(TI); + } } else { // For a normal instruction, we just move one to right before the // branch, then replace all uses of the other with the first. Finally, // we remove the now redundant second instruction. - BIParent->splice(BI->getIterator(), BB1, I1->getIterator()); - if (!I2->use_empty()) - I2->replaceAllUsesWith(I1); - I1->andIRFlags(I2); - combineMetadataForCSE(I1, I2, true); - - // I1 and I2 are being combined into a single instruction. Its debug - // location is the merged locations of the original instructions. - I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); - - I2->eraseFromParent(); + I1->moveBeforePreserving(TI); + BB->splice(TI->getIterator(), BB1, I1->getIterator()); + for (auto &SuccIter : OtherSuccIterRange) { + Instruction *I2 = &*SuccIter++; + assert(I2 != I1); + if (!I2->use_empty()) + I2->replaceAllUsesWith(I1); + I1->andIRFlags(I2); + combineMetadataForCSE(I1, I2, true); + // I1 and I2 are being combined into a single instruction. Its debug + // location is the merged locations of the original instructions. + I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); + I2->eraseFromParent(); + } } + if (!Changed) + NumHoistCommonCode += SuccIterPairs.size(); Changed = true; - ++NumHoistCommonInstrs; + NumHoistCommonInstrs += SuccIterPairs.size(); } else { if (NumSkipped >= HoistCommonSkipLimit) return Changed; // We are about to skip over a pair of non-identical instructions. Record // if any have characteristics that would prevent reordering instructions // across them. - SkipFlagsBB1 |= skippedInstrFlags(I1); - SkipFlagsBB2 |= skippedInstrFlags(I2); + for (auto &SuccIterPair : SuccIterPairs) { + Instruction *I = &*SuccIterPair.first++; + SuccIterPair.second |= skippedInstrFlags(I); + } ++NumSkipped; } - - I1 = &*BB1_Itr++; - I2 = &*BB2_Itr++; - // Skip debug info if it is not identical. - DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1); - DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2); - if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { - while (isa<DbgInfoIntrinsic>(I1)) - I1 = &*BB1_Itr++; - while (isa<DbgInfoIntrinsic>(I2)) - I2 = &*BB2_Itr++; - } } +} - return Changed; +bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf( + Instruction *TI, Instruction *I1, + SmallVectorImpl<Instruction *> &OtherSuccTIs) { -HoistTerminator: - // It may not be possible to hoist an invoke. + auto *BI = dyn_cast<BranchInst>(TI); + + bool Changed = false; + BasicBlock *TIParent = TI->getParent(); + BasicBlock *BB1 = I1->getParent(); + + // Use only for an if statement. + auto *I2 = *OtherSuccTIs.begin(); + auto *BB2 = I2->getParent(); + if (BI) { + assert(OtherSuccTIs.size() == 1); + assert(BI->getSuccessor(0) == I1->getParent()); + assert(BI->getSuccessor(1) == I2->getParent()); + } + + // In the case of an if statement, we try to hoist an invoke. // FIXME: Can we define a safety predicate for CallBr? - if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) - return Changed; + // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll + // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit? + if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2))) + return false; // TODO: callbr hoisting currently disabled pending further study. if (isa<CallBrInst>(I1)) - return Changed; + return false; for (BasicBlock *Succ : successors(BB1)) { for (PHINode &PN : Succ->phis()) { Value *BB1V = PN.getIncomingValueForBlock(BB1); - Value *BB2V = PN.getIncomingValueForBlock(BB2); - if (BB1V == BB2V) - continue; + for (Instruction *OtherSuccTI : OtherSuccTIs) { + Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent()); + if (BB1V == BB2V) + continue; - // Check for passingValueIsAlwaysUndefined here because we would rather - // eliminate undefined control flow then converting it to a select. - if (passingValueIsAlwaysUndefined(BB1V, &PN) || - passingValueIsAlwaysUndefined(BB2V, &PN)) - return Changed; + // In the case of an if statement, check for + // passingValueIsAlwaysUndefined here because we would rather eliminate + // undefined control flow then converting it to a select. + if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) || + passingValueIsAlwaysUndefined(BB2V, &PN)) + return false; + } } } // Okay, it is safe to hoist the terminator. Instruction *NT = I1->clone(); - NT->insertInto(BIParent, BI->getIterator()); + NT->insertInto(TIParent, TI->getIterator()); if (!NT->getType()->isVoidTy()) { I1->replaceAllUsesWith(NT); - I2->replaceAllUsesWith(NT); + for (Instruction *OtherSuccTI : OtherSuccTIs) + OtherSuccTI->replaceAllUsesWith(NT); NT->takeName(I1); } Changed = true; - ++NumHoistCommonInstrs; + NumHoistCommonInstrs += OtherSuccTIs.size() + 1; // Ensure terminator gets a debug location, even an unknown one, in case // it involves inlinable calls. - NT->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); + SmallVector<DILocation *, 4> Locs; + Locs.push_back(I1->getDebugLoc()); + for (auto *OtherSuccTI : OtherSuccTIs) + Locs.push_back(OtherSuccTI->getDebugLoc()); + NT->setDebugLoc(DILocation::getMergedLocations(Locs)); // PHIs created below will adopt NT's merged DebugLoc. IRBuilder<NoFolder> Builder(NT); - // Hoisting one of the terminators from our successor is a great thing. - // Unfortunately, the successors of the if/else blocks may have PHI nodes in - // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI - // nodes, so we insert select instruction to compute the final result. - std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects; - for (BasicBlock *Succ : successors(BB1)) { - for (PHINode &PN : Succ->phis()) { - Value *BB1V = PN.getIncomingValueForBlock(BB1); - Value *BB2V = PN.getIncomingValueForBlock(BB2); - if (BB1V == BB2V) - continue; + // In the case of an if statement, hoisting one of the terminators from our + // successor is a great thing. Unfortunately, the successors of the if/else + // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2 + // must agree for all PHI nodes, so we insert select instruction to compute + // the final result. + if (BI) { + std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects; + for (BasicBlock *Succ : successors(BB1)) { + for (PHINode &PN : Succ->phis()) { + Value *BB1V = PN.getIncomingValueForBlock(BB1); + Value *BB2V = PN.getIncomingValueForBlock(BB2); + if (BB1V == BB2V) + continue; - // These values do not agree. Insert a select instruction before NT - // that determines the right value. - SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; - if (!SI) { - // Propagate fast-math-flags from phi node to its replacement select. - IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); - if (isa<FPMathOperator>(PN)) - Builder.setFastMathFlags(PN.getFastMathFlags()); + // These values do not agree. Insert a select instruction before NT + // that determines the right value. + SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; + if (!SI) { + // Propagate fast-math-flags from phi node to its replacement select. + IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); + if (isa<FPMathOperator>(PN)) + Builder.setFastMathFlags(PN.getFastMathFlags()); - SI = cast<SelectInst>( - Builder.CreateSelect(BI->getCondition(), BB1V, BB2V, - BB1V->getName() + "." + BB2V->getName(), BI)); - } + SI = cast<SelectInst>(Builder.CreateSelect( + BI->getCondition(), BB1V, BB2V, + BB1V->getName() + "." + BB2V->getName(), BI)); + } - // Make the PHI node use the select for all incoming values for BB1/BB2 - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) - if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2) - PN.setIncomingValue(i, SI); + // Make the PHI node use the select for all incoming values for BB1/BB2 + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) + if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2) + PN.setIncomingValue(i, SI); + } } } @@ -1733,16 +1816,16 @@ HoistTerminator: // Update any PHI nodes in our new successors. for (BasicBlock *Succ : successors(BB1)) { - AddPredecessorToBlock(Succ, BIParent, BB1); + AddPredecessorToBlock(Succ, TIParent, BB1); if (DTU) - Updates.push_back({DominatorTree::Insert, BIParent, Succ}); + Updates.push_back({DominatorTree::Insert, TIParent, Succ}); } if (DTU) - for (BasicBlock *Succ : successors(BI)) - Updates.push_back({DominatorTree::Delete, BIParent, Succ}); + for (BasicBlock *Succ : successors(TI)) + Updates.push_back({DominatorTree::Delete, TIParent, Succ}); - EraseTerminatorAndDCECond(BI); + EraseTerminatorAndDCECond(TI); if (DTU) DTU->applyUpdates(Updates); return Changed; @@ -1808,10 +1891,19 @@ static bool canSinkInstructions( } const Instruction *I0 = Insts.front(); - for (auto *I : Insts) + for (auto *I : Insts) { if (!I->isSameOperationAs(I0)) return false; + // swifterror pointers can only be used by a load or store; sinking a load + // or store would require introducing a select for the pointer operand, + // which isn't allowed for swifterror pointers. + if (isa<StoreInst>(I) && I->getOperand(1)->isSwiftError()) + return false; + if (isa<LoadInst>(I) && I->getOperand(0)->isSwiftError()) + return false; + } + // All instructions in Insts are known to be the same opcode. If they have a // use, check that the only user is a PHI or in the same block as the // instruction, because if a user is in the same block as an instruction we're @@ -1952,8 +2044,9 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { // Create a new PHI in the successor block and populate it. auto *Op = I0->getOperand(O); assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!"); - auto *PN = PHINode::Create(Op->getType(), Insts.size(), - Op->getName() + ".sink", &BBEnd->front()); + auto *PN = + PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink"); + PN->insertBefore(BBEnd->begin()); for (auto *I : Insts) PN->addIncoming(I->getOperand(O), I->getParent()); NewOperands.push_back(PN); @@ -1963,7 +2056,8 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { // and move it to the start of the successor block. for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) I0->getOperandUse(O).set(NewOperands[O]); - I0->moveBefore(&*BBEnd->getFirstInsertionPt()); + + I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt()); // Update metadata and IR flags, and merge debug locations. for (auto *I : Insts) @@ -2765,8 +2859,8 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, Value *OrigV = PN.getIncomingValueForBlock(BB); Value *ThenV = PN.getIncomingValueForBlock(ThenBB); - // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf. - // Skip PHIs which are trivial. + // FIXME: Try to remove some of the duplication with + // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial. if (ThenV == OrigV) continue; @@ -3009,7 +3103,7 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, // store %merge, %x.dest, !DIAssignID !2 // dbg.assign %merge, "x", ..., !2 for (auto *DAI : at::getAssignmentMarkers(SpeculatedStore)) { - if (any_of(DAI->location_ops(), [&](Value *V) { return V == OrigV; })) + if (llvm::is_contained(DAI->location_ops(), OrigV)) DAI->replaceVariableLocationOp(OrigV, S); } } @@ -3036,6 +3130,11 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, } // Hoist the instructions. + // In "RemoveDIs" non-instr debug-info mode, drop DPValues attached to these + // instructions, in the same way that dbg.value intrinsics are dropped at the + // end of this block. + for (auto &It : make_range(ThenBB->begin(), ThenBB->end())) + It.dropDbgValues(); BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(), std::prev(ThenBB->end())); @@ -3207,6 +3306,10 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt(); DenseMap<Value *, Value *> TranslateMap; // Track translated values. TranslateMap[Cond] = CB; + + // RemoveDIs: track instructions that we optimise away while folding, so + // that we can copy DPValues from them later. + BasicBlock::iterator SrcDbgCursor = BB->begin(); for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { if (PHINode *PN = dyn_cast<PHINode>(BBI)) { TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB); @@ -3241,6 +3344,15 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, TranslateMap[&*BBI] = N; } if (N) { + // Copy all debug-info attached to instructions from the last we + // successfully clone, up to this instruction (they might have been + // folded away). + for (; SrcDbgCursor != BBI; ++SrcDbgCursor) + N->cloneDebugInfoFrom(&*SrcDbgCursor); + SrcDbgCursor = std::next(BBI); + // Clone debug-info on this instruction too. + N->cloneDebugInfoFrom(&*BBI); + // Register the new instruction with the assumption cache if necessary. if (auto *Assume = dyn_cast<AssumeInst>(N)) if (AC) @@ -3248,6 +3360,10 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, } } + for (; &*SrcDbgCursor != BI; ++SrcDbgCursor) + InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor); + InsertPt->cloneDebugInfoFrom(BI); + BB->removePredecessor(EdgeBB); BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator()); EdgeBI->setSuccessor(0, RealDest); @@ -3652,22 +3768,22 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, ValueToValueMapTy VMap; // maps original values to cloned values CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap); + Module *M = BB->getModule(); + + if (PredBlock->IsNewDbgInfoFormat) { + PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator()); + for (DPValue &DPV : PredBlock->getTerminator()->getDbgValueRange()) { + RemapDPValue(M, &DPV, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + } + } + // Now that the Cond was cloned into the predecessor basic block, // or/and the two conditions together. Value *BICond = VMap[BI->getCondition()]; PBI->setCondition( createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond")); - // Copy any debug value intrinsics into the end of PredBlock. - for (Instruction &I : *BB) { - if (isa<DbgInfoIntrinsic>(I)) { - Instruction *NewI = I.clone(); - RemapInstruction(NewI, VMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); - NewI->insertBefore(PBI); - } - } - ++NumFoldBranchToCommonDest; return true; } @@ -3867,7 +3983,8 @@ static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB)) return V; - PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge", &Succ->front()); + PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge"); + PHI->insertBefore(Succ->begin()); PHI->addIncoming(V, BB); for (BasicBlock *PredBB : predecessors(Succ)) if (PredBB != BB) @@ -3991,7 +4108,9 @@ static bool mergeConditionalStoreToAddress( Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(), QStore->getParent(), PPHI); - IRBuilder<> QB(&*PostBB->getFirstInsertionPt()); + BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt(); + IRBuilder<> QB(PostBB, PostBBFirst); + QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc()); Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond); Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond); @@ -4002,9 +4121,11 @@ static bool mergeConditionalStoreToAddress( QPred = QB.CreateNot(QPred); Value *CombinedPred = QB.CreateOr(PPred, QPred); - auto *T = SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(), + BasicBlock::iterator InsertPt = QB.GetInsertPoint(); + auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt, /*Unreachable=*/false, /*BranchWeights=*/nullptr, DTU); + QB.SetInsertPoint(T); StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address)); SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata())); @@ -4140,10 +4261,10 @@ static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // 2) We can sink side effecting instructions into BI's fallthrough // successor provided they doesn't contribute to computation of // BI's condition. - Value *CondWB, *WC; - BasicBlock *IfTrueBB, *IfFalseBB; - if (!parseWidenableBranch(PBI, CondWB, WC, IfTrueBB, IfFalseBB) || - IfTrueBB != BI->getParent() || !BI->getParent()->getSinglePredecessor()) + BasicBlock *IfTrueBB = PBI->getSuccessor(0); + BasicBlock *IfFalseBB = PBI->getSuccessor(1); + if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() || + !BI->getParent()->getSinglePredecessor()) return false; if (!IfFalseBB->phis().empty()) return false; // TODO @@ -4256,6 +4377,21 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, if (PBI->getSuccessor(PBIOp) == BB) return false; + // If predecessor's branch probability to BB is too low don't merge branches. + SmallVector<uint32_t, 2> PredWeights; + if (!PBI->getMetadata(LLVMContext::MD_unpredictable) && + extractBranchWeights(*PBI, PredWeights) && + (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) { + + BranchProbability CommonDestProb = BranchProbability::getBranchProbability( + PredWeights[PBIOp], + static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]); + + BranchProbability Likely = TTI.getPredictableBranchThreshold(); + if (CommonDestProb >= Likely) + return false; + } + // Do not perform this transformation if it would require // insertion of a large number of select instructions. For targets // without predication/cmovs, this is a big pessimization. @@ -5088,6 +5224,15 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { bool Changed = false; + // Ensure that any debug-info records that used to occur after the Unreachable + // are moved to in front of it -- otherwise they'll "dangle" at the end of + // the block. + BB->flushTerminatorDbgValues(); + + // Debug-info records on the unreachable inst itself should be deleted, as + // below we delete everything past the final executable instruction. + UI->dropDbgValues(); + // If there are any instructions immediately before the unreachable that can // be removed, do so. while (UI->getIterator() != BB->begin()) { @@ -5104,6 +5249,10 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn, // and we can therefore guarantee this block will be erased. + // If we're deleting this, we're deleting any subsequent dbg.values, so + // delete DPValue records of variable information. + BBI->dropDbgValues(); + // Delete this instruction (any uses are guaranteed to be dead) BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType())); BBI->eraseFromParent(); @@ -5667,7 +5816,7 @@ getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) { if (I.isTerminator()) { // If the terminator is a simple branch, continue to the next block. - if (I.getNumSuccessors() != 1 || I.isExceptionalTerminator()) + if (I.getNumSuccessors() != 1 || I.isSpecialTerminator()) return false; Pred = CaseDest; CaseDest = I.getSuccessor(0); @@ -5890,8 +6039,8 @@ static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, // Remove the switch. - while (PHI->getBasicBlockIndex(SelectBB) >= 0) - PHI->removeIncomingValue(SelectBB); + PHI->removeIncomingValueIf( + [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; }); PHI->addIncoming(SelectValue, SelectBB); SmallPtrSet<BasicBlock *, 4> RemovedSuccessors; @@ -6051,8 +6200,9 @@ SwitchLookupTable::SwitchLookupTable( bool LinearMappingPossible = true; APInt PrevVal; APInt DistToPrev; - // When linear map is monotonic, we can attach nsw. - bool Wrapped = false; + // When linear map is monotonic and signed overflow doesn't happen on + // maximum index, we can attach nsw on Add and Mul. + bool NonMonotonic = false; assert(TableSize >= 2 && "Should be a SingleValue table."); // Check if there is the same distance between two consecutive values. for (uint64_t I = 0; I < TableSize; ++I) { @@ -6072,7 +6222,7 @@ SwitchLookupTable::SwitchLookupTable( LinearMappingPossible = false; break; } - Wrapped |= + NonMonotonic |= Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal); } PrevVal = Val; @@ -6080,7 +6230,10 @@ SwitchLookupTable::SwitchLookupTable( if (LinearMappingPossible) { LinearOffset = cast<ConstantInt>(TableContents[0]); LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev); - LinearMapValWrapped = Wrapped; + bool MayWrap = false; + APInt M = LinearMultiplier->getValue(); + (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap); + LinearMapValWrapped = NonMonotonic || MayWrap; Kind = LinearMapKind; ++NumLinearMaps; return; @@ -6503,9 +6656,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // If the default destination is unreachable, or if the lookup table covers // all values of the conditional variable, branch directly to the lookup table // BB. Otherwise, check that the condition is within the case range. - const bool DefaultIsReachable = + bool DefaultIsReachable = !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()); - const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize); // Create the BB that does the lookups. Module &Mod = *CommonDest->getParent()->getParent(); @@ -6536,6 +6688,28 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, BranchInst *RangeCheckBranch = nullptr; + // Grow the table to cover all possible index values to avoid the range check. + // It will use the default result to fill in the table hole later, so make + // sure it exist. + if (UseSwitchConditionAsTableIndex && HasDefaultResults) { + ConstantRange CR = computeConstantRange(TableIndex, /* ForSigned */ false); + // Grow the table shouldn't have any size impact by checking + // WouldFitInRegister. + // TODO: Consider growing the table also when it doesn't fit in a register + // if no optsize is specified. + const uint64_t UpperBound = CR.getUpper().getLimitedValue(); + if (!CR.isUpperWrapped() && all_of(ResultTypes, [&](const auto &KV) { + return SwitchLookupTable::WouldFitInRegister( + DL, UpperBound, KV.second /* ResultType */); + })) { + // The default branch is unreachable after we enlarge the lookup table. + // Adjust DefaultIsReachable to reuse code path. + TableSize = UpperBound; + DefaultIsReachable = false; + } + } + + const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize); if (!DefaultIsReachable || GeneratingCoveredLookupTable) { Builder.CreateBr(LookupBB); if (DTU) @@ -6697,9 +6871,6 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, // This transform can be done speculatively because it is so cheap - it // results in a single rotate operation being inserted. - // FIXME: It's possible that optimizing a switch on powers of two might also - // be beneficial - flag values are often powers of two and we could use a CLZ - // as the key function. // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than // one element and LLVM disallows duplicate cases, Shift is guaranteed to be @@ -6744,6 +6915,80 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, return true; } +/// Tries to transform switch of powers of two to reduce switch range. +/// For example, switch like: +/// switch (C) { case 1: case 2: case 64: case 128: } +/// will be transformed to: +/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: } +/// +/// This transformation allows better lowering and could allow transforming into +/// a lookup table. +static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, + const DataLayout &DL, + const TargetTransformInfo &TTI) { + Value *Condition = SI->getCondition(); + LLVMContext &Context = SI->getContext(); + auto *CondTy = cast<IntegerType>(Condition->getType()); + + if (CondTy->getIntegerBitWidth() > 64 || + !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth())) + return false; + + const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost( + IntrinsicCostAttributes(Intrinsic::cttz, CondTy, + {Condition, ConstantInt::getTrue(Context)}), + TTI::TCK_SizeAndLatency); + + if (CttzIntrinsicCost > TTI::TCC_Basic) + // Inserting intrinsic is too expensive. + return false; + + // Only bother with this optimization if there are more than 3 switch cases. + // SDAG will only bother creating jump tables for 4 or more cases. + if (SI->getNumCases() < 4) + return false; + + // We perform this optimization only for switches with + // unreachable default case. + // This assumtion will save us from checking if `Condition` is a power of two. + if (!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg())) + return false; + + // Check that switch cases are powers of two. + SmallVector<uint64_t, 4> Values; + for (const auto &Case : SI->cases()) { + uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue(); + if (llvm::has_single_bit(CaseValue)) + Values.push_back(CaseValue); + else + return false; + } + + // isSwichDense requires case values to be sorted. + llvm::sort(Values); + if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) - + llvm::countr_zero(Values.front()) + 1)) + // Transform is unable to generate dense switch. + return false; + + Builder.SetInsertPoint(SI); + + // Replace each case with its trailing zeros number. + for (auto &Case : SI->cases()) { + auto *OrigValue = Case.getCaseValue(); + Case.setValue(ConstantInt::get(OrigValue->getType(), + OrigValue->getValue().countr_zero())); + } + + // Replace condition with its trailing zeros number. + auto *ConditionTrailingZeros = Builder.CreateIntrinsic( + Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)}); + + SI->setCondition(ConditionTrailingZeros); + + return true; +} + bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { BasicBlock *BB = SI->getParent(); @@ -6791,9 +7036,16 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { SwitchToLookupTable(SI, Builder, DTU, DL, TTI)) return requestResimplify(); + if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI)) + return requestResimplify(); + if (ReduceSwitchRange(SI, Builder, DL, TTI)) return requestResimplify(); + if (HoistCommon && + hoistCommonCodeFromSuccessors(SI->getParent(), !Options.HoistCommonInsts)) + return requestResimplify(); + return false; } @@ -6978,7 +7230,8 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI, // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. - if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, + if (Options.SpeculateBlocks && + FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, Options.BonusInstThreshold)) return requestResimplify(); return false; @@ -7048,7 +7301,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. - if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, + if (Options.SpeculateBlocks && + FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, Options.BonusInstThreshold)) return requestResimplify(); @@ -7058,7 +7312,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // can hoist it up to the branching block. if (BI->getSuccessor(0)->getSinglePredecessor()) { if (BI->getSuccessor(1)->getSinglePredecessor()) { - if (HoistCommon && HoistThenElseCodeToIf(BI, !Options.HoistCommonInsts)) + if (HoistCommon && hoistCommonCodeFromSuccessors( + BI->getParent(), !Options.HoistCommonInsts)) return requestResimplify(); } else { // If Successor #1 has multiple preds, we may be able to conditionally diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index a28916bc9baf..722ed03db3de 100644 --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -539,7 +539,8 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) { for (auto *ICI : ICmpUsers) { bool IsSwapped = L->isLoopInvariant(ICI->getOperand(0)); auto *Op1 = IsSwapped ? ICI->getOperand(0) : ICI->getOperand(1); - Instruction *Ext = nullptr; + IRBuilder<> Builder(ICI); + Value *Ext = nullptr; // For signed/unsigned predicate, replace the old comparison with comparison // of immediate IV against sext/zext of the invariant argument. If we can // use either sext or zext (i.e. we are dealing with equality predicate), @@ -550,18 +551,18 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) { if (IsSwapped) Pred = ICmpInst::getSwappedPredicate(Pred); if (CanUseZExt(ICI)) { assert(DoesZExtCollapse && "Unprofitable zext?"); - Ext = new ZExtInst(Op1, IVTy, "zext", ICI); + Ext = Builder.CreateZExt(Op1, IVTy, "zext"); Pred = ICmpInst::getUnsignedPredicate(Pred); } else { assert(DoesSExtCollapse && "Unprofitable sext?"); - Ext = new SExtInst(Op1, IVTy, "sext", ICI); + Ext = Builder.CreateSExt(Op1, IVTy, "sext"); assert(Pred == ICmpInst::getSignedPredicate(Pred) && "Must be signed!"); } bool Changed; L->makeLoopInvariant(Ext, Changed); (void)Changed; - ICmpInst *NewICI = new ICmpInst(ICI, Pred, IV, Ext); - ICI->replaceAllUsesWith(NewICI); + auto *NewCmp = Builder.CreateICmp(Pred, IV, Ext); + ICI->replaceAllUsesWith(NewCmp); DeadInsts.emplace_back(ICI); } @@ -659,12 +660,12 @@ bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) { Instruction *IVOperand = cast<Instruction>(UseInst->getOperand(0)); // Get the symbolic expression for this instruction. const SCEV *IV = SE->getSCEV(IVOperand); - unsigned MaskBits; + int MaskBits; if (UseInst->getOpcode() == CastInst::SIToFP) - MaskBits = SE->getSignedRange(IV).getMinSignedBits(); + MaskBits = (int)SE->getSignedRange(IV).getMinSignedBits(); else - MaskBits = SE->getUnsignedRange(IV).getActiveBits(); - unsigned DestNumSigBits = UseInst->getType()->getFPMantissaWidth(); + MaskBits = (int)SE->getUnsignedRange(IV).getActiveBits(); + int DestNumSigBits = UseInst->getType()->getFPMantissaWidth(); if (MaskBits <= DestNumSigBits) { for (User *U : UseInst->users()) { // Match for fptosi/fptoui of sitofp and with same type. @@ -908,8 +909,9 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { if (replaceIVUserWithLoopInvariant(UseInst)) continue; - // Go further for the bitcast ''prtoint ptr to i64' - if (isa<PtrToIntInst>(UseInst)) + // Go further for the bitcast 'prtoint ptr to i64' or if the cast is done + // by truncation + if ((isa<PtrToIntInst>(UseInst)) || (isa<TruncInst>(UseInst))) for (Use &U : UseInst->uses()) { Instruction *User = cast<Instruction>(U.getUser()); if (replaceIVUserWithLoopInvariant(User)) @@ -1373,16 +1375,32 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) { DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0; assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU"); - const SCEV *ExtendOperExpr = nullptr; const OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(DU.NarrowUse); ExtendKind ExtKind = getExtendKind(DU.NarrowDef); - if (ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap()) - ExtendOperExpr = SE->getSignExtendExpr( - SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); - else if (ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap()) - ExtendOperExpr = SE->getZeroExtendExpr( - SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); + if (!(ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap()) && + !(ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap())) { + ExtKind = ExtendKind::Unknown; + + // For a non-negative NarrowDef, we can choose either type of + // extension. We want to use the current extend kind if legal + // (see above), and we only hit this code if we need to check + // the opposite case. + if (DU.NeverNegative) { + if (OBO->hasNoSignedWrap()) { + ExtKind = ExtendKind::Sign; + } else if (OBO->hasNoUnsignedWrap()) { + ExtKind = ExtendKind::Zero; + } + } + } + + const SCEV *ExtendOperExpr = + SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)); + if (ExtKind == ExtendKind::Sign) + ExtendOperExpr = SE->getSignExtendExpr(ExtendOperExpr, WideType); + else if (ExtKind == ExtendKind::Zero) + ExtendOperExpr = SE->getZeroExtendExpr(ExtendOperExpr, WideType); else return {nullptr, ExtendKind::Unknown}; @@ -1493,10 +1511,6 @@ bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) { assert(CastWidth <= IVWidth && "Unexpected width while widening compare."); // Widen the compare instruction. - auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); - if (!InsertPt) - return false; - IRBuilder<> Builder(InsertPt); DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); // Widen the other operand of the compare, if necessary. @@ -1673,7 +1687,8 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { assert(LoopExitingBlock && L->contains(LoopExitingBlock) && "Not a LCSSA Phi?"); WidePN->addIncoming(WideBO, LoopExitingBlock); - Builder.SetInsertPoint(&*User->getParent()->getFirstInsertionPt()); + Builder.SetInsertPoint(User->getParent(), + User->getParent()->getFirstInsertionPt()); auto *TruncPN = Builder.CreateTrunc(WidePN, User->getType()); User->replaceAllUsesWith(TruncPN); DeadInsts.emplace_back(User); @@ -1726,7 +1741,8 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewri PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", UsePhi); WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0)); - IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt()); + BasicBlock *WidePhiBB = WidePhi->getParent(); + IRBuilder<> Builder(WidePhiBB, WidePhiBB->getFirstInsertionPt()); Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType()); UsePhi->replaceAllUsesWith(Trunc); DeadInsts.emplace_back(UsePhi); @@ -1786,65 +1802,70 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewri return nullptr; } - // Does this user itself evaluate to a recurrence after widening? - WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU); - if (!WideAddRec.first) - WideAddRec = getWideRecurrence(DU); - - assert((WideAddRec.first == nullptr) == - (WideAddRec.second == ExtendKind::Unknown)); - if (!WideAddRec.first) { - // If use is a loop condition, try to promote the condition instead of - // truncating the IV first. - if (widenLoopCompare(DU)) + auto tryAddRecExpansion = [&]() -> Instruction* { + // Does this user itself evaluate to a recurrence after widening? + WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU); + if (!WideAddRec.first) + WideAddRec = getWideRecurrence(DU); + assert((WideAddRec.first == nullptr) == + (WideAddRec.second == ExtendKind::Unknown)); + if (!WideAddRec.first) return nullptr; - // We are here about to generate a truncate instruction that may hurt - // performance because the scalar evolution expression computed earlier - // in WideAddRec.first does not indicate a polynomial induction expression. - // In that case, look at the operands of the use instruction to determine - // if we can still widen the use instead of truncating its operand. - if (widenWithVariantUse(DU)) + // Reuse the IV increment that SCEVExpander created as long as it dominates + // NarrowUse. + Instruction *WideUse = nullptr; + if (WideAddRec.first == WideIncExpr && + Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) + WideUse = WideInc; + else { + WideUse = cloneIVUser(DU, WideAddRec.first); + if (!WideUse) + return nullptr; + } + // Evaluation of WideAddRec ensured that the narrow expression could be + // extended outside the loop without overflow. This suggests that the wide use + // evaluates to the same expression as the extended narrow use, but doesn't + // absolutely guarantee it. Hence the following failsafe check. In rare cases + // where it fails, we simply throw away the newly created wide use. + if (WideAddRec.first != SE->getSCEV(WideUse)) { + LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " + << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first + << "\n"); + DeadInsts.emplace_back(WideUse); return nullptr; + }; - // This user does not evaluate to a recurrence after widening, so don't - // follow it. Instead insert a Trunc to kill off the original use, - // eventually isolating the original narrow IV so it can be removed. - truncateIVUse(DU, DT, LI); - return nullptr; - } + // if we reached this point then we are going to replace + // DU.NarrowUse with WideUse. Reattach DbgValue then. + replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT); - // Reuse the IV increment that SCEVExpander created as long as it dominates - // NarrowUse. - Instruction *WideUse = nullptr; - if (WideAddRec.first == WideIncExpr && - Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) - WideUse = WideInc; - else { - WideUse = cloneIVUser(DU, WideAddRec.first); - if (!WideUse) - return nullptr; - } - // Evaluation of WideAddRec ensured that the narrow expression could be - // extended outside the loop without overflow. This suggests that the wide use - // evaluates to the same expression as the extended narrow use, but doesn't - // absolutely guarantee it. Hence the following failsafe check. In rare cases - // where it fails, we simply throw away the newly created wide use. - if (WideAddRec.first != SE->getSCEV(WideUse)) { - LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " - << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first - << "\n"); - DeadInsts.emplace_back(WideUse); + ExtendKindMap[DU.NarrowUse] = WideAddRec.second; + // Returning WideUse pushes it on the worklist. + return WideUse; + }; + + if (auto *I = tryAddRecExpansion()) + return I; + + // If use is a loop condition, try to promote the condition instead of + // truncating the IV first. + if (widenLoopCompare(DU)) return nullptr; - } - // if we reached this point then we are going to replace - // DU.NarrowUse with WideUse. Reattach DbgValue then. - replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT); + // We are here about to generate a truncate instruction that may hurt + // performance because the scalar evolution expression computed earlier + // in WideAddRec.first does not indicate a polynomial induction expression. + // In that case, look at the operands of the use instruction to determine + // if we can still widen the use instead of truncating its operand. + if (widenWithVariantUse(DU)) + return nullptr; - ExtendKindMap[DU.NarrowUse] = WideAddRec.second; - // Returning WideUse pushes it on the worklist. - return WideUse; + // This user does not evaluate to a recurrence after widening, so don't + // follow it. Instead insert a Trunc to kill off the original use, + // eventually isolating the original narrow IV so it can be removed. + truncateIVUse(DU, DT, LI); + return nullptr; } /// Add eligible users of NarrowDef to NarrowIVUsers. @@ -1944,13 +1965,15 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. if (BasicBlock *LatchBlock = L->getLoopLatch()) { WideInc = - cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); - WideIncExpr = SE->getSCEV(WideInc); - // Propagate the debug location associated with the original loop increment - // to the new (widened) increment. - auto *OrigInc = - cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); - WideInc->setDebugLoc(OrigInc->getDebugLoc()); + dyn_cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); + if (WideInc) { + WideIncExpr = SE->getSCEV(WideInc); + // Propagate the debug location associated with the original loop + // increment to the new (widened) increment. + auto *OrigInc = + cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); + WideInc->setDebugLoc(OrigInc->getDebugLoc()); + } } LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 5b0951252c07..760a626c8b6f 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -227,9 +227,21 @@ static Value *convertStrToInt(CallInst *CI, StringRef &Str, Value *EndPtr, return ConstantInt::get(RetTy, Result); } +static bool isOnlyUsedInComparisonWithZero(Value *V) { + for (User *U : V->users()) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(U)) + if (Constant *C = dyn_cast<Constant>(IC->getOperand(1))) + if (C->isNullValue()) + continue; + // Unknown instruction. + return false; + } + return true; +} + static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len, const DataLayout &DL) { - if (!isOnlyUsedInZeroComparison(CI)) + if (!isOnlyUsedInComparisonWithZero(CI)) return false; if (!isDereferenceableAndAlignedPointer(Str, Align(1), APInt(64, Len), DL)) @@ -1136,7 +1148,7 @@ Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilderBase &B) { Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilderBase &B) { // fold strstr(x, x) -> x. if (CI->getArgOperand(0) == CI->getArgOperand(1)) - return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + return CI->getArgOperand(0); // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 if (isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { @@ -1164,7 +1176,7 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilderBase &B) { // fold strstr(x, "") -> x. if (HasStr2 && ToFindStr.empty()) - return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + return CI->getArgOperand(0); // If both strings are known, constant fold it. if (HasStr1 && HasStr2) { @@ -1174,16 +1186,13 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilderBase &B) { return Constant::getNullValue(CI->getType()); // strstr("abcd", "bc") -> gep((char*)"abcd", 1) - Value *Result = castToCStr(CI->getArgOperand(0), B); - Result = - B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), Result, Offset, "strstr"); - return B.CreateBitCast(Result, CI->getType()); + return B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), CI->getArgOperand(0), + Offset, "strstr"); } // fold strstr(x, "y") -> strchr(x, 'y'). if (HasStr2 && ToFindStr.size() == 1) { - Value *StrChr = emitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI); - return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr; + return emitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI); } annotateNonNullNoUndefBasedOnAccess(CI, {0, 1}); @@ -1380,7 +1389,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) { if (isOnlyUsedInEqualityComparison(CI, SrcStr)) // S is dereferenceable so it's safe to load from it and fold // memchr(S, C, N) == S to N && *S == C for any C and N. - // TODO: This is safe even even for nonconstant S. + // TODO: This is safe even for nonconstant S. return memChrToCharCompare(CI, Size, B, DL); // From now on we need a constant length and constant array. @@ -1522,12 +1531,10 @@ static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS, // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS if (Len == 1) { - Value *LHSV = - B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(LHS, B), "lhsc"), - CI->getType(), "lhsv"); - Value *RHSV = - B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(RHS, B), "rhsc"), - CI->getType(), "rhsv"); + Value *LHSV = B.CreateZExt(B.CreateLoad(B.getInt8Ty(), LHS, "lhsc"), + CI->getType(), "lhsv"); + Value *RHSV = B.CreateZExt(B.CreateLoad(B.getInt8Ty(), RHS, "rhsc"), + CI->getType(), "rhsv"); return B.CreateSub(LHSV, RHSV, "chardiff"); } @@ -1833,7 +1840,7 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilderBase &B, StringRef CallerName = CI->getFunction()->getName(); if (!CallerName.empty() && CallerName.back() == 'f' && CallerName.size() == (CalleeName.size() + 1) && - CallerName.startswith(CalleeName)) + CallerName.starts_with(CalleeName)) return nullptr; } @@ -2368,8 +2375,8 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilderBase &B) { FMF.setNoSignedZeros(); B.setFastMathFlags(FMF); - Intrinsic::ID IID = Callee->getName().startswith("fmin") ? Intrinsic::minnum - : Intrinsic::maxnum; + Intrinsic::ID IID = Callee->getName().starts_with("fmin") ? Intrinsic::minnum + : Intrinsic::maxnum; Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, CI->getType()); return copyFlags( *CI, B.CreateCall(F, {CI->getArgOperand(0), CI->getArgOperand(1)})); @@ -3066,7 +3073,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return nullptr; Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char"); - Value *Ptr = castToCStr(Dest, B); + Value *Ptr = Dest; B.CreateStore(V, Ptr); Ptr = B.CreateInBoundsGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul"); B.CreateStore(B.getInt8(0), Ptr); @@ -3093,9 +3100,6 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, return ConstantInt::get(CI->getType(), SrcLen - 1); } else if (Value *V = emitStpCpy(Dest, CI->getArgOperand(2), B, TLI)) { // sprintf(dest, "%s", str) -> stpcpy(dest, str) - dest - // Handle mismatched pointer types (goes away with typeless pointers?). - V = B.CreatePointerCast(V, B.getInt8PtrTy()); - Dest = B.CreatePointerCast(Dest, B.getInt8PtrTy()); Value *PtrDiff = B.CreatePtrDiff(B.getInt8Ty(), V, Dest); return B.CreateIntCast(PtrDiff, CI->getType(), false); } @@ -3261,7 +3265,7 @@ Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, if (!CI->getArgOperand(3)->getType()->isIntegerTy()) return nullptr; Value *V = B.CreateTrunc(CI->getArgOperand(3), B.getInt8Ty(), "char"); - Value *Ptr = castToCStr(DstArg, B); + Value *Ptr = DstArg; B.CreateStore(V, Ptr); Ptr = B.CreateInBoundsGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul"); B.CreateStore(B.getInt8(0), Ptr); @@ -3397,8 +3401,7 @@ Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilderBase &B) { // If this is writing one byte, turn it into fputc. // This optimisation is only valid, if the return value is unused. if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) - Value *Char = B.CreateLoad(B.getInt8Ty(), - castToCStr(CI->getArgOperand(0), B), "char"); + Value *Char = B.CreateLoad(B.getInt8Ty(), CI->getArgOperand(0), "char"); Type *IntTy = B.getIntNTy(TLI->getIntSize()); Value *Cast = B.CreateIntCast(Char, IntTy, /*isSigned*/ true, "chari"); Value *NewCI = emitFPutC(Cast, CI->getArgOperand(3), B, TLI); diff --git a/llvm/lib/Transforms/Utils/StripGCRelocates.cpp b/llvm/lib/Transforms/Utils/StripGCRelocates.cpp index 0ff88e8b4612..6094f36a77f4 100644 --- a/llvm/lib/Transforms/Utils/StripGCRelocates.cpp +++ b/llvm/lib/Transforms/Utils/StripGCRelocates.cpp @@ -18,8 +18,6 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Statepoint.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" using namespace llvm; @@ -66,21 +64,3 @@ PreservedAnalyses StripGCRelocates::run(Function &F, PA.preserveSet<CFGAnalyses>(); return PA; } - -namespace { -struct StripGCRelocatesLegacy : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - StripGCRelocatesLegacy() : FunctionPass(ID) { - initializeStripGCRelocatesLegacyPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &Info) const override {} - - bool runOnFunction(Function &F) override { return ::stripGCRelocates(F); } -}; -char StripGCRelocatesLegacy::ID = 0; -} // namespace - -INITIALIZE_PASS(StripGCRelocatesLegacy, "strip-gc-relocates", - "Strip gc.relocates inserted through RewriteStatepointsForGC", - true, false) diff --git a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp index c3ae43e567b0..8b4f34209e85 100644 --- a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp +++ b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp @@ -68,8 +68,6 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Module.h" #include "llvm/IR/Value.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" diff --git a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index 2b706858cbed..d5468909dd4e 100644 --- a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -16,33 +16,9 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" -#include "llvm/InitializePasses.h" #include "llvm/Transforms/Utils.h" using namespace llvm; -char UnifyFunctionExitNodesLegacyPass::ID = 0; - -UnifyFunctionExitNodesLegacyPass::UnifyFunctionExitNodesLegacyPass() - : FunctionPass(ID) { - initializeUnifyFunctionExitNodesLegacyPassPass( - *PassRegistry::getPassRegistry()); -} - -INITIALIZE_PASS(UnifyFunctionExitNodesLegacyPass, "mergereturn", - "Unify function exit nodes", false, false) - -Pass *llvm::createUnifyFunctionExitNodesPass() { - return new UnifyFunctionExitNodesLegacyPass(); -} - -void UnifyFunctionExitNodesLegacyPass::getAnalysisUsage( - AnalysisUsage &AU) const { - // We preserve the non-critical-edgeness property - AU.addPreservedID(BreakCriticalEdgesID); - // This is a cluster of orthogonal Transforms - AU.addPreservedID(LowerSwitchID); -} - namespace { bool unifyUnreachableBlocks(Function &F) { @@ -110,16 +86,6 @@ bool unifyReturnBlocks(Function &F) { } } // namespace -// Unify all exit nodes of the CFG by creating a new BasicBlock, and converting -// all returns to unconditional branches to this new basic block. Also, unify -// all unreachable blocks. -bool UnifyFunctionExitNodesLegacyPass::runOnFunction(Function &F) { - bool Changed = false; - Changed |= unifyUnreachableBlocks(F); - Changed |= unifyReturnBlocks(F); - return Changed; -} - PreservedAnalyses UnifyFunctionExitNodesPass::run(Function &F, FunctionAnalysisManager &AM) { bool Changed = false; diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp index 8c781f59ff5a..2f37f7f972cb 100644 --- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp +++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp @@ -44,10 +44,8 @@ struct UnifyLoopExitsLegacyPass : public FunctionPass { } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequiredID(LowerSwitchID); AU.addRequired<LoopInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); - AU.addPreservedID(LowerSwitchID); AU.addPreserved<LoopInfoWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); } @@ -65,7 +63,6 @@ FunctionPass *llvm::createUnifyLoopExitsPass() { INITIALIZE_PASS_BEGIN(UnifyLoopExitsLegacyPass, "unify-loop-exits", "Fixup each natural loop to have a single exit block", false /* Only looks at CFG */, false /* Analysis Pass */) -INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(UnifyLoopExitsLegacyPass, "unify-loop-exits", @@ -234,6 +231,8 @@ bool UnifyLoopExitsLegacyPass::runOnFunction(Function &F) { auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator."); + return runImpl(LI, DT); } diff --git a/llvm/lib/Transforms/Utils/Utils.cpp b/llvm/lib/Transforms/Utils/Utils.cpp index 91c743f17764..51e1e824dd26 100644 --- a/llvm/lib/Transforms/Utils/Utils.cpp +++ b/llvm/lib/Transforms/Utils/Utils.cpp @@ -21,7 +21,6 @@ using namespace llvm; /// initializeTransformUtils - Initialize all passes in the TransformUtils /// library. void llvm::initializeTransformUtils(PassRegistry &Registry) { - initializeAssumeBuilderPassLegacyPassPass(Registry); initializeBreakCriticalEdgesPass(Registry); initializeCanonicalizeFreezeInLoopsPass(Registry); initializeLCSSAWrapperPassPass(Registry); @@ -30,9 +29,6 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) { initializeLowerInvokeLegacyPassPass(Registry); initializeLowerSwitchLegacyPassPass(Registry); initializePromoteLegacyPassPass(Registry); - initializeUnifyFunctionExitNodesLegacyPassPass(Registry); - initializeStripGCRelocatesLegacyPass(Registry); - initializePredicateInfoPrinterLegacyPassPass(Registry); initializeFixIrreduciblePass(Registry); initializeUnifyLoopExitsLegacyPassPass(Registry); } diff --git a/llvm/lib/Transforms/Utils/ValueMapper.cpp b/llvm/lib/Transforms/Utils/ValueMapper.cpp index 3446e31cc2ef..71d0f09e4771 100644 --- a/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -31,6 +31,7 @@ #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" @@ -145,6 +146,7 @@ public: Value *mapValue(const Value *V); void remapInstruction(Instruction *I); void remapFunction(Function &F); + void remapDPValue(DPValue &DPV); Constant *mapConstant(const Constant *C) { return cast_or_null<Constant>(mapValue(C)); @@ -535,6 +537,39 @@ Value *Mapper::mapValue(const Value *V) { return getVM()[V] = ConstantPointerNull::get(cast<PointerType>(NewTy)); } +void Mapper::remapDPValue(DPValue &V) { + // Remap variables and DILocations. + auto *MappedVar = mapMetadata(V.getVariable()); + auto *MappedDILoc = mapMetadata(V.getDebugLoc()); + V.setVariable(cast<DILocalVariable>(MappedVar)); + V.setDebugLoc(DebugLoc(cast<DILocation>(MappedDILoc))); + + // Find Value operands and remap those. + SmallVector<Value *, 4> Vals, NewVals; + for (Value *Val : V.location_ops()) + Vals.push_back(Val); + for (Value *Val : Vals) + NewVals.push_back(mapValue(Val)); + + // If there are no changes to the Value operands, finished. + if (Vals == NewVals) + return; + + bool IgnoreMissingLocals = Flags & RF_IgnoreMissingLocals; + + // Otherwise, do some replacement. + if (!IgnoreMissingLocals && + llvm::any_of(NewVals, [&](Value *V) { return V == nullptr; })) { + V.setKillLocation(); + } else { + // Either we have all non-empty NewVals, or we're permitted to ignore + // missing locals. + for (unsigned int I = 0; I < Vals.size(); ++I) + if (NewVals[I]) + V.replaceVariableLocationOp(I, NewVals[I]); + } +} + Value *Mapper::mapBlockAddress(const BlockAddress &BA) { Function *F = cast<Function>(mapValue(BA.getFunction())); @@ -1179,6 +1214,17 @@ void ValueMapper::remapInstruction(Instruction &I) { FlushingMapper(pImpl)->remapInstruction(&I); } +void ValueMapper::remapDPValue(Module *M, DPValue &V) { + FlushingMapper(pImpl)->remapDPValue(V); +} + +void ValueMapper::remapDPValueRange( + Module *M, iterator_range<DPValue::self_iterator> Range) { + for (DPValue &DPV : Range) { + remapDPValue(M, DPV); + } +} + void ValueMapper::remapFunction(Function &F) { FlushingMapper(pImpl)->remapFunction(F); } |
