Diffstat (limited to 'lib/Transforms/Utils')
30 files changed, 1579 insertions, 730 deletions
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 5fa371377c85..d85cc40c372a 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -170,7 +170,8 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
 bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
                                      LoopInfo *LI, MemorySSAUpdater *MSSAU,
-                                     MemoryDependenceResults *MemDep) {
+                                     MemoryDependenceResults *MemDep,
+                                     bool PredecessorWithTwoSuccessors) {
   if (BB->hasAddressTaken())
     return false;
 
@@ -185,9 +186,24 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
     return false;
 
   // Can't merge if there are multiple distinct successors.
-  if (PredBB->getUniqueSuccessor() != BB)
+  if (!PredecessorWithTwoSuccessors && PredBB->getUniqueSuccessor() != BB)
     return false;
 
+  // Currently only allow PredBB to have two successors, one being BB.
+  // Update BI to branch to BB's only successor instead of BB.
+  BranchInst *PredBB_BI;
+  BasicBlock *NewSucc = nullptr;
+  unsigned FallThruPath;
+  if (PredecessorWithTwoSuccessors) {
+    if (!(PredBB_BI = dyn_cast<BranchInst>(PredBB->getTerminator())))
+      return false;
+    BranchInst *BB_JmpI = dyn_cast<BranchInst>(BB->getTerminator());
+    if (!BB_JmpI || !BB_JmpI->isUnconditional())
+      return false;
+    NewSucc = BB_JmpI->getSuccessor(0);
+    FallThruPath = PredBB_BI->getSuccessor(0) == BB ? 0 : 1;
+  }
+
   // Can't merge if there is PHI loop.
   for (PHINode &PN : BB->phis())
     for (Value *IncValue : PN.incoming_values())
@@ -227,18 +243,39 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
       Updates.push_back({DominatorTree::Delete, PredBB, BB});
   }
 
-  if (MSSAU)
-    MSSAU->moveAllAfterMergeBlocks(BB, PredBB, &*(BB->begin()));
+  Instruction *PTI = PredBB->getTerminator();
+  Instruction *STI = BB->getTerminator();
+  Instruction *Start = &*BB->begin();
+  // If there's nothing to move, mark the starting instruction as the last
+  // instruction in the block.
+  if (Start == STI)
+    Start = PTI;
+
+  // Move all definitions in the successor to the predecessor...
+  PredBB->getInstList().splice(PTI->getIterator(), BB->getInstList(),
+                               BB->begin(), STI->getIterator());
 
-  // Delete the unconditional branch from the predecessor...
-  PredBB->getInstList().pop_back();
+  if (MSSAU)
+    MSSAU->moveAllAfterMergeBlocks(BB, PredBB, Start);
 
   // Make all PHI nodes that referred to BB now refer to Pred as their
   // source...
   BB->replaceAllUsesWith(PredBB);
 
-  // Move all definitions in the successor to the predecessor...
-  PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+  if (PredecessorWithTwoSuccessors) {
+    // Delete the unconditional branch from BB.
+    BB->getInstList().pop_back();
+
+    // Update branch in the predecessor.
+    PredBB_BI->setSuccessor(FallThruPath, NewSucc);
+  } else {
+    // Delete the unconditional branch from the predecessor.
+    PredBB->getInstList().pop_back();
+
+    // Move terminator instruction.
+    PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+  }
+
+  // Add unreachable to now empty BB.
   new UnreachableInst(BB->getContext(), BB);
 
   // Eliminate duplicate dbg.values describing the entry PHI node post-splice.
@@ -274,11 +311,10 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
            "applying corresponding DTU updates.");
     DTU->applyUpdatesPermissive(Updates);
     DTU->deleteBB(BB);
-  }
-
-  else {
+  } else {
     BB->eraseFromParent(); // Nuke BB if DTU is nullptr.
   }
+
   return true;
 }
 
@@ -365,11 +401,13 @@ llvm::SplitAllCriticalEdges(Function &F,
 
 BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
                              DominatorTree *DT, LoopInfo *LI,
-                             MemorySSAUpdater *MSSAU) {
+                             MemorySSAUpdater *MSSAU, const Twine &BBName) {
   BasicBlock::iterator SplitIt = SplitPt->getIterator();
   while (isa<PHINode>(SplitIt) || SplitIt->isEHPad())
     ++SplitIt;
-  BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split");
+  std::string Name = BBName.str();
+  BasicBlock *New = Old->splitBasicBlock(
+      SplitIt, Name.empty() ? Old->getName() + ".split" : Name);
 
   // The new block lives in whichever loop the old one did. This preserves
   // LCSSA as well, because we force the split point to be after any PHI nodes.
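Editor's note: the new PredecessorWithTwoSuccessors flag is what the LoopRotationUtils change later in this diff relies on. A minimal sketch of a call site, assuming the caller already maintains the usual analyses (the helper name here is hypothetical; any of DT/LI/MSSAU may be null):

```cpp
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;

// Hypothetical helper: fold Latch into its predecessor even though the
// predecessor ends in a conditional branch with two successors.
static bool foldLatchIntoPredecessor(BasicBlock *Latch, DominatorTree *DT,
                                     LoopInfo *LI, MemorySSAUpdater *MSSAU) {
  // With the flag set, Latch's predecessor may have two successors (one of
  // them Latch); Latch itself must end in an unconditional branch, or the
  // merge is refused and false is returned.
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
  return MergeBlockIntoPredecessor(Latch, &DTU, LI, MSSAU,
                                   /*MemDep=*/nullptr,
                                   /*PredecessorWithTwoSuccessors=*/true);
}
```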
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 27f110e24f9c..71316ce8f758 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -88,6 +88,14 @@ static bool setDoesNotCapture(Function &F, unsigned ArgNo) {
   return true;
 }
 
+static bool setDoesNotAlias(Function &F, unsigned ArgNo) {
+  if (F.hasParamAttribute(ArgNo, Attribute::NoAlias))
+    return false;
+  F.addParamAttr(ArgNo, Attribute::NoAlias);
+  ++NumNoAlias;
+  return true;
+}
+
 static bool setOnlyReadsMemory(Function &F, unsigned ArgNo) {
   if (F.hasParamAttribute(ArgNo, Attribute::ReadOnly))
     return false;
@@ -175,6 +183,9 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     return Changed;
   case LibFunc_strcpy:
   case LibFunc_strncpy:
+    Changed |= setDoesNotAlias(F, 0);
+    Changed |= setDoesNotAlias(F, 1);
+    LLVM_FALLTHROUGH;
   case LibFunc_strcat:
   case LibFunc_strncat:
     Changed |= setReturnedArg(F, 0);
@@ -249,12 +260,14 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
   case LibFunc_sprintf:
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
+    Changed |= setDoesNotAlias(F, 0);
     Changed |= setDoesNotCapture(F, 1);
     Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_snprintf:
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 0);
+    Changed |= setDoesNotAlias(F, 0);
     Changed |= setDoesNotCapture(F, 2);
     Changed |= setOnlyReadsMemory(F, 2);
     return Changed;
@@ -291,11 +304,23 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     Changed |= setDoesNotCapture(F, 1);
     return Changed;
   case LibFunc_memcpy:
+    Changed |= setDoesNotAlias(F, 0);
+    Changed |= setDoesNotAlias(F, 1);
+    Changed |= setReturnedArg(F, 0);
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
   case LibFunc_memmove:
     Changed |= setReturnedArg(F, 0);
-    LLVM_FALLTHROUGH;
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
   case LibFunc_mempcpy:
   case LibFunc_memccpy:
+    Changed |= setDoesNotAlias(F, 0);
+    Changed |= setDoesNotAlias(F, 1);
     Changed |= setDoesNotThrow(F);
     Changed |= setDoesNotCapture(F, 1);
     Changed |= setOnlyReadsMemory(F, 1);
@@ -760,9 +785,8 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
   }
 }
 
-bool llvm::hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
-                           LibFunc DoubleFn, LibFunc FloatFn,
-                           LibFunc LongDoubleFn) {
+bool llvm::hasFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
+                      LibFunc DoubleFn, LibFunc FloatFn, LibFunc LongDoubleFn) {
   switch (Ty->getTypeID()) {
   case Type::HalfTyID:
     return false;
@@ -775,10 +799,10 @@ bool llvm::hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
   }
 }
 
-StringRef llvm::getUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
-                                LibFunc DoubleFn, LibFunc FloatFn,
-                                LibFunc LongDoubleFn) {
-  assert(hasUnaryFloatFn(TLI, Ty, DoubleFn, FloatFn, LongDoubleFn) &&
+StringRef llvm::getFloatFnName(const TargetLibraryInfo *TLI, Type *Ty,
+                               LibFunc DoubleFn, LibFunc FloatFn,
+                               LibFunc LongDoubleFn) {
+  assert(hasFloatFn(TLI, Ty, DoubleFn, FloatFn, LongDoubleFn) &&
          "Cannot get name for unavailable function!");
 
   switch (Ty->getTypeID()) {
@@ -827,6 +851,12 @@ Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
                      B.getInt8PtrTy(), castToCStr(Ptr, B), B, TLI);
 }
 
+Value *llvm::emitStrDup(Value *Ptr, IRBuilder<> &B,
+                        const TargetLibraryInfo *TLI) {
+  return emitLibCall(LibFunc_strdup, B.getInt8PtrTy(), B.getInt8PtrTy(),
+                     castToCStr(Ptr, B), B, TLI);
+}
+
 Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B,
                         const TargetLibraryInfo *TLI) {
   Type *I8Ptr = B.getInt8PtrTy();
@@ -1045,24 +1075,28 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI,
                                   LibFunc LongDoubleFn, IRBuilder<> &B,
                                   const AttributeList &Attrs) {
   // Get the name of the function according to TLI.
-  StringRef Name = getUnaryFloatFn(TLI, Op->getType(),
-                                   DoubleFn, FloatFn, LongDoubleFn);
+  StringRef Name = getFloatFnName(TLI, Op->getType(),
+                                  DoubleFn, FloatFn, LongDoubleFn);
 
   return emitUnaryFloatFnCallHelper(Op, Name, B, Attrs);
 }
 
-Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
-                                   IRBuilder<> &B, const AttributeList &Attrs) {
+static Value *emitBinaryFloatFnCallHelper(Value *Op1, Value *Op2,
+                                          StringRef Name, IRBuilder<> &B,
+                                          const AttributeList &Attrs) {
   assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall");
 
-  SmallString<20> NameBuffer;
-  appendTypeSuffix(Op1, Name, NameBuffer);
-
   Module *M = B.GetInsertBlock()->getModule();
-  FunctionCallee Callee = M->getOrInsertFunction(
-      Name, Op1->getType(), Op1->getType(), Op2->getType());
-  CallInst *CI = B.CreateCall(Callee, {Op1, Op2}, Name);
-  CI->setAttributes(Attrs);
+  FunctionCallee Callee = M->getOrInsertFunction(Name, Op1->getType(),
+                                                 Op1->getType(), Op2->getType());
+  CallInst *CI = B.CreateCall(Callee, { Op1, Op2 }, Name);
+
+  // The incoming attribute set may have come from a speculatable intrinsic, but
+  // is being replaced with a library call which is not allowed to be
+  // speculatable.
+  CI->setAttributes(Attrs.removeAttribute(B.getContext(),
+                                          AttributeList::FunctionIndex,
+                                          Attribute::Speculatable));
   if (const Function *F =
           dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
@@ -1070,6 +1104,28 @@ Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
   return CI;
 }
 
+Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
+                                   IRBuilder<> &B, const AttributeList &Attrs) {
+  assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall");
+
+  SmallString<20> NameBuffer;
+  appendTypeSuffix(Op1, Name, NameBuffer);
+
+  return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs);
+}
+
+Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2,
+                                   const TargetLibraryInfo *TLI,
+                                   LibFunc DoubleFn, LibFunc FloatFn,
+                                   LibFunc LongDoubleFn, IRBuilder<> &B,
+                                   const AttributeList &Attrs) {
+  // Get the name of the function according to TLI.
+  StringRef Name = getFloatFnName(TLI, Op1->getType(),
+                                  DoubleFn, FloatFn, LongDoubleFn);
+
+  return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs);
+}
+
 Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B,
                          const TargetLibraryInfo *TLI) {
   if (!TLI->has(LibFunc_putchar))
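Editor's note: the new TLI-based emitBinaryFloatFnCall overload pairs with hasFloatFn the same way the unary variants already did. A hedged sketch of a caller (emitFMin is hypothetical, not part of the patch):

```cpp
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
using namespace llvm;

// Emit a call to fmin/fminf/fminl, chosen by the operand type.
static Value *emitFMin(Value *X, Value *Y, const TargetLibraryInfo *TLI,
                       IRBuilder<> &B) {
  // hasFloatFn guards availability; the overload then resolves the name
  // via getFloatFnName internally.
  if (!hasFloatFn(TLI, X->getType(), LibFunc_fmin, LibFunc_fminf,
                  LibFunc_fminl))
    return nullptr;
  return emitBinaryFloatFnCall(X, Y, TLI, LibFunc_fmin, LibFunc_fminf,
                               LibFunc_fminl, B, AttributeList());
}
```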
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index df299f673f65..9a6761040bd8 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -448,13 +448,17 @@ bool llvm::bypassSlowDivision(BasicBlock *BB,
   DivCacheTy PerBBDivCache;
 
   bool MadeChange = false;
-  Instruction* Next = &*BB->begin();
+  Instruction *Next = &*BB->begin();
   while (Next != nullptr) {
     // We may add instructions immediately after I, but we want to skip over
     // them.
-    Instruction* I = Next;
+    Instruction *I = Next;
     Next = Next->getNextNode();
 
+    // Ignore dead code to save time and avoid bugs.
+    if (I->hasNUses(0))
+      continue;
+
     FastDivInsertionTask Task(I, BypassWidths);
     if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
       I->replaceAllUsesWith(Replacement);
diff --git a/lib/Transforms/Utils/CanonicalizeAliases.cpp b/lib/Transforms/Utils/CanonicalizeAliases.cpp
index 455fcbb1cf98..3c7c8d872595 100644
--- a/lib/Transforms/Utils/CanonicalizeAliases.cpp
+++ b/lib/Transforms/Utils/CanonicalizeAliases.cpp
@@ -33,6 +33,7 @@
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
 
 using namespace llvm;
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 1026c9d37038..75e8963303c2 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -210,6 +210,21 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
       RemapInstruction(&II, VMap,
                        ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
                        TypeMapper, Materializer);
+
+  // Register all DICompileUnits of the old parent module in the new parent
+  // module
+  auto* OldModule = OldFunc->getParent();
+  auto* NewModule = NewFunc->getParent();
+  if (OldModule && NewModule && OldModule != NewModule &&
+      DIFinder.compile_unit_count()) {
+    auto* NMD = NewModule->getOrInsertNamedMetadata("llvm.dbg.cu");
+    // Avoid multiple insertions of the same DICompileUnit to NMD.
+    SmallPtrSet<const void*, 8> Visited;
+    for (auto* Operand : NMD->operands())
+      Visited.insert(Operand);
+    for (auto* Unit : DIFinder.compile_units())
+      // VMap.MD()[Unit] == Unit
+      if (Visited.insert(Unit).second)
+        NMD->addOperand(Unit);
+  }
 }
 
 /// Return a copy of the specified function and add it to that function's
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 7ddf59becba9..2c8c3abb2922 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -48,7 +48,7 @@ std::unique_ptr<Module> llvm::CloneModule(
     function_ref<bool(const GlobalValue *)> ShouldCloneDefinition) {
   // First off, we need to create the new module.
   std::unique_ptr<Module> New =
-      llvm::make_unique<Module>(M.getModuleIdentifier(), M.getContext());
+      std::make_unique<Module>(M.getModuleIdentifier(), M.getContext());
   New->setSourceFileName(M.getSourceFileName());
   New->setDataLayout(M.getDataLayout());
   New->setTargetTriple(M.getTargetTriple());
@@ -181,13 +181,25 @@ std::unique_ptr<Module> llvm::CloneModule(
   }
 
   // And named metadata....
+  const auto* LLVM_DBG_CU = M.getNamedMetadata("llvm.dbg.cu");
   for (Module::const_named_metadata_iterator I = M.named_metadata_begin(),
                                              E = M.named_metadata_end();
        I != E; ++I) {
     const NamedMDNode &NMD = *I;
     NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
-    for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
-      NewNMD->addOperand(MapMetadata(NMD.getOperand(i), VMap));
+    if (&NMD == LLVM_DBG_CU) {
+      // Do not insert duplicate operands.
+      SmallPtrSet<const void*, 8> Visited;
+      for (const auto* Operand : NewNMD->operands())
+        Visited.insert(Operand);
+      for (const auto* Operand : NMD.operands()) {
+        auto* MappedOperand = MapMetadata(Operand, VMap);
+        if (Visited.insert(MappedOperand).second)
+          NewNMD->addOperand(MappedOperand);
+      }
+    } else
+      for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
+        NewNMD->addOperand(MapMetadata(NMD.getOperand(i), VMap));
   }
 
   return New;
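Editor's note: both the CloneFunction and CloneModule hunks above apply the same Visited-set idiom, because metadata mapping can hand back a DICompileUnit that is already registered in the destination module's llvm.dbg.cu. A minimal sketch of the idiom in isolation (addCompileUnits is a hypothetical helper):

```cpp
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
using namespace llvm;

static void addCompileUnits(Module &M, ArrayRef<MDNode *> Units) {
  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu");
  // Seed the set with operands already present, so repeated calls for the
  // same module never register a compile unit twice.
  SmallPtrSet<const MDNode *, 8> Visited;
  for (const MDNode *Op : NMD->operands())
    Visited.insert(Op);
  for (MDNode *Unit : Units)
    if (Visited.insert(Unit).second) // Skip units we have already added.
      NMD->addOperand(Unit);
}
```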
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index fa6d3f8ae873..0298ff9a395f 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -293,10 +293,8 @@ static BasicBlock *getCommonExitBlock(const SetVector<BasicBlock *> &Blocks) {
       CommonExitBlock = Succ;
       continue;
     }
-    if (CommonExitBlock == Succ)
-      continue;
-
-    return true;
+    if (CommonExitBlock != Succ)
+      return true;
   }
   return false;
 };
@@ -307,52 +305,79 @@ static BasicBlock *getCommonExitBlock(const SetVector<BasicBlock *> &Blocks) {
   return CommonExitBlock;
 }
 
-bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
-    Instruction *Addr) const {
-  AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets());
-  Function *Func = (*Blocks.begin())->getParent();
-  for (BasicBlock &BB : *Func) {
-    if (Blocks.count(&BB))
-      continue;
-    for (Instruction &II : BB) {
-      if (isa<DbgInfoIntrinsic>(II))
-        continue;
+CodeExtractorAnalysisCache::CodeExtractorAnalysisCache(Function &F) {
+  for (BasicBlock &BB : F) {
+    for (Instruction &II : BB.instructionsWithoutDebug())
+      if (auto *AI = dyn_cast<AllocaInst>(&II))
+        Allocas.push_back(AI);
 
-      unsigned Opcode = II.getOpcode();
-      Value *MemAddr = nullptr;
-      switch (Opcode) {
-      case Instruction::Store:
-      case Instruction::Load: {
-        if (Opcode == Instruction::Store) {
-          StoreInst *SI = cast<StoreInst>(&II);
-          MemAddr = SI->getPointerOperand();
-        } else {
-          LoadInst *LI = cast<LoadInst>(&II);
-          MemAddr = LI->getPointerOperand();
-        }
-        // Global variable can not be aliased with locals.
-        if (dyn_cast<Constant>(MemAddr))
-          break;
-        Value *Base = MemAddr->stripInBoundsConstantOffsets();
-        if (!isa<AllocaInst>(Base) || Base == AI)
-          return false;
+    findSideEffectInfoForBlock(BB);
+  }
+}
+
+void CodeExtractorAnalysisCache::findSideEffectInfoForBlock(BasicBlock &BB) {
+  for (Instruction &II : BB.instructionsWithoutDebug()) {
+    unsigned Opcode = II.getOpcode();
+    Value *MemAddr = nullptr;
+    switch (Opcode) {
+    case Instruction::Store:
+    case Instruction::Load: {
+      if (Opcode == Instruction::Store) {
+        StoreInst *SI = cast<StoreInst>(&II);
+        MemAddr = SI->getPointerOperand();
+      } else {
+        LoadInst *LI = cast<LoadInst>(&II);
+        MemAddr = LI->getPointerOperand();
+      }
+      // Global variable can not be aliased with locals.
+      if (dyn_cast<Constant>(MemAddr))
         break;
+      Value *Base = MemAddr->stripInBoundsConstantOffsets();
+      if (!isa<AllocaInst>(Base)) {
+        SideEffectingBlocks.insert(&BB);
+        return;
       }
-      default: {
-        IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II);
-        if (IntrInst) {
-          if (IntrInst->isLifetimeStartOrEnd())
-            break;
-          return false;
-        }
-        // Treat all the other cases conservatively if it has side effects.
-        if (II.mayHaveSideEffects())
-          return false;
+      BaseMemAddrs[&BB].insert(Base);
+      break;
+    }
+    default: {
+      IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II);
+      if (IntrInst) {
+        if (IntrInst->isLifetimeStartOrEnd())
+          break;
+        SideEffectingBlocks.insert(&BB);
+        return;
       }
+      // Treat all the other cases conservatively if it has side effects.
+      if (II.mayHaveSideEffects()) {
+        SideEffectingBlocks.insert(&BB);
+        return;
       }
     }
+    }
   }
+}
+
+bool CodeExtractorAnalysisCache::doesBlockContainClobberOfAddr(
+    BasicBlock &BB, AllocaInst *Addr) const {
+  if (SideEffectingBlocks.count(&BB))
+    return true;
+  auto It = BaseMemAddrs.find(&BB);
+  if (It != BaseMemAddrs.end())
+    return It->second.count(Addr);
+  return false;
+}
+
+bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
+    const CodeExtractorAnalysisCache &CEAC, Instruction *Addr) const {
+  AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets());
+  Function *Func = (*Blocks.begin())->getParent();
+  for (BasicBlock &BB : *Func) {
+    if (Blocks.count(&BB))
+      continue;
+    if (CEAC.doesBlockContainClobberOfAddr(BB, AI))
+      return false;
+  }
 
   return true;
 }
@@ -415,7 +440,8 @@ CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) {
 // outline region. If there are not other untracked uses of the address, return
 // the pair of markers if found; otherwise return a pair of nullptr.
 CodeExtractor::LifetimeMarkerInfo
-CodeExtractor::getLifetimeMarkers(Instruction *Addr,
+CodeExtractor::getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC,
+                                  Instruction *Addr,
                                   BasicBlock *ExitBlock) const {
   LifetimeMarkerInfo Info;
 
@@ -447,7 +473,7 @@ CodeExtractor::getLifetimeMarkers(Instruction *Addr,
   Info.HoistLifeEnd = !definedInRegion(Blocks, Info.LifeEnd);
   // Do legality check.
   if ((Info.SinkLifeStart || Info.HoistLifeEnd) &&
-      !isLegalToShrinkwrapLifetimeMarkers(Addr))
+      !isLegalToShrinkwrapLifetimeMarkers(CEAC, Addr))
     return {};
 
   // Check to see if we have a place to do hoisting, if not, bail.
@@ -457,7 +483,8 @@ CodeExtractor::getLifetimeMarkers(Instruction *Addr,
   return Info;
 }
 
-void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
+void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache &CEAC,
+                                ValueSet &SinkCands, ValueSet &HoistCands,
                                 BasicBlock *&ExitBlock) const {
   Function *Func = (*Blocks.begin())->getParent();
   ExitBlock = getCommonExitBlock(Blocks);
@@ -478,74 +505,104 @@ void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
     return true;
   };
 
-  for (BasicBlock &BB : *Func) {
-    if (Blocks.count(&BB))
+  // Look up allocas in the original function in CodeExtractorAnalysisCache, as
+  // this is much faster than walking all the instructions.
+  for (AllocaInst *AI : CEAC.getAllocas()) {
+    BasicBlock *BB = AI->getParent();
+    if (Blocks.count(BB))
       continue;
-    for (Instruction &II : BB) {
-      auto *AI = dyn_cast<AllocaInst>(&II);
-      if (!AI)
-        continue;
 
-      LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(AI, ExitBlock);
-      bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo);
-      if (Moved) {
-        LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n");
-        SinkCands.insert(AI);
-        continue;
-      }
+    // As a prior call to extractCodeRegion() may have shrinkwrapped the alloca,
+    // check whether it is actually still in the original function.
+    Function *AIFunc = BB->getParent();
+    if (AIFunc != Func)
+      continue;
 
-      // Follow any bitcasts.
-      SmallVector<Instruction *, 2> Bitcasts;
-      SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo;
-      for (User *U : AI->users()) {
-        if (U->stripInBoundsConstantOffsets() == AI) {
-          Instruction *Bitcast = cast<Instruction>(U);
-          LifetimeMarkerInfo LMI = getLifetimeMarkers(Bitcast, ExitBlock);
-          if (LMI.LifeStart) {
-            Bitcasts.push_back(Bitcast);
-            BitcastLifetimeInfo.push_back(LMI);
-            continue;
-          }
-        }
+    LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(CEAC, AI, ExitBlock);
+    bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo);
+    if (Moved) {
+      LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n");
+      SinkCands.insert(AI);
+      continue;
+    }
 
-        // Found unknown use of AI.
-        if (!definedInRegion(Blocks, U)) {
-          Bitcasts.clear();
-          break;
+    // Follow any bitcasts.
+    SmallVector<Instruction *, 2> Bitcasts;
+    SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo;
+    for (User *U : AI->users()) {
+      if (U->stripInBoundsConstantOffsets() == AI) {
+        Instruction *Bitcast = cast<Instruction>(U);
+        LifetimeMarkerInfo LMI = getLifetimeMarkers(CEAC, Bitcast, ExitBlock);
+        if (LMI.LifeStart) {
+          Bitcasts.push_back(Bitcast);
+          BitcastLifetimeInfo.push_back(LMI);
+          continue;
         }
       }
 
-      // Either no bitcasts reference the alloca or there are unknown uses.
-      if (Bitcasts.empty())
-        continue;
+      // Found unknown use of AI.
+      if (!definedInRegion(Blocks, U)) {
+        Bitcasts.clear();
+        break;
+      }
+    }
 
-      LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n");
-      SinkCands.insert(AI);
-      for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) {
-        Instruction *BitcastAddr = Bitcasts[I];
-        const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I];
-        assert(LMI.LifeStart &&
-               "Unsafe to sink bitcast without lifetime markers");
-        moveOrIgnoreLifetimeMarkers(LMI);
-        if (!definedInRegion(Blocks, BitcastAddr)) {
-          LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr
-                            << "\n");
-          SinkCands.insert(BitcastAddr);
-        }
+    // Either no bitcasts reference the alloca or there are unknown uses.
+    if (Bitcasts.empty())
+      continue;
+
+    LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n");
+    SinkCands.insert(AI);
+    for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) {
+      Instruction *BitcastAddr = Bitcasts[I];
+      const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I];
+      assert(LMI.LifeStart &&
+             "Unsafe to sink bitcast without lifetime markers");
+      moveOrIgnoreLifetimeMarkers(LMI);
+      if (!definedInRegion(Blocks, BitcastAddr)) {
+        LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr
                          << "\n");
+        SinkCands.insert(BitcastAddr);
       }
     }
   }
 }
 
+bool CodeExtractor::isEligible() const {
+  if (Blocks.empty())
+    return false;
+  BasicBlock *Header = *Blocks.begin();
+  Function *F = Header->getParent();
+
+  // For functions with varargs, check that varargs handling is only done in
+  // the outlined function, i.e. vastart and vaend are only used in outlined
+  // blocks.
+  if (AllowVarArgs && F->getFunctionType()->isVarArg()) {
+    auto containsVarArgIntrinsic = [](const Instruction &I) {
+      if (const CallInst *CI = dyn_cast<CallInst>(&I))
+        if (const Function *Callee = CI->getCalledFunction())
+          return Callee->getIntrinsicID() == Intrinsic::vastart ||
+                 Callee->getIntrinsicID() == Intrinsic::vaend;
      return false;
+    };
+
+    for (auto &BB : *F) {
+      if (Blocks.count(&BB))
+        continue;
+      if (llvm::any_of(BB, containsVarArgIntrinsic))
+        return false;
+    }
+  }
+  return true;
+}
+
 void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
                                       const ValueSet &SinkCands) const {
   for (BasicBlock *BB : Blocks) {
     // If a used value is defined outside the region, it's an input.  If an
     // instruction is used outside the region, it's an output.
     for (Instruction &II : *BB) {
-      for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE;
-           ++OI) {
-        Value *V = *OI;
+      for (auto &OI : II.operands()) {
+        Value *V = OI;
         if (!SinkCands.count(V) && definedInCaller(Blocks, V))
           Inputs.insert(V);
       }
@@ -904,12 +961,12 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
   // within the new function. This must be done before we lose track of which
   // blocks were originally in the code region.
   std::vector<User *> Users(header->user_begin(), header->user_end());
-  for (unsigned i = 0, e = Users.size(); i != e; ++i)
+  for (auto &U : Users)
     // The BasicBlock which contains the branch is not in the region
     // modify the branch target to a new block
-    if (Instruction *I = dyn_cast<Instruction>(Users[i]))
-      if (I->isTerminator() && !Blocks.count(I->getParent()) &&
-          I->getParent()->getParent() == oldFunction)
+    if (Instruction *I = dyn_cast<Instruction>(U))
+      if (I->isTerminator() && I->getFunction() == oldFunction &&
+          !Blocks.count(I->getParent()))
         I->replaceUsesOfWith(header, newHeader);
 
   return newFunction;
@@ -1277,13 +1334,6 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) {
 
     // Insert this basic block into the new function
     newBlocks.push_back(Block);
-
-    // Remove @llvm.assume calls that were moved to the new function from the
-    // old function's assumption cache.
-    if (AC)
-      for (auto &I : *Block)
-        if (match(&I, m_Intrinsic<Intrinsic::assume>()))
-          AC->unregisterAssumption(cast<CallInst>(&I));
   }
 }
 
@@ -1332,7 +1382,8 @@ void CodeExtractor::calculateNewCallTerminatorWeights(
       MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
 }
 
-Function *CodeExtractor::extractCodeRegion() {
+Function *
+CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) {
   if (!isEligible())
     return nullptr;
 
@@ -1341,27 +1392,6 @@ Function *CodeExtractor::extractCodeRegion() {
   BasicBlock *header = *Blocks.begin();
   Function *oldFunction = header->getParent();
 
-  // For functions with varargs, check that varargs handling is only done in
-  // the outlined function, i.e. vastart and vaend are only used in outlined
-  // blocks.
-  if (AllowVarArgs && oldFunction->getFunctionType()->isVarArg()) {
-    auto containsVarArgIntrinsic = [](Instruction &I) {
-      if (const CallInst *CI = dyn_cast<CallInst>(&I))
-        if (const Function *F = CI->getCalledFunction())
-          return F->getIntrinsicID() == Intrinsic::vastart ||
-                 F->getIntrinsicID() == Intrinsic::vaend;
-      return false;
-    };
-
-    for (auto &BB : *oldFunction) {
-      if (Blocks.count(&BB))
-        continue;
-      if (llvm::any_of(BB, containsVarArgIntrinsic))
-        return nullptr;
-    }
-  }
-
-  ValueSet inputs, outputs, SinkingCands, HoistingCands;
-  BasicBlock *CommonExit = nullptr;
-
   // Calculate the entry frequency of the new function before we change the root
   // block.
   BlockFrequency EntryFreq;
@@ -1375,6 +1405,15 @@ Function *CodeExtractor::extractCodeRegion() {
     }
   }
 
+  if (AC) {
+    // Remove @llvm.assume calls that were moved to the new function from the
+    // old function's assumption cache.
+    for (BasicBlock *Block : Blocks)
+      for (auto &I : *Block)
+        if (match(&I, m_Intrinsic<Intrinsic::assume>()))
+          AC->unregisterAssumption(cast<CallInst>(&I));
+  }
+
   // If we have any return instructions in the region, split those blocks so
   // that the return is not in the region.
   splitReturnBlocks();
@@ -1428,7 +1467,9 @@ Function *CodeExtractor::extractCodeRegion() {
   }
   newFuncRoot->getInstList().push_back(BranchI);
 
-  findAllocas(SinkingCands, HoistingCands, CommonExit);
+  ValueSet inputs, outputs, SinkingCands, HoistingCands;
+  BasicBlock *CommonExit = nullptr;
+  findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
   assert(HoistingCands.empty() || CommonExit);
 
   // Find inputs to, outputs from the code region.
@@ -1563,5 +1604,17 @@ Function *CodeExtractor::extractCodeRegion() {
   });
   LLVM_DEBUG(if (verifyFunction(*oldFunction))
              report_fatal_error("verification of oldFunction failed!"));
+  LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, AC))
+             report_fatal_error("Stale Assumption cache for old Function!"));
   return newFunction;
 }
+
+bool CodeExtractor::verifyAssumptionCache(const Function& F,
+                                          AssumptionCache *AC) {
+  for (auto AssumeVH : AC->assumptions()) {
+    CallInst *I = cast<CallInst>(AssumeVH);
+    if (I->getFunction() != &F)
+      return true;
+  }
+  return false;
+}
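Editor's note: extractCodeRegion now takes a CodeExtractorAnalysisCache built once per function, instead of re-walking every instruction per extraction. A sketch of the intended call shape, assuming a caller outlines several regions from one function (outlineTwoRegions and the region arguments are hypothetical):

```cpp
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
using namespace llvm;

static void outlineTwoRegions(Function &F, ArrayRef<BasicBlock *> Region1,
                              ArrayRef<BasicBlock *> Region2) {
  // One walk over F collects the allocas and the per-block side-effect
  // summary used by the shrinkwrap legality check.
  CodeExtractorAnalysisCache CEAC(F);

  // The cache stays usable across extractions: findAllocas() re-checks that
  // each cached alloca is still in the original function.
  CodeExtractor(Region1).extractCodeRegion(CEAC);
  CodeExtractor(Region2).extractCodeRegion(CEAC);
}
```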
diff --git a/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index 4aa40eeadda4..57e2ff0251a9 100644
--- a/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -24,7 +24,7 @@ static void insertCall(Function &CurFn, StringRef Func,
 
   if (Func == "mcount" ||
       Func == ".mcount" ||
-      Func == "\01__gnu_mcount_nc" ||
+      Func == "llvm.arm.gnu.eabi.mcount" ||
       Func == "\01_mcount" ||
       Func == "\01mcount" ||
       Func == "__mcount" ||
diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp
index 0e203f4e075d..ad36790b8c6a 100644
--- a/lib/Transforms/Utils/Evaluator.cpp
+++ b/lib/Transforms/Utils/Evaluator.cpp
@@ -469,7 +469,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
         return false;  // Cannot handle array allocs.
       }
       Type *Ty = AI->getAllocatedType();
-      AllocaTmps.push_back(llvm::make_unique<GlobalVariable>(
+      AllocaTmps.push_back(std::make_unique<GlobalVariable>(
           Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty),
           AI->getName(), /*TLMode=*/GlobalValue::NotThreadLocal,
           AI->getType()->getPointerAddressSpace()));
diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp
index 0c52e6f3703b..893f23eb6048 100644
--- a/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/lib/Transforms/Utils/FlattenCFG.cpp
@@ -67,7 +67,7 @@ public:
 /// Before:
 ///   ......
 ///   %cmp10 = fcmp une float %tmp1, %tmp2
-///   br i1 %cmp1, label %if.then, label %lor.rhs
+///   br i1 %cmp10, label %if.then, label %lor.rhs
 ///
 /// lor.rhs:
 ///   ......
@@ -251,8 +251,8 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
   bool EverChanged = false;
   for (; CurrBlock != FirstCondBlock;
        CurrBlock = CurrBlock->getSinglePredecessor()) {
-    BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator());
-    CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
+    auto *BI = cast<BranchInst>(CurrBlock->getTerminator());
+    auto *CI = dyn_cast<CmpInst>(BI->getCondition());
     if (!CI)
       continue;
 
@@ -278,7 +278,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
   // Do the transformation.
   BasicBlock *CB;
-  BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator());
+  BranchInst *PBI = cast<BranchInst>(FirstCondBlock->getTerminator());
   bool Iteration = true;
   IRBuilder<>::InsertPointGuard Guard(Builder);
   Value *PC = PBI->getCondition();
@@ -444,7 +444,7 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
   FirstEntryBlock->getInstList().pop_back();
   FirstEntryBlock->getInstList()
       .splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList());
-  BranchInst *PBI = dyn_cast<BranchInst>(FirstEntryBlock->getTerminator());
+  BranchInst *PBI = cast<BranchInst>(FirstEntryBlock->getTerminator());
   Value *CC = PBI->getCondition();
   BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
   BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
@@ -453,6 +453,16 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
   PBI->replaceUsesOfWith(CC, NC);
   Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
 
+  // Handle PHI node to replace its predecessors to FirstEntryBlock.
+  for (BasicBlock *Succ : successors(PBI)) {
+    for (PHINode &Phi : Succ->phis()) {
+      for (unsigned i = 0, e = Phi.getNumIncomingValues(); i != e; ++i) {
+        if (Phi.getIncomingBlock(i) == SecondEntryBlock)
+          Phi.setIncomingBlock(i, FirstEntryBlock);
+      }
+    }
+  }
+
   // Remove IfTrue1
   if (IfTrue1 != FirstEntryBlock) {
     IfTrue1->dropAllReferences();
diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp
index c9cc0990f237..76b4635ad501 100644
--- a/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -210,7 +210,7 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
   if (Function *F = dyn_cast<Function>(&GV)) {
     if (!F->isDeclaration()) {
       for (auto &S : VI.getSummaryList()) {
-        FunctionSummary *FS = dyn_cast<FunctionSummary>(S->getBaseObject());
+        auto *FS = cast<FunctionSummary>(S->getBaseObject());
         if (FS->modulePath() == M.getModuleIdentifier()) {
           F->setEntryCount(Function::ProfileCount(FS->entryCount(),
                                                   Function::PCT_Synthetic));
diff --git a/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
index 8041e66e6c4c..ea93f99d69e3 100644
--- a/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
+++ b/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
@@ -25,8 +25,8 @@ ImportedFunctionsInliningStatistics::createInlineGraphNode(const Function &F) {
 
   auto &ValueLookup = NodesMap[F.getName()];
   if (!ValueLookup) {
-    ValueLookup = llvm::make_unique<InlineGraphNode>();
-    ValueLookup->Imported = F.getMetadata("thinlto_src_module") != nullptr;
+    ValueLookup = std::make_unique<InlineGraphNode>();
+    ValueLookup->Imported = F.hasMetadata("thinlto_src_module");
   }
   return *ValueLookup;
 }
@@ -64,7 +64,7 @@ void ImportedFunctionsInliningStatistics::setModuleInfo(const Module &M) {
     if (F.isDeclaration())
       continue;
     AllFunctions++;
-    ImportedFunctions += int(F.getMetadata("thinlto_src_module") != nullptr);
+    ImportedFunctions += int(F.hasMetadata("thinlto_src_module"));
   }
 }
 
 static std::string getStatString(const char *Msg, int32_t Fraction, int32_t All,
diff --git a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index 8c67d1dc6eb3..ed28fffc22b5 100644
--- a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -533,7 +533,7 @@ static bool runImpl(Function &F, const TargetLibraryInfo &TLI,
 }
 
 bool LibCallsShrinkWrapLegacyPass::runOnFunction(Function &F) {
-  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
   auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
   auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
   return runImpl(F, TLI, DT);
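Editor's note: the getTLI(F) change above reflects TargetLibraryInfo becoming a per-function result. A hedged sketch of what that looks like in any legacy pass (MyLegacyPass is hypothetical):

```cpp
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
using namespace llvm;

namespace {
struct MyLegacyPass : FunctionPass {
  static char ID;
  MyLegacyPass() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetLibraryInfoWrapperPass>();
  }

  bool runOnFunction(Function &F) override {
    // TLI can now vary per function (e.g. per-TU no-builtin settings), so
    // the wrapper is queried with F rather than with no argument.
    const TargetLibraryInfo &TLI =
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    (void)TLI; // illustrative only
    return false;
  }
};
} // namespace
char MyLegacyPass::ID = 0;
```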
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 39b6b889f91c..5bcd05757ec1 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -324,8 +324,14 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
       Value *Address = IBI->getAddress();
       IBI->eraseFromParent();
       if (DeleteDeadConditions)
+        // Delete pointer cast instructions.
         RecursivelyDeleteTriviallyDeadInstructions(Address, TLI);
 
+      // Also zap the blockaddress constant if there are no users remaining,
+      // otherwise the destination is still marked as having its address taken.
+      if (BA->use_empty())
+        BA->destroyConstant();
+
       // If we didn't find our destination in the IBI successor list, then we
       // have undefined behavior. Replace the unconditional branch with an
       // 'unreachable' instruction.
@@ -633,17 +639,6 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB,
 //  Control Flow Graph Restructuring.
 //
 
-/// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this
-/// method is called when we're about to delete Pred as a predecessor of BB. If
-/// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred.
-///
-/// Unlike the removePredecessor method, this attempts to simplify uses of PHI
-/// nodes that collapse into identity values.  For example, if we have:
-///   x = phi(1, 0, 0, 0)
-///   y = and x, z
-///
-/// .. and delete the predecessor corresponding to the '1', this will attempt to
-/// recursively fold the and to 0.
 void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
                                         DomTreeUpdater *DTU) {
   // This only adjusts blocks with PHI nodes.
@@ -672,10 +667,6 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
       DTU->applyUpdatesPermissive({{DominatorTree::Delete, Pred, BB}});
 }
 
-/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
-/// predecessor is known to have one successor (DestBB!). Eliminate the edge
-/// between them, moving the instructions in the predecessor into DestBB and
-/// deleting the predecessor block.
 void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB,
                                        DomTreeUpdater *DTU) {
 
@@ -755,15 +746,14 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB,
   }
 }
 
-/// CanMergeValues - Return true if we can choose one of these values to use
-/// in place of the other. Note that we will always choose the non-undef
-/// value to keep.
+/// Return true if we can choose one of these values to use in place of the
+/// other. Note that we will always choose the non-undef value to keep.
 static bool CanMergeValues(Value *First, Value *Second) {
   return First == Second || isa<UndefValue>(First) || isa<UndefValue>(Second);
 }
 
-/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
-/// almost-empty BB ending in an unconditional branch to Succ, into Succ.
+/// Return true if we can fold BB, an almost-empty BB ending in an unconditional
+/// branch to Succ, into Succ.
 ///
 /// Assumption: Succ is the single successor for BB.
 static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
@@ -956,11 +946,6 @@ static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB,
   replaceUndefValuesInPhi(PN, IncomingValues);
 }
 
-/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
-/// unconditional branch, and contains no instructions other than PHI nodes,
-/// potential side-effect free intrinsics and the branch. If possible,
-/// eliminate BB by rewriting all the predecessors to branch to the successor
-/// block and return true. If we can't transform, return false.
 bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
                                                    DomTreeUpdater *DTU) {
   assert(BB != &BB->getParent()->getEntryBlock() &&
@@ -1088,10 +1073,6 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
   return true;
 }
 
-/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI
-/// nodes in this block. This doesn't try to be clever about PHI nodes
-/// which differ only in the order of the incoming values, but instcombine
-/// orders them so it usually won't matter.
 bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
   // This implementation doesn't currently consider undef operands
   // specially. Theoretically, two phis which are identical except for
@@ -1151,10 +1132,10 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
 /// often possible though. If alignment is important, a more reliable approach
 /// is to simply align all global variables and allocation instructions to
 /// their preferred alignment from the beginning.
-static unsigned enforceKnownAlignment(Value *V, unsigned Align,
+static unsigned enforceKnownAlignment(Value *V, unsigned Alignment,
                                       unsigned PrefAlign,
                                       const DataLayout &DL) {
-  assert(PrefAlign > Align);
+  assert(PrefAlign > Alignment);
 
   V = V->stripPointerCasts();
 
@@ -1165,36 +1146,36 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
     // stripPointerCasts recurses through infinite layers of bitcasts,
     // while computeKnownBits is not allowed to traverse more than 6
    // levels.
-    Align = std::max(AI->getAlignment(), Align);
-    if (PrefAlign <= Align)
-      return Align;
+    Alignment = std::max(AI->getAlignment(), Alignment);
+    if (PrefAlign <= Alignment)
+      return Alignment;
 
     // If the preferred alignment is greater than the natural stack alignment
     // then don't round up. This avoids dynamic stack realignment.
-    if (DL.exceedsNaturalStackAlignment(PrefAlign))
-      return Align;
-    AI->setAlignment(PrefAlign);
+    if (DL.exceedsNaturalStackAlignment(Align(PrefAlign)))
+      return Alignment;
+    AI->setAlignment(MaybeAlign(PrefAlign));
     return PrefAlign;
   }
 
   if (auto *GO = dyn_cast<GlobalObject>(V)) {
     // TODO: as above, this shouldn't be necessary.
-    Align = std::max(GO->getAlignment(), Align);
-    if (PrefAlign <= Align)
-      return Align;
+    Alignment = std::max(GO->getAlignment(), Alignment);
+    if (PrefAlign <= Alignment)
+      return Alignment;
 
     // If there is a large requested alignment and we can, bump up the alignment
     // of the global. If the memory we set aside for the global may not be the
    // memory used by the final program then it is impossible for us to reliably
     // enforce the preferred alignment.
     if (!GO->canIncreaseAlignment())
-      return Align;
+      return Alignment;
 
-    GO->setAlignment(PrefAlign);
+    GO->setAlignment(MaybeAlign(PrefAlign));
     return PrefAlign;
   }
 
-  return Align;
+  return Alignment;
 }
 
 unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
@@ -1397,7 +1378,12 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
 /// Determine whether this alloca is either a VLA or an array.
 static bool isArray(AllocaInst *AI) {
   return AI->isArrayAllocation() ||
-         AI->getType()->getElementType()->isArrayTy();
+         (AI->getAllocatedType() && AI->getAllocatedType()->isArrayTy());
+}
+
+/// Determine whether this alloca is a structure.
+static bool isStructure(AllocaInst *AI) {
+  return AI->getAllocatedType() && AI->getAllocatedType()->isStructTy();
 }
 
 /// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set
@@ -1422,7 +1408,7 @@ bool llvm::LowerDbgDeclare(Function &F) {
     // stored on the stack, while the dbg.declare can only describe
     // the stack slot (and at a lexical-scope granularity). Later
     // passes will attempt to elide the stack slot.
-    if (!AI || isArray(AI))
+    if (!AI || isArray(AI) || isStructure(AI))
       continue;
 
     // A volatile load/store means that the alloca can't be elided anyway.
@@ -1591,15 +1577,10 @@ static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
       DIExpr->getElement(0) != dwarf::DW_OP_deref)
     return;
 
-  // Insert the offset immediately after the first deref.
+  // Insert the offset before the first deref.
   // We could just change the offset argument of dbg.value, but it's unsigned...
-  if (Offset) {
-    SmallVector<uint64_t, 4> Ops;
-    Ops.push_back(dwarf::DW_OP_deref);
-    DIExpression::appendOffset(Ops, Offset);
-    Ops.append(DIExpr->elements_begin() + 1, DIExpr->elements_end());
-    DIExpr = Builder.createExpression(Ops);
-  }
+  if (Offset)
+    DIExpr = DIExpression::prepend(DIExpr, 0, Offset);
 
   Builder.insertDbgValueIntrinsic(NewAddress, DIVar, DIExpr, Loc, DVI);
   DVI->eraseFromParent();
@@ -1957,18 +1938,24 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
   return NumInstrsRemoved;
 }
 
-/// changeToCall - Convert the specified invoke into a normal call.
-static void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr) {
-  SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());
+CallInst *llvm::createCallMatchingInvoke(InvokeInst *II) {
+  SmallVector<Value *, 8> Args(II->arg_begin(), II->arg_end());
   SmallVector<OperandBundleDef, 1> OpBundles;
   II->getOperandBundlesAsDefs(OpBundles);
-  CallInst *NewCall = CallInst::Create(
-      II->getFunctionType(), II->getCalledValue(), Args, OpBundles, "", II);
-  NewCall->takeName(II);
+  CallInst *NewCall = CallInst::Create(II->getFunctionType(),
+                                       II->getCalledValue(), Args, OpBundles);
   NewCall->setCallingConv(II->getCallingConv());
   NewCall->setAttributes(II->getAttributes());
   NewCall->setDebugLoc(II->getDebugLoc());
   NewCall->copyMetadata(*II);
+  return NewCall;
+}
+
+/// changeToCall - Convert the specified invoke into a normal call.
+void llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) {
+  CallInst *NewCall = createCallMatchingInvoke(II);
+  NewCall->takeName(II);
+  NewCall->insertBefore(II);
   II->replaceAllUsesWith(NewCall);
 
   // Follow the call by a branch to the normal destination.
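Editor's note: createCallMatchingInvoke is now exposed from Local.h, separated from the CFG surgery that changeToCall performs. A hedged sketch of the division of labor (replaceInvokeWithCall is hypothetical; in-tree code would simply call changeToCall):

```cpp
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

static void replaceInvokeWithCall(InvokeInst *II) {
  // The helper copies arguments, operand bundles, attributes, calling
  // convention, debug location and metadata, but does not insert the call
  // or touch the CFG; those steps stay with the caller.
  CallInst *NewCall = createCallMatchingInvoke(II);
  NewCall->takeName(II);
  NewCall->insertBefore(II);
  II->replaceAllUsesWith(NewCall);

  // Branch to the normal destination and drop the now-dead unwind edge,
  // mirroring what changeToCall does.
  BranchInst::Create(II->getNormalDest(), II);
  II->getUnwindDest()->removePredecessor(II->getParent());
  II->eraseFromParent();
}
```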
@@ -2223,12 +2210,10 @@ void llvm::removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU) {
 
 /// removeUnreachableBlocks - Remove blocks that are not reachable, even
 /// if they are in a dead cycle. Return true if a change was made, false
-/// otherwise. If `LVI` is passed, this function preserves LazyValueInfo
-/// after modifying the CFG.
-bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
-                                   DomTreeUpdater *DTU,
+/// otherwise.
+bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
                                    MemorySSAUpdater *MSSAU) {
-  SmallPtrSet<BasicBlock*, 16> Reachable;
+  SmallPtrSet<BasicBlock *, 16> Reachable;
   bool Changed = markAliveBlocks(F, Reachable, DTU);
 
   // If there are unreachable blocks in the CFG...
@@ -2236,21 +2221,21 @@ bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
     return Changed;
 
   assert(Reachable.size() < F.size());
-  NumRemoved += F.size()-Reachable.size();
+  NumRemoved += F.size() - Reachable.size();
 
   SmallSetVector<BasicBlock *, 8> DeadBlockSet;
-  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) {
-    auto *BB = &*I;
-    if (Reachable.count(BB))
+  for (BasicBlock &BB : F) {
+    // Skip reachable basic blocks
+    if (Reachable.find(&BB) != Reachable.end())
       continue;
-    DeadBlockSet.insert(BB);
+    DeadBlockSet.insert(&BB);
   }
 
   if (MSSAU)
     MSSAU->removeBlocks(DeadBlockSet);
 
   // Loop over all of the basic blocks that are not reachable, dropping all of
-  // their internal references. Update DTU and LVI if available.
+  // their internal references. Update DTU if available.
   std::vector<DominatorTree::UpdateType> Updates;
   for (auto *BB : DeadBlockSet) {
     for (BasicBlock *Successor : successors(BB)) {
@@ -2259,26 +2244,18 @@ bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
       if (DTU)
         Updates.push_back({DominatorTree::Delete, BB, Successor});
     }
-    if (LVI)
-      LVI->eraseBlock(BB);
     BB->dropAllReferences();
-  }
-  for (Function::iterator I = ++F.begin(); I != F.end();) {
-    auto *BB = &*I;
-    if (Reachable.count(BB)) {
-      ++I;
-      continue;
-    }
     if (DTU) {
-      // Remove the terminator of BB to clear the successor list of BB.
-      if (BB->getTerminator())
-        BB->getInstList().pop_back();
+      Instruction *TI = BB->getTerminator();
+      assert(TI && "Basic block should have a terminator");
+      // Terminators like invoke can have users. We have to replace their users,
+      // before removing them.
+      if (!TI->use_empty())
+        TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+      TI->eraseFromParent();
       new UnreachableInst(BB->getContext(), BB);
       assert(succ_empty(BB) && "The successor list of BB isn't empty before "
                                "applying corresponding DTU updates.");
-      ++I;
-    } else {
-      I = F.getBasicBlockList().erase(I);
     }
   }
 
@@ -2294,7 +2271,11 @@ bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
     }
     if (!Deleted)
       return false;
+  } else {
+    for (auto *BB : DeadBlockSet)
+      BB->eraseFromParent();
   }
+
   return true;
 }
 
@@ -2363,6 +2344,9 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
         K->setMetadata(Kind,
           MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
         break;
+      case LLVMContext::MD_preserve_access_index:
+        // Preserve !preserve.access.index in K.
+        break;
     }
   }
   // Set !invariant.group from J if J has it. If both instructions have it
@@ -2385,10 +2369,61 @@ void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
                          LLVMContext::MD_invariant_group, LLVMContext::MD_align,
                          LLVMContext::MD_dereferenceable,
                          LLVMContext::MD_dereferenceable_or_null,
-                         LLVMContext::MD_access_group};
+                         LLVMContext::MD_access_group,
+                         LLVMContext::MD_preserve_access_index};
   combineMetadata(K, J, KnownIDs, KDominatesJ);
 }
 
+void llvm::copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source) {
+  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
+  Source.getAllMetadata(MD);
+  MDBuilder MDB(Dest.getContext());
+  Type *NewType = Dest.getType();
+  const DataLayout &DL = Source.getModule()->getDataLayout();
+  for (const auto &MDPair : MD) {
+    unsigned ID = MDPair.first;
+    MDNode *N = MDPair.second;
+    // Note, essentially every kind of metadata should be preserved here! This
+    // routine is supposed to clone a load instruction changing *only its type*.
+    // The only metadata it makes sense to drop is metadata which is invalidated
+    // when the pointer type changes. This should essentially never be the case
+    // in LLVM, but we explicitly switch over only known metadata to be
+    // conservatively correct. If you are adding metadata to LLVM which pertains
+    // to loads, you almost certainly want to add it here.
+    switch (ID) {
+    case LLVMContext::MD_dbg:
+    case LLVMContext::MD_tbaa:
+    case LLVMContext::MD_prof:
+    case LLVMContext::MD_fpmath:
+    case LLVMContext::MD_tbaa_struct:
+    case LLVMContext::MD_invariant_load:
+    case LLVMContext::MD_alias_scope:
+    case LLVMContext::MD_noalias:
+    case LLVMContext::MD_nontemporal:
+    case LLVMContext::MD_mem_parallel_loop_access:
+    case LLVMContext::MD_access_group:
+      // All of these directly apply.
+      Dest.setMetadata(ID, N);
+      break;
+
+    case LLVMContext::MD_nonnull:
+      copyNonnullMetadata(Source, N, Dest);
+      break;
+
+    case LLVMContext::MD_align:
+    case LLVMContext::MD_dereferenceable:
+    case LLVMContext::MD_dereferenceable_or_null:
+      // These only directly apply if the new type is also a pointer.
+      if (NewType->isPointerTy())
+        Dest.setMetadata(ID, N);
+      break;
+
+    case LLVMContext::MD_range:
+      copyRangeMetadata(DL, Source, N, Dest);
+      break;
+    }
+  }
+}
+
 void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) {
   auto *ReplInst = dyn_cast<Instruction>(Repl);
   if (!ReplInst)
     return;
@@ -2417,7 +2452,7 @@ void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) {
       LLVMContext::MD_noalias,         LLVMContext::MD_range,
       LLVMContext::MD_fpmath,          LLVMContext::MD_invariant_load,
       LLVMContext::MD_invariant_group, LLVMContext::MD_nonnull,
-      LLVMContext::MD_access_group};
+      LLVMContext::MD_access_group,    LLVMContext::MD_preserve_access_index};
   combineMetadata(ReplInst, I, KnownIDs, false);
 }
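Editor's note: copyMetadataForLoad is a new public helper for passes that re-emit a load with a different type. A hedged sketch of the intended call-site shape (cloneLoadWithNewType is hypothetical):

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

static LoadInst *cloneLoadWithNewType(LoadInst &OldLoad, Type *NewTy,
                                      IRBuilder<> &B) {
  Value *Ptr = B.CreateBitCast(OldLoad.getPointerOperand(),
                               NewTy->getPointerTo());
  LoadInst *NewLoad = B.CreateLoad(NewTy, Ptr);
  // Copies every metadata kind that survives a type change verbatim, reuses
  // copyNonnullMetadata/copyRangeMetadata for kinds that must be rewritten,
  // and drops pointer-only kinds when NewTy is not a pointer.
  copyMetadataForLoad(*NewLoad, OldLoad);
  return NewLoad;
}
```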
diff --git a/lib/Transforms/Utils/LoopRotationUtils.cpp b/lib/Transforms/Utils/LoopRotationUtils.cpp
index 37389a695b45..889ea5ca9970 100644
--- a/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -615,30 +615,9 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) {
   LLVM_DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
                     << LastExit->getName() << "\n");
 
-  // Hoist the instructions from Latch into LastExit.
-  Instruction *FirstLatchInst = &*(Latch->begin());
-  LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(),
-                                 Latch->begin(), Jmp->getIterator());
-
-  // Update MemorySSA
-  if (MSSAU)
-    MSSAU->moveAllAfterMergeBlocks(Latch, LastExit, FirstLatchInst);
-
-  unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1;
-  BasicBlock *Header = Jmp->getSuccessor(0);
-  assert(Header == L->getHeader() && "expected a backward branch");
-
-  // Remove Latch from the CFG so that LastExit becomes the new Latch.
-  BI->setSuccessor(FallThruPath, Header);
-  Latch->replaceSuccessorsPhiUsesWith(LastExit);
-  Jmp->eraseFromParent();
-
-  // Nuke the Latch block.
-  assert(Latch->empty() && "unable to evacuate Latch");
-  LI->removeBlock(Latch);
-  if (DT)
-    DT->eraseNode(Latch);
-  Latch->eraseFromParent();
+  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+  MergeBlockIntoPredecessor(Latch, &DTU, LI, MSSAU, nullptr,
+                            /*PredecessorWithTwoSuccessors=*/true);
 
   if (MSSAU && VerifyMemorySSA)
     MSSAU->getMemorySSA()->verifyMemorySSA();
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 7e6da02d5707..d0f89dc54bfb 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -808,7 +808,7 @@ bool LoopSimplify::runOnFunction(Function &F) {
     auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
     if (MSSAAnalysis) {
       MSSA = &MSSAAnalysis->getMSSA();
-      MSSAU = make_unique<MemorySSAUpdater>(MSSA);
+      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
     }
   }
 
@@ -835,12 +835,19 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F,
   DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
   ScalarEvolution *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F);
   AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
+  auto *MSSAAnalysis = AM.getCachedResult<MemorySSAAnalysis>(F);
+  std::unique_ptr<MemorySSAUpdater> MSSAU;
+  if (MSSAAnalysis) {
+    auto *MSSA = &MSSAAnalysis->getMSSA();
+    MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
+  }
+
   // Note that we don't preserve LCSSA in the new PM, if you need it run LCSSA
-  // after simplifying the loops. MemorySSA is not preserved either.
+  // after simplifying the loops. MemorySSA is preserved if it exists.
   for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
     Changed |=
-        simplifyLoop(*I, DT, LI, SE, AC, nullptr, /*PreserveLCSSA*/ false);
+        simplifyLoop(*I, DT, LI, SE, AC, MSSAU.get(), /*PreserveLCSSA*/ false);
 
   if (!Changed)
     return PreservedAnalyses::all();
@@ -853,6 +860,8 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F,
   PA.preserve<SCEVAA>();
   PA.preserve<ScalarEvolutionAnalysis>();
   PA.preserve<DependenceAnalysis>();
+  if (MSSAAnalysis)
+    PA.preserve<MemorySSAAnalysis>();
   // BPI maps conditional terminators to probabilities, LoopSimplify can insert
   // blocks, but it does so only by splitting existing blocks and edges. This
   // results in the interesting property that all new terminators inserted are
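Editor's note: the LoopSimplifyPass hunk above uses the cached-analysis pattern for MemorySSA: update it only when an earlier pass already built it, and declare it preserved only in that case. A minimal sketch of the same pattern in any new-PM function pass (MyFunctionPass is hypothetical):

```cpp
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/IR/PassManager.h"
#include <memory>
using namespace llvm;

struct MyFunctionPass : PassInfoMixin<MyFunctionPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    // getCachedResult never computes MemorySSA; it returns null unless an
    // earlier pass in the pipeline already built it.
    auto *MSSAAnalysis = AM.getCachedResult<MemorySSAAnalysis>(F);
    std::unique_ptr<MemorySSAUpdater> MSSAU;
    if (MSSAAnalysis)
      MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAAnalysis->getMSSA());

    // ... transform F, routing updates through MSSAU.get() when non-null ...

    PreservedAnalyses PA;
    if (MSSAAnalysis)
      PA.preserve<MemorySSAAnalysis>();
    return PA;
  }
};
```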
This // results in the interesting property that all new terminators inserted are diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index e39ade523714..a7590fc32545 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -711,7 +711,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, auto setDest = [LoopExit, ContinueOnTrue](BasicBlock *Src, BasicBlock *Dest, ArrayRef<BasicBlock *> NextBlocks, - BasicBlock *CurrentHeader, + BasicBlock *BlockInLoop, bool NeedConditional) { auto *Term = cast<BranchInst>(Src->getTerminator()); if (NeedConditional) { @@ -723,7 +723,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, if (Dest != LoopExit) { BasicBlock *BB = Src; for (BasicBlock *Succ : successors(BB)) { - if (Succ == CurrentHeader) + // Preserve the incoming value from BB if we are jumping to the block + // in the current loop. + if (Succ == BlockInLoop) continue; for (PHINode &Phi : Succ->phis()) Phi.removeIncomingValue(BB, false); @@ -794,7 +796,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // unconditional branch for some iterations. NeedConditional = false; - setDest(Headers[i], Dest, Headers, Headers[i], NeedConditional); + setDest(Headers[i], Dest, Headers, HeaderSucc[i], NeedConditional); } // Set up latches to branch to the new header in the unrolled iterations or @@ -868,7 +870,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, assert(!DT || !UnrollVerifyDomtree || DT->verify(DominatorTree::VerificationLevel::Fast)); - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); // Merge adjacent basic blocks, if possible. for (BasicBlock *Latch : Latches) { BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator()); @@ -888,6 +890,8 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, } } } + // Apply updates to the DomTree. + DT = &DTU.getDomTree(); // At this point, the code is well formed. We now simplify the unrolled loop, // doing constant propagation and dead code elimination as we go. diff --git a/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/lib/Transforms/Utils/LoopUnrollAndJam.cpp index ff49d83f25c5..bf2e87b0d49f 100644 --- a/lib/Transforms/Utils/LoopUnrollAndJam.cpp +++ b/lib/Transforms/Utils/LoopUnrollAndJam.cpp @@ -517,6 +517,7 @@ LoopUnrollResult llvm::UnrollAndJamLoop( movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]); } + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); // Dominator Tree. Remove the old links between Fore, Sub and Aft, adding the // new ones required. if (Count != 1) { @@ -530,7 +531,7 @@ LoopUnrollResult llvm::UnrollAndJamLoop( ForeBlocksLast.back(), SubLoopBlocksFirst[0]); DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert, SubLoopBlocksLast.back(), AftBlocksFirst[0]); - DT->applyUpdates(DTUpdates); + DTU.applyUpdatesPermissive(DTUpdates); } // Merge adjacent basic blocks, if possible. 
@@ -538,7 +539,6 @@ LoopUnrollResult llvm::UnrollAndJamLoop( MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end()); MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end()); MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end()); - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); while (!MergeBlocks.empty()) { BasicBlock *BB = *MergeBlocks.begin(); BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator()); @@ -555,6 +555,8 @@ LoopUnrollResult llvm::UnrollAndJamLoop( } else MergeBlocks.erase(BB); } + // Apply updates to the DomTree. + DT = &DTU.getDomTree(); // At this point, the code is well formed. We now do a quick sweep over the // inserted code, doing constant propagation and dead code elimination as we diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp index 005306cf1898..58e42074f963 100644 --- a/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -62,9 +62,11 @@ static cl::opt<unsigned> UnrollForcePeelCount( cl::desc("Force a peel count regardless of profiling information.")); static cl::opt<bool> UnrollPeelMultiDeoptExit( - "unroll-peel-multi-deopt-exit", cl::init(false), cl::Hidden, + "unroll-peel-multi-deopt-exit", cl::init(true), cl::Hidden, cl::desc("Allow peeling of loops with multiple deopt exits.")); +static const char *PeeledCountMetaData = "llvm.loop.peeled.count"; + // Designates that a Phi is estimated to become invariant after an "infinite" // number of loop iterations (i.e. only may become an invariant if the loop is // fully unrolled). @@ -275,6 +277,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount << " iterations.\n"); UP.PeelCount = UnrollForcePeelCount; + UP.PeelProfiledIterations = true; return; } @@ -282,6 +285,13 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, if (!UP.AllowPeeling) return; + unsigned AlreadyPeeled = 0; + if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData)) + AlreadyPeeled = *Peeled; + // Stop if we already peeled off the maximum number of iterations. + if (AlreadyPeeled >= UnrollPeelMaxCount) + return; + // Here we try to get rid of Phis which become invariants after 1, 2, ..., N // iterations of the loop. For this we compute the number of iterations after // which every Phi is guaranteed to become an invariant, and try to peel the @@ -317,11 +327,14 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount); // Consider max peel count limitation. assert(DesiredPeelCount > 0 && "Wrong loop size estimation?"); - LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount - << " iteration(s) to turn" - << " some Phis into invariants.\n"); - UP.PeelCount = DesiredPeelCount; - return; + if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) { + LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount + << " iteration(s) to turn" + << " some Phis into invariants.\n"); + UP.PeelCount = DesiredPeelCount; + UP.PeelProfiledIterations = false; + return; + } } } @@ -330,6 +343,9 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, if (TripCount) return; + // Do not apply profile-based peeling if it is disabled. + if (!UP.PeelProfiledIterations) + return; // If we don't know the trip count, but have reason to believe the average // trip count is low, peeling should be beneficial, since we will usually // hit the peeled section.
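computePeelCount now reads the running total from "llvm.loop.peeled.count" and charges it against UnrollPeelMaxCount, so repeated runs of the peeler cannot peel more than the cap in aggregate. A minimal standalone model of that clamping (the function name and values are invented for illustration):

#include <cassert>
#include <cstdio>

// Returns how many iterations may still be peeled, mirroring the
// AlreadyPeeled checks added to computePeelCount above.
unsigned clampPeelCount(unsigned Desired, unsigned AlreadyPeeled,
                        unsigned MaxPeelCount) {
  if (AlreadyPeeled >= MaxPeelCount)
    return 0; // The budget was spent by earlier peeling; stop.
  if (Desired + AlreadyPeeled > MaxPeelCount)
    return 0; // Peeling Desired more iterations would exceed the cap.
  return Desired;
}

int main() {
  assert(clampPeelCount(3, 0, 7) == 3); // First run peels normally.
  assert(clampPeelCount(3, 5, 7) == 0); // 5 already peeled; 3 more won't fit.
  assert(clampPeelCount(3, 7, 7) == 0); // Cap fully consumed.
  std::printf("peel budget model ok\n");
  return 0;
}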
@@ -344,7 +360,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, << "\n"); if (*PeelCount) { - if ((*PeelCount <= UnrollPeelMaxCount) && + if ((*PeelCount + AlreadyPeeled <= UnrollPeelMaxCount) && (LoopSize * (*PeelCount + 1) <= UP.Threshold)) { LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount << " iterations.\n"); @@ -352,6 +368,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, return; } LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n"); + LLVM_DEBUG(dbgs() << "Already peeled count: " << AlreadyPeeled << "\n"); LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n"); LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) << "\n"); @@ -364,88 +381,77 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, /// iteration. /// This sets the branch weights for the latch of the recently peeled off loop /// iteration correctly. -/// Our goal is to make sure that: -/// a) The total weight of all the copies of the loop body is preserved. -/// b) The total weight of the loop exit is preserved. -/// c) The body weight is reasonably distributed between the peeled iterations. +/// Let F be the weight of the edge from latch to header. +/// Let E be the weight of the edge from latch to exit. +/// F/(F+E) is the probability of staying in the loop and E/(F+E) is the +/// probability of exiting. +/// Then, Estimated TripCount = F / E. +/// For the I-th (counting from 0) peeled-off iteration we set the weights of +/// the peeled latch to (TC - I, 1). This gives a reasonable distribution: the +/// probability of exiting, 1/(TC-I), increases, while at the same time the +/// estimated trip count of the remaining loop is reduced by I. +/// To avoid dealing with division rounding we can simply multiply both parts +/// of the weights by E and use (F - I * E, E) as the weights. /// /// \param Header The copy of the header block that belongs to next iteration. /// \param LatchBR The copy of the latch branch that belongs to this iteration. -/// \param IterNumber The serial number of the iteration that was just -/// peeled off. -/// \param AvgIters The average number of iterations we expect the loop to have. -/// \param[in,out] PeeledHeaderWeight The total number of dynamic loop -/// iterations that are unaccounted for. As an input, it represents the number -/// of times we expect to enter the header of the iteration currently being -/// peeled off. The output is the number of times we expect to enter the -/// header of the next iteration. +/// \param[in,out] FallThroughWeight The weight of the edge from latch to +/// header before peeling (in) and after peeling off one iteration (out). static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR, - unsigned IterNumber, unsigned AvgIters, - uint64_t &PeeledHeaderWeight) { - if (!PeeledHeaderWeight) + uint64_t ExitWeight, + uint64_t &FallThroughWeight) { + // A FallThroughWeight of 0 means that there are no branch weights on the + // original latch block, or that the estimated trip count is zero. + if (!FallThroughWeight) return; - // FIXME: Pick a more realistic distribution. - // Currently the proportion of weight we assign to the fall-through - // side of the branch drops linearly with the iteration number, and we use - // a 0.9 fudge factor to make the drop-off less sharp... - uint64_t FallThruWeight = - PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9); - uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight; - PeeledHeaderWeight -= ExitWeight; unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ?
0 : 1); MDBuilder MDB(LatchBR->getContext()); MDNode *WeightNode = - HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight) - : MDB.createBranchWeights(FallThruWeight, ExitWeight); + HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight) + : MDB.createBranchWeights(FallThroughWeight, ExitWeight); LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); + FallThroughWeight = + FallThroughWeight > ExitWeight ? FallThroughWeight - ExitWeight : 1; } /// Initialize the weights. /// /// \param Header The header block. /// \param LatchBR The latch branch. -/// \param AvgIters The average number of iterations we expect the loop to have. -/// \param[out] ExitWeight The # of times the edge from Latch to Exit is taken. -/// \param[out] CurHeaderWeight The # of times the header is executed. +/// \param[out] ExitWeight The weight of the edge from Latch to Exit. +/// \param[out] FallThroughWeight The weight of the edge from Latch to Header. static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR, - unsigned AvgIters, uint64_t &ExitWeight, - uint64_t &CurHeaderWeight) { + uint64_t &ExitWeight, + uint64_t &FallThroughWeight) { uint64_t TrueWeight, FalseWeight; if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) return; unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1; ExitWeight = HeaderIdx ? TrueWeight : FalseWeight; - // The # of times the loop body executes is the sum of the exit block - // is taken and the # of times the backedges are taken. - CurHeaderWeight = TrueWeight + FalseWeight; + FallThroughWeight = HeaderIdx ? FalseWeight : TrueWeight; } /// Update the weights of the original Latch block after peeling off all iterations. /// /// \param Header The header block. /// \param LatchBR The latch branch. -/// \param ExitWeight The weight of the edge from Latch to Exit block. -/// \param CurHeaderWeight The # of time the header is executed. +/// \param ExitWeight The weight of the edge from Latch to Exit. +/// \param FallThroughWeight The weight of the edge from Latch to Header. static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR, - uint64_t ExitWeight, uint64_t CurHeaderWeight) { - // Adjust the branch weights on the loop exit. - if (!ExitWeight) + uint64_t ExitWeight, + uint64_t FallThroughWeight) { + // A FallThroughWeight of 0 means that there are no branch weights on the + // original latch block, or that the estimated trip count is zero. + if (!FallThroughWeight) return; - // The backedge count is the difference of current header weight and - // current loop exit weight. If the current header weight is smaller than - // the current loop exit weight, we mark the loop backedge weight as 1. - uint64_t BackEdgeWeight = 0; - if (ExitWeight < CurHeaderWeight) - BackEdgeWeight = CurHeaderWeight - ExitWeight; - else - BackEdgeWeight = 1; + // Set the branch weights on the loop exit. MDBuilder MDB(LatchBR->getContext()); unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1; MDNode *WeightNode = - HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight) - : MDB.createBranchWeights(BackEdgeWeight, ExitWeight) + HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight) + : MDB.createBranchWeights(FallThroughWeight, ExitWeight); LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); } @@ -586,11 +592,30 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, DenseMap<BasicBlock *, BasicBlock *> ExitIDom; if (DT) { + // We'd like to determine the idom of the exit block after peeling one + // iteration.
+ // Let Exit be the exit block. + // Let ExitingSet be the set of predecessors of Exit; these are the exiting + // blocks. + // Let Latch' and ExitingSet' be the copies after peeling. + // We'd like to find idom'(Exit), the idom of Exit after peeling. + // It is evident that idom'(Exit) will be the nearest common dominator + // of ExitingSet and ExitingSet'. + // idom(Exit) is the nearest common dominator of ExitingSet. + // idom(Exit)' is the nearest common dominator of ExitingSet'. + // Taking into account that we have a single Latch, Latch' will dominate + // Header and idom(Exit). + // So idom'(Exit) is the nearest common dominator of idom(Exit)' and Latch'. + // All these basic blocks are in the same loop, so what we find is + // (the nearest common dominator of idom(Exit) and Latch)'. + // In the loop below we remember the nearest common dominator of idom(Exit) + // and Latch in order to update the idom of Exit later. assert(L->hasDedicatedExits() && "No dedicated exits?"); for (auto Edge : ExitEdges) { if (ExitIDom.count(Edge.second)) continue; - BasicBlock *BB = DT->getNode(Edge.second)->getIDom()->getBlock(); + BasicBlock *BB = DT->findNearestCommonDominator( + DT->getNode(Edge.second)->getIDom()->getBlock(), Latch); assert(L->contains(BB) && "IDom is not in a loop"); ExitIDom[Edge.second] = BB; } @@ -659,23 +684,14 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, // newly created branches. BranchInst *LatchBR = cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator()); - uint64_t ExitWeight = 0, CurHeaderWeight = 0; - initBranchWeights(Header, LatchBR, PeelCount, ExitWeight, CurHeaderWeight); + uint64_t ExitWeight = 0, FallThroughWeight = 0; + initBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight); // For each peeled-off iteration, make a copy of the loop. for (unsigned Iter = 0; Iter < PeelCount; ++Iter) { SmallVector<BasicBlock *, 8> NewBlocks; ValueToValueMapTy VMap; - // Subtract the exit weight from the current header weight -- the exit - // weight is exactly the weight of the previous iteration's header. - // FIXME: due to the way the distribution is constructed, we need a - // guard here to make sure we don't end up with non-positive weights. - if (ExitWeight < CurHeaderWeight) - CurHeaderWeight -= ExitWeight; - else - CurHeaderWeight = 1; - cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks, LoopBlocks, VMap, LVMap, DT, LI); @@ -697,8 +713,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, } auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]); - updateBranchWeights(InsertBot, LatchBRCopy, Iter, - PeelCount, ExitWeight); + updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight); // Remove Loop metadata from the latch branch instruction // because it is not the Loop's latch branch anymore. LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr); @@ -724,7 +739,13 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, PHI->setIncomingValueForBlock(NewPreHeader, NewVal); } - fixupBranchWeights(Header, LatchBR, ExitWeight, CurHeaderWeight); + fixupBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight); + + // Update the metadata with the count of peeled-off iterations.
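A worked example of the (F - I*E, E) scheme that initBranchWeights and updateBranchWeights now implement, as plain standalone C++ mirroring the arithmetic (the starting weights are made up):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t FallThroughWeight = 800; // F: latch -> header
  const uint64_t ExitWeight = 100;  // E: latch -> exit
  // Estimated trip count of the original loop: F / E = 8.
  std::printf("estimated trip count: %llu\n",
              (unsigned long long)(FallThroughWeight / ExitWeight));

  for (unsigned Iter = 0; Iter < 3; ++Iter) {
    // Weights given to the peeled copy of the latch branch: (F - I*E, E).
    std::printf("iter %u: weights (%llu, %llu), exit probability %.3f\n",
                Iter, (unsigned long long)FallThroughWeight,
                (unsigned long long)ExitWeight,
                (double)ExitWeight / (double)(FallThroughWeight + ExitWeight));
    // Same saturating update as updateBranchWeights performs.
    FallThroughWeight =
        FallThroughWeight > ExitWeight ? FallThroughWeight - ExitWeight : 1;
  }
  // The loop that remains keeps (500, 100): an estimated trip count of 5,
  // i.e. the original 8 minus the 3 peeled iterations.
  return 0;
}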
+ unsigned AlreadyPeeled = 0; + if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData)) + AlreadyPeeled = *Peeled; + addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount); if (Loop *ParentLoop = L->getParentLoop()) L = ParentLoop; diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp index ec226e65f650..b4d7f35d2d9a 100644 --- a/lib/Transforms/Utils/LoopUtils.cpp +++ b/lib/Transforms/Utils/LoopUtils.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -45,6 +46,7 @@ using namespace llvm::PatternMatch; #define DEBUG_TYPE "loop-utils" static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced"; +static const char *LLVMLoopDisableLICM = "llvm.licm.disable"; bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, @@ -169,6 +171,8 @@ void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) { AU.addPreserved<SCEVAAWrapperPass>(); AU.addRequired<ScalarEvolutionWrapperPass>(); AU.addPreserved<ScalarEvolutionWrapperPass>(); + // FIXME: When all loop passes preserve MemorySSA, it can be required and + // preserved here instead of the individual handling in each pass. } /// Manually defined generic "LoopPass" dependency initialization. This is used @@ -189,6 +193,54 @@ void llvm::initializeLoopPassPass(PassRegistry &Registry) { INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) + INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) +} + +/// Create MDNode for input string. +static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) { + LLVMContext &Context = TheLoop->getHeader()->getContext(); + Metadata *MDs[] = { + MDString::get(Context, Name), + ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))}; + return MDNode::get(Context, MDs); +} + +/// Set the input string into the loop metadata, keeping other values intact. +/// If the string is already in the loop metadata, update its value if it is +/// different. +void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD, + unsigned V) { + SmallVector<Metadata *, 4> MDs(1); + // If the loop already has metadata, retain it. + MDNode *LoopID = TheLoop->getLoopID(); + if (LoopID) { + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + MDNode *Node = cast<MDNode>(LoopID->getOperand(i)); + // If it is of the form key = value, try to parse it. + if (Node->getNumOperands() == 2) { + MDString *S = dyn_cast<MDString>(Node->getOperand(0)); + if (S && S->getString().equals(StringMD)) { + ConstantInt *IntMD = + mdconst::extract_or_null<ConstantInt>(Node->getOperand(1)); + if (IntMD && IntMD->getSExtValue() == V) + // It is already in place. Do nothing. + return; + // We need to update the value, so just skip it here and it will + // be added after copying the other existing nodes. + continue; + } + } + MDs.push_back(Node); + } + } + // Add new metadata. + MDs.push_back(createStringMetadata(TheLoop, StringMD, V)); + // Replace current metadata node with new one. + LLVMContext &Context = TheLoop->getHeader()->getContext(); + MDNode *NewLoopID = MDNode::get(Context, MDs); + // Set operand 0 to refer to the loop id itself.
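The replace-or-append rule in addStringMetadataToLoop can be modeled without LLVM types. The sketch below keeps unrelated entries, returns early when the key already holds the requested value, and otherwise drops the stale node and appends a fresh one; the self-referential operand 0 is elided, and all names are local to the sketch:

#include <cstdio>
#include <string>
#include <utility>
#include <vector>

using LoopMD = std::vector<std::pair<std::string, unsigned>>;

void addStringMetadata(LoopMD &MDs, const std::string &Key, unsigned V) {
  LoopMD Kept;
  for (const auto &Node : MDs) {
    if (Node.first == Key) {
      if (Node.second == V)
        return;  // Already in place; leave the metadata untouched.
      continue;  // Stale value: skip it, the new node is appended below.
    }
    Kept.push_back(Node); // Retain all other loop metadata intact.
  }
  Kept.emplace_back(Key, V);
  MDs = std::move(Kept);
}

int main() {
  LoopMD MDs = {{"llvm.loop.unroll.disable", 1}};
  addStringMetadata(MDs, "llvm.loop.peeled.count", 2);
  addStringMetadata(MDs, "llvm.loop.peeled.count", 2); // No-op.
  addStringMetadata(MDs, "llvm.loop.peeled.count", 5); // Replaces the 2.
  for (const auto &Node : MDs)
    std::printf("%s = %u\n", Node.first.c_str(), Node.second);
  return 0;
}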
+ NewLoopID->replaceOperandWith(0, NewLoopID); + TheLoop->setLoopID(NewLoopID); } /// Find string metadata for loop @@ -332,6 +384,10 @@ bool llvm::hasDisableAllTransformsHint(const Loop *L) { return getBooleanLoopAttribute(L, LLVMLoopDisableNonforced); } +bool llvm::hasDisableLICMTransformsHint(const Loop *L) { + return getBooleanLoopAttribute(L, LLVMLoopDisableLICM); +} + TransformationMode llvm::hasUnrollTransformation(Loop *L) { if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable")) return TM_SuppressedByUser; diff --git a/lib/Transforms/Utils/LoopVersioning.cpp b/lib/Transforms/Utils/LoopVersioning.cpp index a9a480a4b7f9..5d7759056c7d 100644 --- a/lib/Transforms/Utils/LoopVersioning.cpp +++ b/lib/Transforms/Utils/LoopVersioning.cpp @@ -92,8 +92,8 @@ void LoopVersioning::versionLoop( // Create empty preheader for the loop (and after cloning for the // non-versioned loop). BasicBlock *PH = - SplitBlock(RuntimeCheckBB, RuntimeCheckBB->getTerminator(), DT, LI); - PH->setName(VersionedLoop->getHeader()->getName() + ".ph"); + SplitBlock(RuntimeCheckBB, RuntimeCheckBB->getTerminator(), DT, LI, + nullptr, VersionedLoop->getHeader()->getName() + ".ph"); // Clone the loop including the preheader. // diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp index c0b7edc547fd..60bb2775a194 100644 --- a/lib/Transforms/Utils/MetaRenamer.cpp +++ b/lib/Transforms/Utils/MetaRenamer.cpp @@ -121,15 +121,14 @@ namespace { } // Rename all functions - const TargetLibraryInfo &TLI = - getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); for (auto &F : M) { StringRef Name = F.getName(); LibFunc Tmp; // Leave library functions alone because their presence or absence could // affect the behavior of other passes. if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) || - TLI.getLibFunc(F, Tmp)) + getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F).getLibFunc( + F, Tmp)) continue; // Leave @main alone. The output of -metarenamer might be passed to diff --git a/lib/Transforms/Utils/MisExpect.cpp b/lib/Transforms/Utils/MisExpect.cpp new file mode 100644 index 000000000000..26d3402bd279 --- /dev/null +++ b/lib/Transforms/Utils/MisExpect.cpp @@ -0,0 +1,177 @@ +//===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit warnings for potentially incorrect usage of the +// llvm.expect intrinsic. This utility extracts the threshold values from +// metadata associated with the instrumented Branch or Switch instruction. The +// threshold values are then used to determine if a warning should be emitted.
+// +// MisExpect metadata is generated when llvm.expect intrinsics are lowered; see +// LowerExpectIntrinsic.cpp. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/MisExpect.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormatVariadic.h" +#include <cstdint> +#include <functional> +#include <numeric> + +#define DEBUG_TYPE "misexpect" + +using namespace llvm; +using namespace misexpect; + +namespace llvm { + +// Command line option to enable/disable the warning when profile data suggests +// a mismatch with the use of the llvm.expect intrinsic. +static cl::opt<bool> PGOWarnMisExpect( + "pgo-warn-misexpect", cl::init(false), cl::Hidden, + cl::desc("Use this option to turn on/off " + "warnings about incorrect usage of llvm.expect intrinsics.")); + +} // namespace llvm + +namespace { + +Instruction *getOprndOrInst(Instruction *I) { + assert(I != nullptr && "MisExpect target Instruction cannot be nullptr"); + Instruction *Ret = nullptr; + if (auto *B = dyn_cast<BranchInst>(I)) { + Ret = dyn_cast<Instruction>(B->getCondition()); + } + // TODO: Find a way to resolve condition location for switches + // Using the condition of the switch seems to often resolve to an earlier + // point in the program, i.e. the calculation of the switch condition, rather + // than the switch's location in the source code. Thus, we should use the + // instruction to get source code locations rather than the condition to + // improve diagnostic output, such as the caret. If the same problem exists + // for branch instructions, then we should remove this function and directly + // use the instruction. + // + // else if (auto S = dyn_cast<SwitchInst>(I)) { + // Ret = I; + //} + return Ret ?
Ret : I; +} + +void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx, + uint64_t ProfCount, uint64_t TotalCount) { + double PercentageCorrect = (double)ProfCount / TotalCount; + auto PerString = + formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount); + auto RemStr = formatv( + "Potential performance regression from use of the llvm.expect intrinsic: " + "Annotation was correct on {0} of profiled executions.", + PerString); + Twine Msg(PerString); + Instruction *Cond = getOprndOrInst(I); + if (PGOWarnMisExpect) + Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Msg)); + OptimizationRemarkEmitter ORE(I->getParent()->getParent()); + ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str()); +} + +} // namespace + +namespace llvm { +namespace misexpect { + +void verifyMisExpect(Instruction *I, const SmallVector<uint32_t, 4> &Weights, + LLVMContext &Ctx) { + if (auto *MisExpectData = I->getMetadata(LLVMContext::MD_misexpect)) { + auto *MisExpectDataName = dyn_cast<MDString>(MisExpectData->getOperand(0)); + if (MisExpectDataName && + MisExpectDataName->getString().equals("misexpect")) { + LLVM_DEBUG(llvm::dbgs() << "------------------\n"); + LLVM_DEBUG(llvm::dbgs() + << "Function: " << I->getFunction()->getName() << "\n"); + LLVM_DEBUG(llvm::dbgs() << "Instruction: " << *I << ":\n"); + LLVM_DEBUG(for (int Idx = 0, Size = Weights.size(); Idx < Size; ++Idx) { + llvm::dbgs() << "Weights[" << Idx << "] = " << Weights[Idx] << "\n"; + }); + + // extract values from misexpect metadata + const auto *IndexCint = + mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(1)); + const auto *LikelyCInt = + mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(2)); + const auto *UnlikelyCInt = + mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(3)); + + if (!IndexCint || !LikelyCInt || !UnlikelyCInt) + return; + + const uint64_t Index = IndexCint->getZExtValue(); + const uint64_t LikelyBranchWeight = LikelyCInt->getZExtValue(); + const uint64_t UnlikelyBranchWeight = UnlikelyCInt->getZExtValue(); + const uint64_t ProfileCount = Weights[Index]; + const uint64_t CaseTotal = std::accumulate( + Weights.begin(), Weights.end(), (uint64_t)0, std::plus<uint64_t>()); + const uint64_t NumUnlikelyTargets = Weights.size() - 1; + + const uint64_t TotalBranchWeight = + LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets); + + const llvm::BranchProbability LikelyThreshold(LikelyBranchWeight, + TotalBranchWeight); + uint64_t ScaledThreshold = LikelyThreshold.scale(CaseTotal); + + LLVM_DEBUG(llvm::dbgs() + << "Unlikely Targets: " << NumUnlikelyTargets << ":\n"); + LLVM_DEBUG(llvm::dbgs() << "Profile Count: " << ProfileCount << ":\n"); + LLVM_DEBUG(llvm::dbgs() + << "Scaled Threshold: " << ScaledThreshold << ":\n"); + LLVM_DEBUG(llvm::dbgs() << "------------------\n"); + if (ProfileCount < ScaledThreshold) + emitMisexpectDiagnostic(I, Ctx, ProfileCount, CaseTotal); + } + } +} + +void checkFrontendInstrumentation(Instruction &I) { + if (auto *MD = I.getMetadata(LLVMContext::MD_prof)) { + unsigned NOps = MD->getNumOperands(); + + // Only emit misexpect diagnostics if at least 2 branch weights are present. + // Less than 2 branch weights means that the profiling metadata is: + // 1) incorrect/corrupted + // 2) not branch weight metadata + // 3) completely deterministic + // In these cases we should not emit any diagnostic related to misexpect. 
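To make the threshold test in verifyMisExpect concrete, here is the same arithmetic as standalone C++, seeded with the 2000:1 likely/unlikely weights that the llvm.expect lowering uses by default (the helper name and the sample profiles are invented for the sketch):

#include <cstdint>
#include <cstdio>
#include <numeric>
#include <vector>

bool isMisExpect(const std::vector<uint32_t> &Weights, size_t Index,
                 uint64_t LikelyWeight = 2000, uint64_t UnlikelyWeight = 1) {
  const uint64_t ProfileCount = Weights[Index];
  const uint64_t CaseTotal =
      std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0);
  const uint64_t NumUnlikelyTargets = Weights.size() - 1;
  const uint64_t TotalBranchWeight =
      LikelyWeight + UnlikelyWeight * NumUnlikelyTargets;
  // BranchProbability(LikelyWeight, TotalBranchWeight).scale(CaseTotal),
  // minus the rounding machinery: the annotated target should account for
  // at least this many of the profiled executions.
  const uint64_t ScaledThreshold = CaseTotal * LikelyWeight / TotalBranchWeight;
  return ProfileCount < ScaledThreshold;
}

int main() {
  // Annotated as likely, but almost every sample went the other way: warn.
  std::printf("misexpect: %d\n", isMisExpect({10, 100000}, 0));  // 1
  // The profile agrees with the annotation: stay quiet.
  std::printf("misexpect: %d\n", isMisExpect({100000, 10}, 0));  // 0
  return 0;
}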
+ if (NOps < 3) + return; + + // Operand 0 is a string tag "branch_weights" + if (MDString *Tag = cast<MDString>(MD->getOperand(0))) { + if (Tag->getString().equals("branch_weights")) { + SmallVector<uint32_t, 4> RealWeights(NOps - 1); + for (unsigned i = 1; i < NOps; i++) { + ConstantInt *Value = + mdconst::dyn_extract<ConstantInt>(MD->getOperand(i)); + RealWeights[i - 1] = Value->getZExtValue(); + } + verifyMisExpect(&I, RealWeights, I.getContext()); + } + } + } +} + +} // namespace misexpect +} // namespace llvm +#undef DEBUG_TYPE diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp index c84beceee191..1ef3757017a8 100644 --- a/lib/Transforms/Utils/ModuleUtils.cpp +++ b/lib/Transforms/Utils/ModuleUtils.cpp @@ -73,7 +73,7 @@ static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> SmallPtrSet<Constant *, 16> InitAsSet; SmallVector<Constant *, 16> Init; if (GV) { - ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer()); + auto *CA = cast<ConstantArray>(GV->getInitializer()); for (auto &Op : CA->operands()) { Constant *C = cast_or_null<Constant>(Op); if (InitAsSet.insert(C).second) diff --git a/lib/Transforms/Utils/PredicateInfo.cpp b/lib/Transforms/Utils/PredicateInfo.cpp index bdf24d80bd17..44859eafb9c1 100644 --- a/lib/Transforms/Utils/PredicateInfo.cpp +++ b/lib/Transforms/Utils/PredicateInfo.cpp @@ -125,8 +125,10 @@ static bool valueComesBefore(OrderedInstructions &OI, const Value *A, // necessary to compare uses/defs in the same block. Doing so allows us to walk // the minimum number of instructions necessary to compute our def/use ordering. struct ValueDFS_Compare { + DominatorTree &DT; OrderedInstructions &OI; - ValueDFS_Compare(OrderedInstructions &OI) : OI(OI) {} + ValueDFS_Compare(DominatorTree &DT, OrderedInstructions &OI) + : DT(DT), OI(OI) {} bool operator()(const ValueDFS &A, const ValueDFS &B) const { if (&A == &B) @@ -136,7 +138,9 @@ struct ValueDFS_Compare { // comesbefore to see what the real ordering is, because they are in the // same basic block. - bool SameBlock = std::tie(A.DFSIn, A.DFSOut) == std::tie(B.DFSIn, B.DFSOut); + assert((A.DFSIn != B.DFSIn || A.DFSOut == B.DFSOut) && + "Equal DFS-in numbers imply equal out numbers"); + bool SameBlock = A.DFSIn == B.DFSIn; // We want to put the def that will get used for a given set of phi uses, // before those phi uses. @@ -145,9 +149,11 @@ struct ValueDFS_Compare { if (SameBlock && A.LocalNum == LN_Last && B.LocalNum == LN_Last) return comparePHIRelated(A, B); + bool isADef = A.Def; + bool isBDef = B.Def; if (!SameBlock || A.LocalNum != LN_Middle || B.LocalNum != LN_Middle) - return std::tie(A.DFSIn, A.DFSOut, A.LocalNum, A.Def, A.U) < - std::tie(B.DFSIn, B.DFSOut, B.LocalNum, B.Def, B.U); + return std::tie(A.DFSIn, A.LocalNum, isADef) < + std::tie(B.DFSIn, B.LocalNum, isBDef); return localComesBefore(A, B); } @@ -164,10 +170,35 @@ struct ValueDFS_Compare { // For two phi related values, return the ordering. bool comparePHIRelated(const ValueDFS &A, const ValueDFS &B) const { - auto &ABlockEdge = getBlockEdge(A); - auto &BBlockEdge = getBlockEdge(B); - // Now sort by block edge and then defs before uses. - return std::tie(ABlockEdge, A.Def, A.U) < std::tie(BBlockEdge, B.Def, B.U); + BasicBlock *ASrc, *ADest, *BSrc, *BDest; + std::tie(ASrc, ADest) = getBlockEdge(A); + std::tie(BSrc, BDest) = getBlockEdge(B); + +#ifndef NDEBUG + // This function should only be used for values in the same BB, check that. 
+ DomTreeNode *DomASrc = DT.getNode(ASrc); + DomTreeNode *DomBSrc = DT.getNode(BSrc); + assert(DomASrc->getDFSNumIn() == (unsigned)A.DFSIn && + "DFS numbers for A should match the ones of the source block"); + assert(DomBSrc->getDFSNumIn() == (unsigned)B.DFSIn && + "DFS numbers for B should match the ones of the source block"); + assert(A.DFSIn == B.DFSIn && "Values must be in the same block"); +#endif + (void)ASrc; + (void)BSrc; + + // Use DFS numbers to compare destination blocks, to guarantee a + // deterministic order. + DomTreeNode *DomADest = DT.getNode(ADest); + DomTreeNode *DomBDest = DT.getNode(BDest); + unsigned AIn = DomADest->getDFSNumIn(); + unsigned BIn = DomBDest->getDFSNumIn(); + bool isADef = A.Def; + bool isBDef = B.Def; + assert((!A.Def || !A.U) && (!B.Def || !B.U) && + "Def and U cannot be set at the same time"); + // Now sort by edge destination and then defs before uses. + return std::tie(AIn, isADef) < std::tie(BIn, isBDef); } // Get the definition of an instruction that occurs in the middle of a block. @@ -306,10 +337,11 @@ void collectCmpOps(CmpInst *Comparison, SmallVectorImpl<Value *> &CmpOperands) { } // Add Op, PB to the list of value infos for Op, and mark Op to be renamed. -void PredicateInfo::addInfoFor(SmallPtrSetImpl<Value *> &OpsToRename, Value *Op, +void PredicateInfo::addInfoFor(SmallVectorImpl<Value *> &OpsToRename, Value *Op, PredicateBase *PB) { - OpsToRename.insert(Op); auto &OperandInfo = getOrCreateValueInfo(Op); + if (OperandInfo.Infos.empty()) + OpsToRename.push_back(Op); AllInfos.push_back(PB); OperandInfo.Infos.push_back(PB); } @@ -317,7 +349,7 @@ void PredicateInfo::addInfoFor(SmallPtrSetImpl<Value *> &OpsToRename, Value *Op, // Process an assume instruction and place relevant operations we want to rename // into OpsToRename. void PredicateInfo::processAssume(IntrinsicInst *II, BasicBlock *AssumeBB, - SmallPtrSetImpl<Value *> &OpsToRename) { + SmallVectorImpl<Value *> &OpsToRename) { // See if we have a comparison we support SmallVector<Value *, 8> CmpOperands; SmallVector<Value *, 2> ConditionsToProcess; @@ -357,7 +389,7 @@ void PredicateInfo::processAssume(IntrinsicInst *II, BasicBlock *AssumeBB, // Process a block terminating branch, and place relevant operations to be // renamed into OpsToRename. void PredicateInfo::processBranch(BranchInst *BI, BasicBlock *BranchBB, - SmallPtrSetImpl<Value *> &OpsToRename) { + SmallVectorImpl<Value *> &OpsToRename) { BasicBlock *FirstBB = BI->getSuccessor(0); BasicBlock *SecondBB = BI->getSuccessor(1); SmallVector<BasicBlock *, 2> SuccsToProcess; @@ -427,7 +459,7 @@ void PredicateInfo::processBranch(BranchInst *BI, BasicBlock *BranchBB, // Process a block terminating switch, and place relevant operations to be // renamed into OpsToRename. void PredicateInfo::processSwitch(SwitchInst *SI, BasicBlock *BranchBB, - SmallPtrSetImpl<Value *> &OpsToRename) { + SmallVectorImpl<Value *> &OpsToRename) { Value *Op = SI->getCondition(); if ((!isa<Instruction>(Op) && !isa<Argument>(Op)) || Op->hasOneUse()) return; @@ -457,7 +489,7 @@ void PredicateInfo::buildPredicateInfo() { DT.updateDFSNumbers(); // Collect operands to rename from all conditional branch terminators, as well // as assume statements. 
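The container switch running through these hunks (SmallPtrSet to SmallVector for OpsToRename) is about determinism: iteration order over a set of pointers can vary from run to run, while a vector replays insertion order, which is what renameUses now relies on instead of re-sorting. A miniature of the new addInfoFor dedup logic, with standard containers standing in for the LLVM ones:

#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct OperandInfoSketch {
  std::vector<int> Infos; // Predicate ids attached to this operand.
};

int main() {
  std::map<std::string, OperandInfoSketch> ValueInfos;
  std::vector<std::string> OpsToRename; // Deterministic: insertion order.

  auto addInfoFor = [&](const std::string &Op, int PredicateId) {
    OperandInfoSketch &Info = ValueInfos[Op];
    if (Info.Infos.empty())      // First predicate seen for this operand:
      OpsToRename.push_back(Op); // record it exactly once, in visit order.
    Info.Infos.push_back(PredicateId);
  };

  addInfoFor("%a", 0);
  addInfoFor("%b", 1);
  addInfoFor("%a", 2); // Already known; no duplicate entry is added.

  for (const std::string &Op : OpsToRename)
    std::printf("rename %s (%zu predicates)\n", Op.c_str(),
                ValueInfos[Op].Infos.size());
  return 0;
}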
- SmallPtrSet<Value *, 8> OpsToRename; + SmallVector<Value *, 8> OpsToRename; for (auto DTN : depth_first(DT.getRootNode())) { BasicBlock *BranchBB = DTN->getBlock(); if (auto *BI = dyn_cast<BranchInst>(BranchBB->getTerminator())) { @@ -524,7 +556,7 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter, if (isa<PredicateWithEdge>(ValInfo)) { IRBuilder<> B(getBranchTerminator(ValInfo)); Function *IF = getCopyDeclaration(F.getParent(), Op->getType()); - if (empty(IF->users())) + if (IF->users().empty()) CreatedDeclarations.insert(IF); CallInst *PIC = B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++)); @@ -536,7 +568,7 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter, "Should not have gotten here without it being an assume"); IRBuilder<> B(PAssume->AssumeInst); Function *IF = getCopyDeclaration(F.getParent(), Op->getType()); - if (empty(IF->users())) + if (IF->users().empty()) CreatedDeclarations.insert(IF); CallInst *PIC = B.CreateCall(IF, Op); PredicateMap.insert({PIC, ValInfo}); @@ -565,14 +597,8 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter, // // TODO: Use this algorithm to perform fast single-variable renaming in // promotememtoreg and memoryssa. -void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) { - // Sort OpsToRename since we are going to iterate it. - SmallVector<Value *, 8> OpsToRename(OpSet.begin(), OpSet.end()); - auto Comparator = [&](const Value *A, const Value *B) { - return valueComesBefore(OI, A, B); - }; - llvm::sort(OpsToRename, Comparator); - ValueDFS_Compare Compare(OI); +void PredicateInfo::renameUses(SmallVectorImpl<Value *> &OpsToRename) { + ValueDFS_Compare Compare(DT, OI); // Compute liveness, and rename in O(uses) per Op. for (auto *Op : OpsToRename) { LLVM_DEBUG(dbgs() << "Visiting " << *Op << "\n"); @@ -772,7 +798,7 @@ static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) { bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) { auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - auto PredInfo = make_unique<PredicateInfo>(F, DT, AC); + auto PredInfo = std::make_unique<PredicateInfo>(F, DT, AC); PredInfo->print(dbgs()); if (VerifyPredicateInfo) PredInfo->verifyPredicateInfo(); @@ -786,7 +812,7 @@ PreservedAnalyses PredicateInfoPrinterPass::run(Function &F, auto &DT = AM.getResult<DominatorTreeAnalysis>(F); auto &AC = AM.getResult<AssumptionAnalysis>(F); OS << "PredicateInfo for function: " << F.getName() << "\n"; - auto PredInfo = make_unique<PredicateInfo>(F, DT, AC); + auto PredInfo = std::make_unique<PredicateInfo>(F, DT, AC); PredInfo->print(OS); replaceCreatedSSACopys(*PredInfo, F); @@ -845,7 +871,7 @@ PreservedAnalyses PredicateInfoVerifierPass::run(Function &F, FunctionAnalysisManager &AM) { auto &DT = AM.getResult<DominatorTreeAnalysis>(F); auto &AC = AM.getResult<AssumptionAnalysis>(F); - make_unique<PredicateInfo>(F, DT, AC)->verifyPredicateInfo(); + std::make_unique<PredicateInfo>(F, DT, AC)->verifyPredicateInfo(); return PreservedAnalyses::all(); } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 11651d040dc0..3a5e3293ed4f 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -94,6 +94,12 @@ static cl::opt<unsigned> PHINodeFoldingThreshold( cl::desc( "Control the amount of phi node folding to perform (default = 2)")); +static cl::opt<unsigned> TwoEntryPHINodeFoldingThreshold( + 
"two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), + cl::desc("Control the maximal total instruction cost that we are willing " + "to speculatively execute to fold a 2-entry PHI node into a " + "select (default = 4)")); + static cl::opt<bool> DupRet( "simplifycfg-dup-ret", cl::Hidden, cl::init(false), cl::desc("Duplicate return instructions into unconditional branches")); @@ -332,7 +338,7 @@ static unsigned ComputeSpeculationCost(const User *I, /// CostRemaining, false is returned and CostRemaining is undefined. static bool DominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSetImpl<Instruction *> &AggressiveInsts, - unsigned &CostRemaining, + int &BudgetRemaining, const TargetTransformInfo &TTI, unsigned Depth = 0) { // It is possible to hit a zero-cost cycle (phi/gep instructions for example), @@ -375,7 +381,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, if (!isSafeToSpeculativelyExecute(I)) return false; - unsigned Cost = ComputeSpeculationCost(I, TTI); + BudgetRemaining -= ComputeSpeculationCost(I, TTI); // Allow exactly one instruction to be speculated regardless of its cost // (as long as it is safe to do so). @@ -383,17 +389,14 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // or other expensive operation. The speculation of an expensive instruction // is expected to be undone in CodeGenPrepare if the speculation has not // enabled further IR optimizations. - if (Cost > CostRemaining && + if (BudgetRemaining < 0 && (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0)) return false; - // Avoid unsigned wrap. - CostRemaining = (Cost > CostRemaining) ? 0 : CostRemaining - Cost; - // Okay, we can only really hoist these out if their operands do // not take us over the cost threshold. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI, + if (!DominatesMergePoint(*i, BB, AggressiveInsts, BudgetRemaining, TTI, Depth + 1)) return false; // Okay, it's safe to do this! Remember this instruction. @@ -629,8 +632,7 @@ private: /// vector. /// One "Extra" case is allowed to differ from the other. void gather(Value *V) { - Instruction *I = dyn_cast<Instruction>(V); - bool isEQ = (I->getOpcode() == Instruction::Or); + bool isEQ = (cast<Instruction>(V)->getOpcode() == Instruction::Or); // Keep a stack (SmallVector for efficiency) for depth-first traversal SmallVector<Value *, 8> DFT; @@ -1313,7 +1315,8 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, LLVMContext::MD_dereferenceable, LLVMContext::MD_dereferenceable_or_null, LLVMContext::MD_mem_parallel_loop_access, - LLVMContext::MD_access_group}; + LLVMContext::MD_access_group, + LLVMContext::MD_preserve_access_index}; combineMetadata(I1, I2, KnownIDs, true); // I1 and I2 are being combined into a single instruction. Its debug @@ -1420,6 +1423,20 @@ HoistTerminator: return true; } +// Check lifetime markers. +static bool isLifeTimeMarker(const Instruction *I) { + if (auto II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: + break; + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return true; + } + } + return false; +} + // All instructions in Insts belong to different blocks that all unconditionally // branch to a common successor. 
Analyze each instruction and return true if it // would be possible to sink them into their successor, creating one common @@ -1474,20 +1491,25 @@ static bool canSinkInstructions( return false; } - // Because SROA can't handle speculating stores of selects, try not - // to sink loads or stores of allocas when we'd have to create a PHI for - // the address operand. Also, because it is likely that loads or stores - // of allocas will disappear when Mem2Reg/SROA is run, don't sink them. + // Because SROA can't handle speculating stores of selects, try not to sink + // loads, stores or lifetime markers of allocas when we'd have to create a + // PHI for the address operand. Also, because it is likely that loads or + // stores of allocas will disappear when Mem2Reg/SROA is run, don't sink + // them. // This can cause code churn which can have unintended consequences down // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244. // FIXME: This is a workaround for a deficiency in SROA - see // https://llvm.org/bugs/show_bug.cgi?id=30188 if (isa<StoreInst>(I0) && any_of(Insts, [](const Instruction *I) { - return isa<AllocaInst>(I->getOperand(1)); + return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts()); })) return false; if (isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) { - return isa<AllocaInst>(I->getOperand(0)); + return isa<AllocaInst>(I->getOperand(0)->stripPointerCasts()); + })) + return false; + if (isLifeTimeMarker(I0) && any_of(Insts, [](const Instruction *I) { + return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts()); })) return false; @@ -1959,7 +1981,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics; - unsigned SpeculationCost = 0; + unsigned SpeculatedInstructions = 0; Value *SpeculatedStoreValue = nullptr; StoreInst *SpeculatedStore = nullptr; for (BasicBlock::iterator BBI = ThenBB->begin(), @@ -1974,8 +1996,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // Only speculatively execute a single instruction (not counting the // terminator) for now. - ++SpeculationCost; - if (SpeculationCost > 1) + ++SpeculatedInstructions; + if (SpeculatedInstructions > 1) return false; // Don't hoist the instruction if it's unsafe or expensive. @@ -2012,8 +2034,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, E = SinkCandidateUseCounts.end(); I != E; ++I) if (I->first->hasNUses(I->second)) { - ++SpeculationCost; - if (SpeculationCost > 1) + ++SpeculatedInstructions; + if (SpeculatedInstructions > 1) return false; } @@ -2053,8 +2075,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // getting expanded into Instructions. // FIXME: This doesn't account for how many operations are combined in the // constant expression. - ++SpeculationCost; - if (SpeculationCost > 1) + ++SpeculatedInstructions; + if (SpeculatedInstructions > 1) return false; } @@ -2302,10 +2324,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, // instructions. While we are at it, keep track of the instructions // that need to be moved to the dominating block. 
SmallPtrSet<Instruction *, 4> AggressiveInsts; - unsigned MaxCostVal0 = PHINodeFoldingThreshold, - MaxCostVal1 = PHINodeFoldingThreshold; - MaxCostVal0 *= TargetTransformInfo::TCC_Basic; - MaxCostVal1 *= TargetTransformInfo::TCC_Basic; + int BudgetRemaining = + TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { PHINode *PN = cast<PHINode>(II++); @@ -2316,9 +2336,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, } if (!DominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts, - MaxCostVal0, TTI) || + BudgetRemaining, TTI) || !DominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts, - MaxCostVal1, TTI)) + BudgetRemaining, TTI)) return false; } @@ -2328,12 +2348,24 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, if (!PN) return true; - // Don't fold i1 branches on PHIs which contain binary operators. These can - // often be turned into switches and other things. + // Return true if at least one of these is a 'not', and the other is either + // a 'not' too or a constant. + auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) { + if (!match(V0, m_Not(m_Value()))) + std::swap(V0, V1); + auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant()); + return match(V0, m_Not(m_Value())) && match(V1, Invertible); + }; + + // Don't fold i1 branches on PHIs which contain binary operators, unless one + // of the incoming values is a 'not' and the other one is freely invertible. + // These can often be turned into switches and other things. if (PN->getType()->isIntegerTy(1) && (isa<BinaryOperator>(PN->getIncomingValue(0)) || isa<BinaryOperator>(PN->getIncomingValue(1)) || - isa<BinaryOperator>(IfCond))) + isa<BinaryOperator>(IfCond)) && + !CanHoistNotFromBothValues(PN->getIncomingValue(0), + PN->getIncomingValue(1))) return false; // If all PHI nodes are promotable, check to make sure that all instructions @@ -2368,6 +2400,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, return false; } } + assert(DomBlock && "Failed to find root DomBlock"); LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: " << IfTrue->getName() @@ -2913,42 +2946,8 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, - const DataLayout &DL) { - auto IsaBitcastOfPointerType = [](const Instruction &I) { - return Operator::getOpcode(&I) == Instruction::BitCast && - I.getType()->isPointerTy(); - }; - - // If we're not in aggressive mode, we only optimize if we have some - // confidence that by optimizing we'll allow P and/or Q to be if-converted. - auto IsWorthwhile = [&](BasicBlock *BB) { - if (!BB) - return true; - // Heuristic: if the block can be if-converted/phi-folded and the - // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to - // thread this store. - unsigned N = 0; - for (auto &I : BB->instructionsWithoutDebug()) { - // Cheap instructions viable for folding. - if (isa<BinaryOperator>(I) || isa<GetElementPtrInst>(I) || - isa<StoreInst>(I)) - ++N; - // Free instructions. - else if (I.isTerminator() || IsaBitcastOfPointerType(I)) - continue; - else - return false; - } - // The store we want to merge is counted in N, so add 1 to make sure - // we're counting the instructions that would be left.
- return N <= (PHINodeFoldingThreshold + 1); - }; - - if (!MergeCondStoresAggressively && - (!IsWorthwhile(PTB) || !IsWorthwhile(PFB) || !IsWorthwhile(QTB) || - !IsWorthwhile(QFB))) - return false; - + const DataLayout &DL, + const TargetTransformInfo &TTI) { // For every pointer, there must be exactly two stores, one coming from // PTB or PFB, and the other from QTB or QFB. We don't support more than one // store (to any address) in PTB,PFB or QTB,QFB. @@ -2989,6 +2988,46 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, if (&*I != PStore && I->mayReadOrWriteMemory()) return false; + // If we're not in aggressive mode, we only optimize if we have some + // confidence that by optimizing we'll allow P and/or Q to be if-converted. + auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) { + if (!BB) + return true; + // Heuristic: if the block can be if-converted/phi-folded and the + // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to + // thread this store. + int BudgetRemaining = + PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; + for (auto &I : BB->instructionsWithoutDebug()) { + // Consider terminator instruction to be free. + if (I.isTerminator()) + continue; + // If this is one of the stores that we want to speculate out of this BB, + // then don't count its cost, consider it to be free. + if (auto *S = dyn_cast<StoreInst>(&I)) + if (llvm::is_contained(FreeStores, S)) + continue; + // Else, we have a white-list of instructions that we are okay speculating. + if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I)) + return false; // Not in white-list - not worthwhile folding. + // And finally, if this is a non-free instruction that we are okay + // speculating, ensure that we consider the speculation budget. + BudgetRemaining -= TTI.getUserCost(&I); + if (BudgetRemaining < 0) + return false; // Eagerly refuse to fold as soon as we're out of budget. + } + assert(BudgetRemaining >= 0 && + "When we run out of budget we will eagerly return from within the " + "per-instruction loop."); + return true; + }; + + const SmallVector<StoreInst *, 2> FreeStores = {PStore, QStore}; + if (!MergeCondStoresAggressively && + (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) || + !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores))) + return false; + // If PostBB has more than two predecessors, we need to split it so we can // sink the store. if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) { @@ -3048,15 +3087,15 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, // store that doesn't execute. if (MinAlignment != 0) { // Choose the minimum of all non-zero alignments. - SI->setAlignment(MinAlignment); + SI->setAlignment(Align(MinAlignment)); } else if (MaxAlignment != 0) { // Choose the minimal alignment between the non-zero alignment and the ABI // default alignment for the type of the stored value. - SI->setAlignment(std::min(MaxAlignment, TypeAlignment)); + SI->setAlignment(Align(std::min(MaxAlignment, TypeAlignment))); } else { // If both alignments are zero, use ABI default alignment for the type of // the stored value.
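DominatesMergePoint and the relocated IsWorthwhile now share one cost-model shape: a signed budget, decremented by each instruction's cost, where a negative balance refuses the transform. Using int instead of the old unsigned counter removes the wrap-avoidance arithmetic. In isolation (TCC_Basic = 1 and the threshold of 4 echo the surrounding defaults; everything else is illustrative):

#include <cstdio>
#include <vector>

bool worthSpeculating(const std::vector<int> &InstCosts, int BudgetRemaining) {
  for (int Cost : InstCosts) {
    BudgetRemaining -= Cost;
    if (BudgetRemaining < 0)
      return false; // Eagerly refuse as soon as the budget is exhausted.
  }
  return true;
}

int main() {
  const int TCC_Basic = 1;    // Stand-in for TargetTransformInfo::TCC_Basic.
  int Budget = 4 * TCC_Basic; // Mirrors TwoEntryPHINodeFoldingThreshold.
  std::printf("%d\n", worthSpeculating({1, 1, 1}, Budget)); // 1: fits.
  std::printf("%d\n", worthSpeculating({1, 4, 1}, Budget)); // 0: over budget.
  return 0;
}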
- SI->setAlignment(TypeAlignment); + SI->setAlignment(Align(TypeAlignment)); } QStore->eraseFromParent(); @@ -3066,7 +3105,8 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, } static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, - const DataLayout &DL) { + const DataLayout &DL, + const TargetTransformInfo &TTI) { // The intention here is to find diamonds or triangles (see below) where each // conditional block contains a store to the same address. Both of these // stores are conditional, so they can't be unconditionally sunk. But it may @@ -3168,7 +3208,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, bool Changed = false; for (auto *Address : CommonAddresses) Changed |= mergeConditionalStoreToAddress( - PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL); + PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL, TTI); return Changed; } @@ -3177,7 +3217,8 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, /// that PBI and BI are both conditional branches, and BI is in one of the /// successor blocks of PBI - PBI branches to BI. static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, - const DataLayout &DL) { + const DataLayout &DL, + const TargetTransformInfo &TTI) { assert(PBI->isConditional() && BI->isConditional()); BasicBlock *BB = BI->getParent(); @@ -3233,7 +3274,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // If both branches are conditional and both contain stores to the same // address, remove the stores from the conditionals and create a conditional // merged store at the end. - if (MergeCondStores && mergeConditionalStores(PBI, BI, DL)) + if (MergeCondStores && mergeConditionalStores(PBI, BI, DL, TTI)) return true; // If this is a conditional branch in an empty block, and if any @@ -3697,12 +3738,17 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, BasicBlock *BB = BI->getParent(); + // MSAN does not like undefs as a branch condition, which can be introduced + // with the "explicit branch" below. + if (ExtraCase && BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory)) + return false; + LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() << " cases into SWITCH. BB is:\n" << *BB); // If there are any extra values that couldn't be folded into the switch - // then we evaluate them with an explicit branch first. Split the block + // then we evaluate them with an explicit branch first. Split the block // right before the condbr to handle it. if (ExtraCase) { BasicBlock *NewBB = @@ -3851,7 +3897,7 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) { // Simplify resume that is only used by a single (non-phi) landing pad.
bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) { BasicBlock *BB = RI->getParent(); - LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI()); + auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHI()); assert(RI->getValue() == LPInst && "Resume must unwind the exception that caused control to here"); @@ -4178,23 +4224,22 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { IRBuilder<> Builder(TI); if (auto *BI = dyn_cast<BranchInst>(TI)) { if (BI->isUnconditional()) { - if (BI->getSuccessor(0) == BB) { - new UnreachableInst(TI->getContext(), TI); - TI->eraseFromParent(); - Changed = true; - } + assert(BI->getSuccessor(0) == BB && "Incorrect CFG"); + new UnreachableInst(TI->getContext(), TI); + TI->eraseFromParent(); + Changed = true; } else { Value* Cond = BI->getCondition(); if (BI->getSuccessor(0) == BB) { Builder.CreateAssumption(Builder.CreateNot(Cond)); Builder.CreateBr(BI->getSuccessor(1)); - EraseTerminatorAndDCECond(BI); - } else if (BI->getSuccessor(1) == BB) { + } else { + assert(BI->getSuccessor(1) == BB && "Incorrect CFG"); Builder.CreateAssumption(Cond); Builder.CreateBr(BI->getSuccessor(0)); - EraseTerminatorAndDCECond(BI); - Changed = true; } + EraseTerminatorAndDCECond(BI); + Changed = true; } } else if (auto *SI = dyn_cast<SwitchInst>(TI)) { SwitchInstProfUpdateWrapper SU(*SI); @@ -4276,6 +4321,17 @@ static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) { return true; } +static void createUnreachableSwitchDefault(SwitchInst *Switch) { + LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); + BasicBlock *NewDefaultBlock = + SplitBlockPredecessors(Switch->getDefaultDest(), Switch->getParent(), ""); + Switch->setDefaultDest(&*NewDefaultBlock); + SplitBlock(&*NewDefaultBlock, &NewDefaultBlock->front()); + auto *NewTerminator = NewDefaultBlock->getTerminator(); + new UnreachableInst(Switch->getContext(), NewTerminator); + EraseTerminatorAndDCECond(NewTerminator); +} + /// Turn a switch with two reachable destinations into an integer range /// comparison and branch. static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { @@ -4384,6 +4440,11 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); } + // Clean up the default block - it may have phis or other instructions before + // the unreachable terminator. + if (!HasDefault) + createUnreachableSwitchDefault(SI); + // Drop the switch. SI->eraseFromParent(); @@ -4428,14 +4489,7 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, if (HasDefault && DeadCases.empty() && NumUnknownBits < 64 /* avoid overflow */ && SI->getNumCases() == (1ULL << NumUnknownBits)) { - LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); - BasicBlock *NewDefault = - SplitBlockPredecessors(SI->getDefaultDest(), SI->getParent(), ""); - SI->setDefaultDest(&*NewDefault); - SplitBlock(&*NewDefault, &NewDefault->front()); - auto *OldTI = NewDefault->getTerminator(); - new UnreachableInst(SI->getContext(), OldTI); - EraseTerminatorAndDCECond(OldTI); + createUnreachableSwitchDefault(SI); return true; } @@ -5031,7 +5085,7 @@ SwitchLookupTable::SwitchLookupTable( Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Set the alignment to that of an array element. We will only be loading one // value out of it.
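The setAlignment call sites in these hunks now wrap raw integers in Align. A short sketch of what such a strong type buys, assuming only that alignments are non-zero powers of two (AlignSketch is not the llvm::Align implementation, just the idea):

#include <cassert>
#include <cstdint>
#include <cstdio>

class AlignSketch {
  uint64_t Value;

public:
  // The invariant is checked once, at construction.
  explicit AlignSketch(uint64_t V) : Value(V) {
    assert(V > 0 && (V & (V - 1)) == 0 && "alignment must be a power of two");
  }
  uint64_t value() const { return Value; }
};

// Callers cannot accidentally pass a bare integer where an alignment is
// expected; the constructor is explicit.
void setAlignmentSketch(AlignSketch A) {
  std::printf("aligned to %llu bytes\n", (unsigned long long)A.value());
}

int main() {
  setAlignmentSketch(AlignSketch(8)); // OK: 8 is a power of two.
  // setAlignmentSketch(16);          // Would not compile: no implicit int.
  return 0;
}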
- Array->setAlignment(DL.getPrefTypeAlignment(ValueType)); + Array->setAlignment(Align(DL.getPrefTypeAlignment(ValueType))); Kind = ArrayKind; } @@ -5260,7 +5314,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // Figure out the corresponding result for each case value and phi node in the // common destination, as well as the min and max case values. - assert(!empty(SI->cases())); + assert(!SI->cases().empty()); SwitchInst::CaseIt CI = SI->case_begin(); ConstantInt *MinCaseVal = CI->getCaseValue(); ConstantInt *MaxCaseVal = CI->getCaseValue(); @@ -5892,7 +5946,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) if (PBI != BI && PBI->isConditional()) - if (SimplifyCondBranchToCondBranch(PBI, BI, DL)) + if (SimplifyCondBranchToCondBranch(PBI, BI, DL, TTI)) return requestResimplify(); // Look for diamond patterns. @@ -5900,7 +5954,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB)) if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator())) if (PBI != BI && PBI->isConditional()) - if (mergeConditionalStores(PBI, BI, DL)) + if (mergeConditionalStores(PBI, BI, DL, TTI)) return requestResimplify(); return false; diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index e0def81d5eee..0324993a8203 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/KnownBits.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/SizeOpts.h" @@ -47,7 +48,6 @@ static cl::opt<bool> cl::desc("Enable unsafe double to float " "shrinking for math lib calls")); - //===----------------------------------------------------------------------===// // Helper Functions //===----------------------------------------------------------------------===// @@ -177,7 +177,8 @@ static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len, if (!isOnlyUsedInComparisonWithZero(CI)) return false; - if (!isDereferenceableAndAlignedPointer(Str, 1, APInt(64, Len), DL)) + if (!isDereferenceableAndAlignedPointer(Str, Align::None(), APInt(64, Len), + DL)) return false; if (CI->getFunction()->hasFnAttribute(Attribute::SanitizeMemory)) @@ -186,6 +187,67 @@ static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len, return true; } +static void annotateDereferenceableBytes(CallInst *CI, + ArrayRef<unsigned> ArgNos, + uint64_t DereferenceableBytes) { + const Function *F = CI->getCaller(); + if (!F) + return; + for (unsigned ArgNo : ArgNos) { + uint64_t DerefBytes = DereferenceableBytes; + unsigned AS = CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace(); + if (!llvm::NullPointerIsDefined(F, AS) || + CI->paramHasAttr(ArgNo, Attribute::NonNull)) + DerefBytes = std::max(CI->getDereferenceableOrNullBytes( + ArgNo + AttributeList::FirstArgIndex), + DereferenceableBytes); + + if (CI->getDereferenceableBytes(ArgNo + AttributeList::FirstArgIndex) < + DerefBytes) { + CI->removeParamAttr(ArgNo, Attribute::Dereferenceable); + if (!llvm::NullPointerIsDefined(F, AS) || + CI->paramHasAttr(ArgNo, Attribute::NonNull)) + CI->removeParamAttr(ArgNo, 
+                            Attribute::DereferenceableOrNull);
+      CI->addParamAttr(ArgNo, Attribute::getWithDereferenceableBytes(
+                                  CI->getContext(), DerefBytes));
+    }
+  }
+}
+
+static void annotateNonNullBasedOnAccess(CallInst *CI,
+                                         ArrayRef<unsigned> ArgNos) {
+  Function *F = CI->getCaller();
+  if (!F)
+    return;
+
+  for (unsigned ArgNo : ArgNos) {
+    if (CI->paramHasAttr(ArgNo, Attribute::NonNull))
+      continue;
+    unsigned AS = CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace();
+    if (llvm::NullPointerIsDefined(F, AS))
+      continue;
+
+    CI->addParamAttr(ArgNo, Attribute::NonNull);
+    annotateDereferenceableBytes(CI, ArgNo, 1);
+  }
+}
+
+static void annotateNonNullAndDereferenceable(CallInst *CI,
+                                              ArrayRef<unsigned> ArgNos,
+                                              Value *Size,
+                                              const DataLayout &DL) {
+  if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size)) {
+    annotateNonNullBasedOnAccess(CI, ArgNos);
+    annotateDereferenceableBytes(CI, ArgNos, LenC->getZExtValue());
+  } else if (isKnownNonZero(Size, DL)) {
+    annotateNonNullBasedOnAccess(CI, ArgNos);
+    const APInt *X, *Y;
+    uint64_t DerefMin = 1;
+    if (match(Size, m_Select(m_Value(), m_APInt(X), m_APInt(Y)))) {
+      DerefMin = std::min(X->getZExtValue(), Y->getZExtValue());
+      annotateDereferenceableBytes(CI, ArgNos, DerefMin);
+    }
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // String and Memory Library Call Optimizations
 //===----------------------------------------------------------------------===//
 
@@ -194,10 +256,13 @@ Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilder<> &B) {
   // Extract some information from the instruction
   Value *Dst = CI->getArgOperand(0);
   Value *Src = CI->getArgOperand(1);
+  annotateNonNullBasedOnAccess(CI, {0, 1});
 
   // See if we can get the length of the input string.
   uint64_t Len = GetStringLength(Src);
-  if (Len == 0)
+  if (Len)
+    annotateDereferenceableBytes(CI, 1, Len);
+  else
     return nullptr;
   --Len; // Unbias length.
 
@@ -232,24 +297,34 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) {
   // Extract some information from the instruction.
   Value *Dst = CI->getArgOperand(0);
   Value *Src = CI->getArgOperand(1);
+  Value *Size = CI->getArgOperand(2);
   uint64_t Len;
+  annotateNonNullBasedOnAccess(CI, 0);
+  if (isKnownNonZero(Size, DL))
+    annotateNonNullBasedOnAccess(CI, 1);
 
   // We don't do anything if length is not constant.
-  if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+  ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size);
+  if (LengthArg) {
     Len = LengthArg->getZExtValue();
-  else
+    // strncat(x, c, 0) -> x
+    if (!Len)
+      return Dst;
+  } else {
     return nullptr;
+  }
 
   // See if we can get the length of the input string.
   uint64_t SrcLen = GetStringLength(Src);
-  if (SrcLen == 0)
+  if (SrcLen) {
+    annotateDereferenceableBytes(CI, 1, SrcLen);
+    --SrcLen; // Unbias length.
+  } else {
     return nullptr;
-  --SrcLen; // Unbias length.
+  }
 
-  // Handle the simple, do-nothing cases:
   // strncat(x, "", c) -> x
-  // strncat(x, c, 0) -> x
-  if (SrcLen == 0 || Len == 0)
+  if (SrcLen == 0)
     return Dst;
 
   // We don't optimize this case.
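One non-obvious piece of the new annotateNonNullAndDereferenceable() helper above is the select case: when the size operand is cond ? X : Y with both arms constant and known non-zero, every execution touches at least min(X, Y) bytes. A tiny hedged C++ sketch of just that bound computation, outside of LLVM (derefBoundForSelect is an invented name):

#include <algorithm>
#include <cassert>
#include <cstdint>

// If size = cond ? X : Y and the access always happens, at least
// min(X, Y) bytes are touched on every path, so that is the provable
// dereferenceable amount (mirrors the m_Select(m_APInt, m_APInt) match).
static uint64_t derefBoundForSelect(uint64_t X, uint64_t Y) {
  return std::min(X, Y);
}

int main() {
  assert(derefBoundForSelect(16, 4) == 4);
  return 0;
}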
@@ -265,13 +340,18 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
   Function *Callee = CI->getCalledFunction();
   FunctionType *FT = Callee->getFunctionType();
   Value *SrcStr = CI->getArgOperand(0);
+  annotateNonNullBasedOnAccess(CI, 0);
 
   // If the second operand is non-constant, see if we can compute the length
   // of the input string and turn this into memchr.
   ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
   if (!CharC) {
     uint64_t Len = GetStringLength(SrcStr);
-    if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
+    if (Len)
+      annotateDereferenceableBytes(CI, 0, Len);
+    else
+      return nullptr;
+    if (!FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
       return nullptr;
 
     return emitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
@@ -304,6 +384,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
 Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
   Value *SrcStr = CI->getArgOperand(0);
   ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+  annotateNonNullBasedOnAccess(CI, 0);
 
   // Cannot fold anything if we're not looking for a constant.
   if (!CharC)
@@ -351,7 +432,12 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
   // strcmp(P, "x") -> memcmp(P, "x", 2)
   uint64_t Len1 = GetStringLength(Str1P);
+  if (Len1)
+    annotateDereferenceableBytes(CI, 0, Len1);
   uint64_t Len2 = GetStringLength(Str2P);
+  if (Len2)
+    annotateDereferenceableBytes(CI, 1, Len2);
+
   if (Len1 && Len2) {
     return emitMemCmp(Str1P, Str2P,
                       ConstantInt::get(DL.getIntPtrType(CI->getContext()),
@@ -374,17 +460,22 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
                        TLI);
   }
 
+  annotateNonNullBasedOnAccess(CI, {0, 1});
   return nullptr;
 }
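The strcmp(P, "x") -> memcmp(P, "x", 2) fold in the hunk above works because the constant string contributes its nul byte, so comparing Len + 1 bytes is enough, and the two calls agree in sign and zeroness; the buffers must be dereferenceable for that many bytes, which is what canTransformToMemCmp checks. A quick C++ spot check of the agreement, with arbitrary test buffers:

#include <cassert>
#include <cstring>

int main() {
  // Each element is char[4], so memcmp reading 2 bytes is always in-bounds.
  const char Tests[][4] = {"", "w", "x", "xx", "y"};
  for (const char *P : Tests) {
    int S = std::strcmp(P, "x");
    int M = std::memcmp(P, "x", 2);
    assert((S < 0) == (M < 0) && (S == 0) == (M == 0));
  }
  return 0;
}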
 Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
-  Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+  Value *Str1P = CI->getArgOperand(0);
+  Value *Str2P = CI->getArgOperand(1);
+  Value *Size = CI->getArgOperand(2);
   if (Str1P == Str2P) // strncmp(x,x,n) -> 0
     return ConstantInt::get(CI->getType(), 0);
 
+  if (isKnownNonZero(Size, DL))
+    annotateNonNullBasedOnAccess(CI, {0, 1});
   // Get the length argument if it is constant.
   uint64_t Length;
-  if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+  if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size))
     Length = LengthArg->getZExtValue();
   else
     return nullptr;
@@ -393,7 +484,7 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
     return ConstantInt::get(CI->getType(), 0);
 
   if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
-    return emitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI);
+    return emitMemCmp(Str1P, Str2P, Size, B, DL, TLI);
 
   StringRef Str1, Str2;
   bool HasStr1 = getConstantStringInfo(Str1P, Str1);
@@ -415,7 +506,11 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
                             CI->getType());
 
   uint64_t Len1 = GetStringLength(Str1P);
+  if (Len1)
+    annotateDereferenceableBytes(CI, 0, Len1);
   uint64_t Len2 = GetStringLength(Str2P);
+  if (Len2)
+    annotateDereferenceableBytes(CI, 1, Len2);
 
   // strncmp to memcmp
   if (!HasStr1 && HasStr2) {
@@ -437,20 +532,38 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
   return nullptr;
 }
 
+Value *LibCallSimplifier::optimizeStrNDup(CallInst *CI, IRBuilder<> &B) {
+  Value *Src = CI->getArgOperand(0);
+  ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+  uint64_t SrcLen = GetStringLength(Src);
+  if (SrcLen && Size) {
+    annotateDereferenceableBytes(CI, 0, SrcLen);
+    if (SrcLen <= Size->getZExtValue() + 1)
+      return emitStrDup(Src, B, TLI);
+  }
+
+  return nullptr;
+}
+
 Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
   Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
 
   if (Dst == Src) // strcpy(x,x) -> x
     return Src;
-
+
+  annotateNonNullBasedOnAccess(CI, {0, 1});
   // See if we can get the length of the input string.
   uint64_t Len = GetStringLength(Src);
-  if (Len == 0)
+  if (Len)
+    annotateDereferenceableBytes(CI, 1, Len);
+  else
     return nullptr;
 
   // We have enough information to now generate the memcpy call to do the
   // copy for us.  Make a memcpy to copy the nul byte with align = 1.
-  B.CreateMemCpy(Dst, 1, Src, 1,
-                 ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));
+  CallInst *NewCI =
+      B.CreateMemCpy(Dst, 1, Src, 1,
+                     ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));
+  NewCI->setAttributes(CI->getAttributes());
   return Dst;
 }
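The optimizeStrCpy hunk above relies on a simple source-level fact: once GetStringLength() proves the source length, strcpy degenerates to a fixed-size memcpy that includes the terminating nul. A hedged C++ illustration with made-up buffers:

#include <cassert>
#include <cstring>

int main() {
  // strcpy(dst, "abc") == memcpy(dst, "abc", 4): the known length counts
  // the nul terminator, so the two copies are byte-for-byte identical.
  char A[8], B[8];
  std::strcpy(A, "abc");
  std::memcpy(B, "abc", sizeof("abc")); // sizeof includes the nul: 4
  assert(std::memcmp(A, B, 4) == 0);
  return 0;
}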
@@ -464,7 +577,9 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
 
   // See if we can get the length of the input string.
   uint64_t Len = GetStringLength(Src);
-  if (Len == 0)
+  if (Len)
+    annotateDereferenceableBytes(CI, 1, Len);
+  else
     return nullptr;
 
   Type *PT = Callee->getFunctionType()->getParamType(0);
@@ -474,7 +589,8 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
 
   // We have enough information to now generate the memcpy call to do the
   // copy for us.  Make a memcpy to copy the nul byte with align = 1.
-  B.CreateMemCpy(Dst, 1, Src, 1, LenV);
+  CallInst *NewCI = B.CreateMemCpy(Dst, 1, Src, 1, LenV);
+  NewCI->setAttributes(CI->getAttributes());
   return DstEnd;
 }
 
@@ -482,37 +598,47 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
   Function *Callee = CI->getCalledFunction();
   Value *Dst = CI->getArgOperand(0);
   Value *Src = CI->getArgOperand(1);
-  Value *LenOp = CI->getArgOperand(2);
+  Value *Size = CI->getArgOperand(2);
+  annotateNonNullBasedOnAccess(CI, 0);
+  if (isKnownNonZero(Size, DL))
+    annotateNonNullBasedOnAccess(CI, 1);
+
+  uint64_t Len;
+  if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size))
+    Len = LengthArg->getZExtValue();
+  else
+    return nullptr;
+
+  // strncpy(x, y, 0) -> x
+  if (Len == 0)
+    return Dst;
 
   // See if we can get the length of the input string.
   uint64_t SrcLen = GetStringLength(Src);
-  if (SrcLen == 0)
+  if (SrcLen) {
+    annotateDereferenceableBytes(CI, 1, SrcLen);
+    --SrcLen; // Unbias length.
+  } else {
     return nullptr;
-  --SrcLen;
+  }
 
   if (SrcLen == 0) {
     // strncpy(x, "", y) -> memset(align 1 x, '\0', y)
-    B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1);
+    CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, 1);
+    AttrBuilder ArgAttrs(CI->getAttributes().getParamAttributes(0));
+    NewCI->setAttributes(NewCI->getAttributes().addParamAttributes(
+        CI->getContext(), 0, ArgAttrs));
     return Dst;
   }
 
-  uint64_t Len;
-  if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp))
-    Len = LengthArg->getZExtValue();
-  else
-    return nullptr;
-
-  if (Len == 0)
-    return Dst; // strncpy(x, y, 0) -> x
-
   // Let strncpy handle the zero padding
   if (Len > SrcLen + 1)
     return nullptr;
 
   Type *PT = Callee->getFunctionType()->getParamType(0);
   // strncpy(x, s, c) -> memcpy(align 1 x, align 1 s, c) [s and c are constant]
-  B.CreateMemCpy(Dst, 1, Src, 1, ConstantInt::get(DL.getIntPtrType(PT), Len));
-
+  CallInst *NewCI = B.CreateMemCpy(
+      Dst, 1, Src, 1, ConstantInt::get(DL.getIntPtrType(PT), Len));
+  NewCI->setAttributes(CI->getAttributes());
   return Dst;
 }
 
@@ -608,7 +734,10 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B,
 }
 
 Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
-  return optimizeStringLength(CI, B, 8);
+  if (Value *V = optimizeStringLength(CI, B, 8))
+    return V;
+  annotateNonNullBasedOnAccess(CI, 0);
+  return nullptr;
 }
 
 Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilder<> &B) {
@@ -756,21 +885,35 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) {
     Value *StrChr = emitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI);
     return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr;
   }
+
+  annotateNonNullBasedOnAccess(CI, {0, 1});
+  return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeMemRChr(CallInst *CI, IRBuilder<> &B) {
+  if (isKnownNonZero(CI->getOperand(2), DL))
+    annotateNonNullBasedOnAccess(CI, 0);
   return nullptr;
 }
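The rewritten optimizeStrNCpy above encodes three source-level facts: strncpy(x, y, 0) is x, strncpy(x, "", n) zero-fills n bytes exactly like memset, and for a constant source with n <= strlen(y) + 1 the call is a plain memcpy. A small check of the memset case, with arbitrary buffer sizes (some compilers warn about strncpy truncation; that is incidental here):

#include <cassert>
#include <cstring>

int main() {
  // strncpy(dst, "", n) pads the whole destination with '\0',
  // which is exactly memset(dst, 0, n).
  char A[8], B[8];
  std::memset(A, 'x', sizeof(A));
  std::memset(B, 'x', sizeof(B));
  std::strncpy(A, "", 6);
  std::memset(B, 0, 6);
  assert(std::memcmp(A, B, sizeof(A)) == 0);
  return 0;
}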
 Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
   Value *SrcStr = CI->getArgOperand(0);
+  Value *Size = CI->getArgOperand(2);
+  annotateNonNullAndDereferenceable(CI, 0, Size, DL);
   ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
-  ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+  ConstantInt *LenC = dyn_cast<ConstantInt>(Size);
 
   // memchr(x, y, 0) -> null
-  if (LenC && LenC->isZero())
-    return Constant::getNullValue(CI->getType());
+  if (LenC) {
+    if (LenC->isZero())
+      return Constant::getNullValue(CI->getType());
+  } else {
+    // From now on we need at least constant length and string.
+    return nullptr;
+  }
 
-  // From now on we need at least constant length and string.
   StringRef Str;
-  if (!LenC || !getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false))
+  if (!getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false))
     return nullptr;
 
   // Truncate the string to LenC. If Str is smaller than LenC we will still only
@@ -913,6 +1056,7 @@ static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,
       Ret = 1;
     return ConstantInt::get(CI->getType(), Ret);
   }
+
   return nullptr;
 }
 
@@ -925,12 +1069,19 @@ Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI,
   if (LHS == RHS) // memcmp(s,s,x) -> 0
     return Constant::getNullValue(CI->getType());
 
+  annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
   // Handle constant lengths.
-  if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size))
-    if (Value *Res = optimizeMemCmpConstantSize(CI, LHS, RHS,
-                                                LenC->getZExtValue(), B, DL))
-      return Res;
+  ConstantInt *LenC = dyn_cast<ConstantInt>(Size);
+  if (!LenC)
+    return nullptr;
 
+  // memcmp(d,s,0) -> 0
+  if (LenC->getZExtValue() == 0)
+    return Constant::getNullValue(CI->getType());
+
+  if (Value *Res =
+          optimizeMemCmpConstantSize(CI, LHS, RHS, LenC->getZExtValue(), B, DL))
+    return Res;
   return nullptr;
 }
 
@@ -939,9 +1090,9 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
     return V;
 
   // memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0
-  // `bcmp` can be more efficient than memcmp because it only has to know that
-  // there is a difference, not where it is.
-  if (isOnlyUsedInZeroEqualityComparison(CI) && TLI->has(LibFunc_bcmp)) {
+  // bcmp can be more efficient than memcmp because it only has to know that
+  // there is a difference, not how different one is to the other.
+  if (TLI->has(LibFunc_bcmp) && isOnlyUsedInZeroEqualityComparison(CI)) {
     Value *LHS = CI->getArgOperand(0);
     Value *RHS = CI->getArgOperand(1);
     Value *Size = CI->getArgOperand(2);
@@ -956,16 +1107,37 @@ Value *LibCallSimplifier::optimizeBCmp(CallInst *CI, IRBuilder<> &B) {
 }
 
 Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
+  Value *Size = CI->getArgOperand(2);
+  annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
+  if (isa<IntrinsicInst>(CI))
+    return nullptr;
+
   // memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n)
-  B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
-                 CI->getArgOperand(2));
+  CallInst *NewCI =
+      B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, Size);
+  NewCI->setAttributes(CI->getAttributes());
   return CI->getArgOperand(0);
 }
 
+Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilder<> &B) {
+  Value *Dst = CI->getArgOperand(0);
+  Value *N = CI->getArgOperand(2);
+  // mempcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n), x + n
+  CallInst *NewCI = B.CreateMemCpy(Dst, 1, CI->getArgOperand(1), 1, N);
+  NewCI->setAttributes(CI->getAttributes());
+  return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N);
+}
+
 Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
+  Value *Size = CI->getArgOperand(2);
+  annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
+  if (isa<IntrinsicInst>(CI))
+    return nullptr;
+
   // memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n)
-  B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
-                  CI->getArgOperand(2));
+  CallInst *NewCI =
+      B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, Size);
+  NewCI->setAttributes(CI->getAttributes());
  return CI->getArgOperand(0);
 }
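The new optimizeMemPCpy above depends only on mempcpy's contract: copy n bytes, then return dst + n (hence the llvm.memcpy plus inbounds GEP). mempcpy itself is a GNU extension, so the sketch below implements the same contract locally and runs anywhere; my_mempcpy is an invented stand-in:

#include <cassert>
#include <cstring>

// Same contract as GNU mempcpy: copy N bytes, return Dst + N.
static void *my_mempcpy(void *Dst, const void *Src, std::size_t N) {
  std::memcpy(Dst, Src, N);
  return static_cast<char *>(Dst) + N;
}

int main() {
  char Buf[8];
  // The returned cursor lets back-to-back copies concatenate:
  void *P = my_mempcpy(Buf, "ab", 2);
  my_mempcpy(P, "cd", 3); // 3 bytes copies the nul too
  assert(std::strcmp(Buf, "abcd") == 0);
  return 0;
}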
@@ -1003,25 +1175,29 @@ Value *LibCallSimplifier::foldMallocMemset(CallInst *Memset, IRBuilder<> &B) {
   B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator());
   const DataLayout &DL = Malloc->getModule()->getDataLayout();
   IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext());
-  Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1),
-                             Malloc->getArgOperand(0), Malloc->getAttributes(),
-                             B, *TLI);
-  if (!Calloc)
-    return nullptr;
-
-  Malloc->replaceAllUsesWith(Calloc);
-  eraseFromParent(Malloc);
+  if (Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1),
+                                 Malloc->getArgOperand(0),
+                                 Malloc->getAttributes(), B, *TLI)) {
+    substituteInParent(Malloc, Calloc);
+    return Calloc;
+  }
 
-  return Calloc;
+  return nullptr;
 }
 
 Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
+  Value *Size = CI->getArgOperand(2);
+  annotateNonNullAndDereferenceable(CI, 0, Size, DL);
+  if (isa<IntrinsicInst>(CI))
+    return nullptr;
+
   if (auto *Calloc = foldMallocMemset(CI, B))
     return Calloc;
 
   // memset(p, v, n) -> llvm.memset(align 1 p, v, n)
   Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
-  B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+  CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, 1);
+  NewCI->setAttributes(CI->getAttributes());
   return CI->getArgOperand(0);
 }
 
@@ -1096,21 +1272,18 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B,
   if (!V[0] || (isBinary && !V[1]))
     return nullptr;
 
-  StringRef CalleeNm = CalleeFn->getName();
-  AttributeList CalleeAt = CalleeFn->getAttributes();
-  bool CalleeIn = CalleeFn->isIntrinsic();
-
   // If call isn't an intrinsic, check that it isn't within a function with the
   // same name as the float version of this call, otherwise the result is an
   // infinite loop.  For example, from MinGW-w64:
   //
   // float expf(float val) { return (float) exp((double) val); }
-  if (!CalleeIn) {
-    const Function *Fn = CI->getFunction();
-    StringRef FnName = Fn->getName();
-    if (FnName.back() == 'f' &&
-        FnName.size() == (CalleeNm.size() + 1) &&
-        FnName.startswith(CalleeNm))
+  StringRef CalleeName = CalleeFn->getName();
+  bool IsIntrinsic = CalleeFn->isIntrinsic();
+  if (!IsIntrinsic) {
+    StringRef CallerName = CI->getFunction()->getName();
+    if (!CallerName.empty() && CallerName.back() == 'f' &&
+        CallerName.size() == (CalleeName.size() + 1) &&
+        CallerName.startswith(CalleeName))
       return nullptr;
   }
 
@@ -1120,16 +1293,16 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B,
   // g((double) float) -> (double) gf(float)
   Value *R;
-  if (CalleeIn) {
+  if (IsIntrinsic) {
     Module *M = CI->getModule();
     Intrinsic::ID IID = CalleeFn->getIntrinsicID();
     Function *Fn = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
     R = isBinary ? B.CreateCall(Fn, V) : B.CreateCall(Fn, V[0]);
+  } else {
+    AttributeList CalleeAttrs = CalleeFn->getAttributes();
+    R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], CalleeName, B, CalleeAttrs)
+                 : emitUnaryFloatFnCall(V[0], CalleeName, B, CalleeAttrs);
   }
-  else
-    R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], CalleeNm, B, CalleeAt)
-                 : emitUnaryFloatFnCall(V[0], CalleeNm, B, CalleeAt);
-
   return B.CreateFPExt(R, B.getDoubleTy());
 }
 
@@ -1234,9 +1407,25 @@ static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) {
   return InnerChain[Exp];
 }
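The optimizeDoubleFP cleanup above keeps the guard against shrinking exp((double)x) back to expf() inside MinGW-w64's own expf, where float expf(float val) { return (float) exp((double) val); } would otherwise recurse forever. The name test it performs -- the caller is the callee's name plus a single trailing 'f' -- is easy to state standalone; isFloatWrapperOf below is a hypothetical name:

#include <cassert>
#include <string>

// True when CallerName is CalleeName with one trailing 'f' appended,
// e.g. caller "expf" wrapping callee "exp" -- the case where the
// double->float shrink must be suppressed to avoid infinite recursion.
static bool isFloatWrapperOf(const std::string &CallerName,
                             const std::string &CalleeName) {
  return !CallerName.empty() && CallerName.back() == 'f' &&
         CallerName.size() == CalleeName.size() + 1 &&
         CallerName.compare(0, CalleeName.size(), CalleeName) == 0;
}

int main() {
  assert(isFloatWrapperOf("expf", "exp"));
  assert(!isFloatWrapperOf("expo", "exp")); // wrong suffix
  assert(!isFloatWrapperOf("exp", "exp"));  // wrong length
  return 0;
}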
+// Return a properly extended 32-bit integer if the operation is an itofp.
+static Value *getIntToFPVal(Value *I2F, IRBuilder<> &B) {
+  if (isa<SIToFPInst>(I2F) || isa<UIToFPInst>(I2F)) {
+    Value *Op = cast<Instruction>(I2F)->getOperand(0);
+    // Make sure that the exponent fits inside an int32_t,
+    // thus avoiding any range issues that FP has not.
+    unsigned BitWidth = Op->getType()->getPrimitiveSizeInBits();
+    if (BitWidth < 32 ||
+        (BitWidth == 32 && isa<SIToFPInst>(I2F)))
+      return isa<SIToFPInst>(I2F) ? B.CreateSExt(Op, B.getInt32Ty())
+                                  : B.CreateZExt(Op, B.getInt32Ty());
+  }
+
+  return nullptr;
+}
+
 /// Use exp{,2}(x * y) for pow(exp{,2}(x), y);
-/// exp2(n * x) for pow(2.0 ** n, x); exp10(x) for pow(10.0, x);
-/// exp2(log2(n) * x) for pow(n, x).
+/// ldexp(1.0, x) for pow(2.0, itofp(x)); exp2(n * x) for pow(2.0 ** n, x);
+/// exp10(x) for pow(10.0, x); exp2(log2(n) * x) for pow(n, x).
 Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
   Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
   AttributeList Attrs = Pow->getCalledFunction()->getAttributes();
@@ -1269,9 +1458,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
     StringRef ExpName;
     Intrinsic::ID ID;
     Value *ExpFn;
-    LibFunc LibFnFloat;
-    LibFunc LibFnDouble;
-    LibFunc LibFnLongDouble;
+    LibFunc LibFnFloat, LibFnDouble, LibFnLongDouble;
 
     switch (LibFn) {
     default:
@@ -1305,9 +1492,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
       // elimination cannot be trusted to remove it, since it may have side
       // effects (e.g., errno).  When the only consumer for the original
       // exp{,2}() is pow(), then it has to be explicitly erased.
-      BaseFn->replaceAllUsesWith(ExpFn);
-      eraseFromParent(BaseFn);
-
+      substituteInParent(BaseFn, ExpFn);
       return ExpFn;
     }
   }
@@ -1318,8 +1503,18 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
   if (!match(Pow->getArgOperand(0), m_APFloat(BaseF)))
     return nullptr;
 
+  // pow(2.0, itofp(x)) -> ldexp(1.0, x)
+  if (match(Base, m_SpecificFP(2.0)) &&
+      (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo)) &&
+      hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {
+    if (Value *ExpoI = getIntToFPVal(Expo, B))
+      return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), ExpoI, TLI,
+                                   LibFunc_ldexp, LibFunc_ldexpf,
+                                   LibFunc_ldexpl, B, Attrs);
+  }
+
   // pow(2.0 ** n, x) -> exp2(n * x)
-  if (hasUnaryFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) {
+  if (hasFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) {
     APFloat BaseR = APFloat(1.0);
     BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
     BaseR = BaseR / *BaseF;
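The new pow(2.0, itofp(x)) -> ldexp(1.0, x) rewrite above is an exact identity for every integer exponent getIntToFPVal() admits, because scaling 1.0 by 2^n just adjusts the binary exponent. A quick <cmath> spot check over a modest range (any reasonable libm returns exact powers of two here):

#include <cassert>
#include <cmath>

int main() {
  // pow(2.0, (double)n) == ldexp(1.0, n): both produce the exact
  // power of two while n stays well inside double's exponent range.
  for (int N = -10; N <= 10; ++N)
    assert(std::pow(2.0, static_cast<double>(N)) == std::ldexp(1.0, N));
  return 0;
}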
@@ -1344,7 +1539,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
   // pow(10.0, x) -> exp10(x)
   // TODO: There is no exp10() intrinsic yet, but some day there shall be one.
   if (match(Base, m_SpecificFP(10.0)) &&
-      hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
+      hasFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
     return emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f,
                                 LibFunc_exp10l, B, Attrs);
 
@@ -1359,17 +1554,15 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
     if (Log) {
       Value *FMul = B.CreateFMul(Log, Expo, "mul");
-      if (Pow->doesNotAccessMemory()) {
+      if (Pow->doesNotAccessMemory())
         return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty),
                             FMul, "exp2");
-      } else {
-        if (hasUnaryFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f,
-                            LibFunc_exp2l))
-          return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f,
-                                      LibFunc_exp2l, B, Attrs);
-      }
+      else if (hasFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l))
+        return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f,
+                                    LibFunc_exp2l, B, Attrs);
     }
   }
+
   return nullptr;
 }
 
@@ -1384,8 +1577,7 @@ static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno,
   }
 
   // Otherwise, use the libcall for sqrt().
-  if (hasUnaryFloatFn(TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf,
-                      LibFunc_sqrtl))
+  if (hasFloatFn(TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl))
     // TODO: We also should check that the target can in fact lower the sqrt()
     // libcall. We currently have no way to ask this question, so we ask if
     // the target has a sqrt() libcall, which is not exactly the same.
@@ -1452,7 +1644,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
   bool Ignored;
 
   // Bail out if simplifying libcalls to pow() is disabled.
-  if (!hasUnaryFloatFn(TLI, Ty, LibFunc_pow, LibFunc_powf, LibFunc_powl))
+  if (!hasFloatFn(TLI, Ty, LibFunc_pow, LibFunc_powf, LibFunc_powl))
     return nullptr;
 
   // Propagate the math semantics from the call to any created instructions.
@@ -1480,8 +1672,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
   if (match(Expo, m_SpecificFP(-1.0)))
     return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal");
 
-  // pow(x, 0.0) -> 1.0
-  if (match(Expo, m_SpecificFP(0.0)))
+  // pow(x, +/-0.0) -> 1.0
+  if (match(Expo, m_AnyZeroFP()))
     return ConstantFP::get(Ty, 1.0);
 
   // pow(x, 1.0) -> x
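Switching the fold above from m_SpecificFP(0.0) to m_AnyZeroFP() is justified by C99 Annex F (F.9.4.4): pow(x, +/-0) is 1 for every x, even NaN and infinity, so the base needs no constraints at all. This is directly checkable:

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  // pow(x, +0.0) and pow(x, -0.0) are 1.0 for all x per C99 Annex F.
  const double Bases[] = {2.5, -3.0, 0.0,
                          std::numeric_limits<double>::infinity(),
                          std::numeric_limits<double>::quiet_NaN()};
  for (double X : Bases) {
    assert(std::pow(X, 0.0) == 1.0);
    assert(std::pow(X, -0.0) == 1.0);
  }
  return 0;
}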
@@ -1558,16 +1750,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
   // powf(x, itofp(y)) -> powi(x, y)
   if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) {
-    Value *IntExpo = cast<Instruction>(Expo)->getOperand(0);
-    Value *NewExpo = nullptr;
-    unsigned BitWidth = IntExpo->getType()->getPrimitiveSizeInBits();
-    if (isa<SIToFPInst>(Expo) && BitWidth == 32)
-      NewExpo = IntExpo;
-    else if (BitWidth < 32)
-      NewExpo = isa<SIToFPInst>(Expo) ? B.CreateSExt(IntExpo, B.getInt32Ty())
-                                      : B.CreateZExt(IntExpo, B.getInt32Ty());
-    if (NewExpo)
-      return createPowWithIntegerExponent(Base, NewExpo, M, B);
+    if (Value *ExpoI = getIntToFPVal(Expo, B))
+      return createPowWithIntegerExponent(Base, ExpoI, M, B);
   }
 
   return Shrunk;
@@ -1575,45 +1759,25 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
 
 Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
   Function *Callee = CI->getCalledFunction();
-  Value *Ret = nullptr;
   StringRef Name = Callee->getName();
-  if (UnsafeFPShrink && Name == "exp2" && hasFloatVersion(Name))
+  Value *Ret = nullptr;
+  if (UnsafeFPShrink && Name == TLI->getName(LibFunc_exp2) &&
+      hasFloatVersion(Name))
     Ret = optimizeUnaryDoubleFP(CI, B, true);
 
+  Type *Ty = CI->getType();
   Value *Op = CI->getArgOperand(0);
+
   // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x))  if sizeof(x) <= 32
   // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x))  if sizeof(x) < 32
-  LibFunc LdExp = LibFunc_ldexpl;
-  if (Op->getType()->isFloatTy())
-    LdExp = LibFunc_ldexpf;
-  else if (Op->getType()->isDoubleTy())
-    LdExp = LibFunc_ldexp;
-
-  if (TLI->has(LdExp)) {
-    Value *LdExpArg = nullptr;
-    if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
-      if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
-        LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
-    } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
-      if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
-        LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty());
-    }
-
-    if (LdExpArg) {
-      Constant *One = ConstantFP::get(CI->getContext(), APFloat(1.0f));
-      if (!Op->getType()->isFloatTy())
-        One = ConstantExpr::getFPExtend(One, Op->getType());
-
-      Module *M = CI->getModule();
-      FunctionCallee NewCallee = M->getOrInsertFunction(
-          TLI->getName(LdExp), Op->getType(), Op->getType(), B.getInt32Ty());
-      CallInst *CI = B.CreateCall(NewCallee, {One, LdExpArg});
-      if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
-        CI->setCallingConv(F->getCallingConv());
-
-      return CI;
-    }
+  if ((isa<SIToFPInst>(Op) || isa<UIToFPInst>(Op)) &&
+      hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {
+    if (Value *Exp = getIntToFPVal(Op, B))
+      return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), Exp, TLI,
+                                   LibFunc_ldexp, LibFunc_ldexpf,
+                                   LibFunc_ldexpl, B,
+                                   CI->getCalledFunction()->getAttributes());
   }
+
   return Ret;
 }
 
@@ -1644,48 +1808,155 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
   return B.CreateCall(F, { CI->getArgOperand(0), CI->getArgOperand(1) });
 }
 
-Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
-  Function *Callee = CI->getCalledFunction();
+Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilder<> &B) {
+  Function *LogFn = Log->getCalledFunction();
+  AttributeList Attrs = LogFn->getAttributes();
+  StringRef LogNm = LogFn->getName();
+  Intrinsic::ID LogID = LogFn->getIntrinsicID();
+  Module *Mod = Log->getModule();
+  Type *Ty = Log->getType();
   Value *Ret = nullptr;
-  StringRef Name = Callee->getName();
-  if (UnsafeFPShrink && hasFloatVersion(Name))
-    Ret = optimizeUnaryDoubleFP(CI, B, true);
 
-  if (!CI->isFast())
-    return Ret;
-  Value *Op1 = CI->getArgOperand(0);
-  auto *OpC = dyn_cast<CallInst>(Op1);
+  if (UnsafeFPShrink && hasFloatVersion(LogNm))
+    Ret = optimizeUnaryDoubleFP(Log, B, true);
 
   // The earlier call must also be 'fast' in order to do these transforms.
-  if (!OpC || !OpC->isFast())
+  CallInst *Arg = dyn_cast<CallInst>(Log->getArgOperand(0));
+  if (!Log->isFast() || !Arg || !Arg->isFast() || !Arg->hasOneUse())
     return Ret;
 
-  // log(pow(x,y)) -> y*log(x)
-  // This is only applicable to log, log2, log10.
-  if (Name != "log" && Name != "log2" && Name != "log10")
+  LibFunc LogLb, ExpLb, Exp2Lb, Exp10Lb, PowLb;
+
+  // This is only applicable to log(), log2(), log10().
+  if (TLI->getLibFunc(LogNm, LogLb))
+    switch (LogLb) {
+    case LibFunc_logf:
+      LogID = Intrinsic::log;
+      ExpLb = LibFunc_expf;
+      Exp2Lb = LibFunc_exp2f;
+      Exp10Lb = LibFunc_exp10f;
+      PowLb = LibFunc_powf;
+      break;
+    case LibFunc_log:
+      LogID = Intrinsic::log;
+      ExpLb = LibFunc_exp;
+      Exp2Lb = LibFunc_exp2;
+      Exp10Lb = LibFunc_exp10;
+      PowLb = LibFunc_pow;
+      break;
+    case LibFunc_logl:
+      LogID = Intrinsic::log;
+      ExpLb = LibFunc_expl;
+      Exp2Lb = LibFunc_exp2l;
+      Exp10Lb = LibFunc_exp10l;
+      PowLb = LibFunc_powl;
+      break;
+    case LibFunc_log2f:
+      LogID = Intrinsic::log2;
+      ExpLb = LibFunc_expf;
+      Exp2Lb = LibFunc_exp2f;
+      Exp10Lb = LibFunc_exp10f;
+      PowLb = LibFunc_powf;
+      break;
+    case LibFunc_log2:
+      LogID = Intrinsic::log2;
+      ExpLb = LibFunc_exp;
+      Exp2Lb = LibFunc_exp2;
+      Exp10Lb = LibFunc_exp10;
+      PowLb = LibFunc_pow;
+      break;
+    case LibFunc_log2l:
+      LogID = Intrinsic::log2;
+      ExpLb = LibFunc_expl;
+      Exp2Lb = LibFunc_exp2l;
+      Exp10Lb = LibFunc_exp10l;
+      PowLb = LibFunc_powl;
+      break;
+    case LibFunc_log10f:
+      LogID = Intrinsic::log10;
+      ExpLb = LibFunc_expf;
+      Exp2Lb = LibFunc_exp2f;
+      Exp10Lb = LibFunc_exp10f;
+      PowLb = LibFunc_powf;
+      break;
+    case LibFunc_log10:
+      LogID = Intrinsic::log10;
+      ExpLb = LibFunc_exp;
+      Exp2Lb = LibFunc_exp2;
+      Exp10Lb = LibFunc_exp10;
+      PowLb = LibFunc_pow;
+      break;
+    case LibFunc_log10l:
+      LogID = Intrinsic::log10;
+      ExpLb = LibFunc_expl;
+      Exp2Lb = LibFunc_exp2l;
+      Exp10Lb = LibFunc_exp10l;
+      PowLb = LibFunc_powl;
+      break;
+    default:
+      return Ret;
+    }
+  else if (LogID == Intrinsic::log || LogID == Intrinsic::log2 ||
+           LogID == Intrinsic::log10) {
+    if (Ty->getScalarType()->isFloatTy()) {
+      ExpLb = LibFunc_expf;
+      Exp2Lb = LibFunc_exp2f;
+      Exp10Lb = LibFunc_exp10f;
+      PowLb = LibFunc_powf;
+    } else if (Ty->getScalarType()->isDoubleTy()) {
+      ExpLb = LibFunc_exp;
+      Exp2Lb = LibFunc_exp2;
+      Exp10Lb = LibFunc_exp10;
+      PowLb = LibFunc_pow;
+    } else
+      return Ret;
+  } else
     return Ret;
 
   IRBuilder<>::FastMathFlagGuard Guard(B);
-  FastMathFlags FMF;
-  FMF.setFast();
-  B.setFastMathFlags(FMF);
+  B.setFastMathFlags(FastMathFlags::getFast());
+
+  Intrinsic::ID ArgID = Arg->getIntrinsicID();
+  LibFunc ArgLb = NotLibFunc;
+  TLI->getLibFunc(Arg, ArgLb);
+
+  // log(pow(x,y)) -> y*log(x)
+  if (ArgLb == PowLb || ArgID == Intrinsic::pow) {
+    Value *LogX =
+        Log->doesNotAccessMemory()
+            ? B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty),
+                           Arg->getOperand(0), "log")
+            : emitUnaryFloatFnCall(Arg->getOperand(0), LogNm, B, Attrs);
+    Value *MulY = B.CreateFMul(Arg->getArgOperand(1), LogX, "mul");
+    // Since pow() may have side effects, e.g. errno,
+    // dead code elimination may not be trusted to remove it.
+    substituteInParent(Arg, MulY);
+    return MulY;
+  }
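The table-driven rewrite above applies log(pow(x,y)) -> y*log(x) and the exp variants only under fast-math, because the two sides round differently (and differ in domain when x < 0). A tolerance-based numeric check of the identities, with arbitrary sample points:

#include <cassert>
#include <cmath>

int main() {
  // Fast-math identity: log(pow(x, y)) ~= y * log(x) for x > 0.
  for (double X : {0.5, 2.0, 9.7}) {
    for (double Y : {-3.0, 0.25, 5.0}) {
      double LHS = std::log(std::pow(X, Y));
      double RHS = Y * std::log(X);
      assert(std::fabs(LHS - RHS) <= 1e-12 * (1.0 + std::fabs(RHS)));
    }
  }
  // log2(exp2(y)) -> y * log2(2) == y; exact here since exp2(12) is exact.
  assert(std::log2(std::exp2(12.0)) == 12.0);
  return 0;
}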
+  // log(exp{,2,10}(y)) -> y*log({e,2,10})
+  // TODO: There is no exp10() intrinsic yet.
+  if (ArgLb == ExpLb || ArgLb == Exp2Lb || ArgLb == Exp10Lb ||
+      ArgID == Intrinsic::exp || ArgID == Intrinsic::exp2) {
+    Constant *Eul;
+    if (ArgLb == ExpLb || ArgID == Intrinsic::exp)
+      // FIXME: Add more precise value of e for long double.
+      Eul = ConstantFP::get(Log->getType(), numbers::e);
+    else if (ArgLb == Exp2Lb || ArgID == Intrinsic::exp2)
+      Eul = ConstantFP::get(Log->getType(), 2.0);
+    else
+      Eul = ConstantFP::get(Log->getType(), 10.0);
+    Value *LogE = Log->doesNotAccessMemory()
+                      ? B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty),
+                                     Eul, "log")
+                      : emitUnaryFloatFnCall(Eul, LogNm, B, Attrs);
+    Value *MulY = B.CreateFMul(Arg->getArgOperand(0), LogE, "mul");
+    // Since exp() may have side effects, e.g. errno,
+    // dead code elimination may not be trusted to remove it.
+    substituteInParent(Arg, MulY);
+    return MulY;
+  }
 
-  LibFunc Func;
-  Function *F = OpC->getCalledFunction();
-  if (F && ((TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
-      Func == LibFunc_pow) || F->getIntrinsicID() == Intrinsic::pow))
-    return B.CreateFMul(OpC->getArgOperand(1),
-      emitUnaryFloatFnCall(OpC->getOperand(0), Callee->getName(), B,
-                           Callee->getAttributes()), "mul");
-
-  // log(exp2(y)) -> y*log(2)
-  if (F && Name == "log" && TLI->getLibFunc(F->getName(), Func) &&
-      TLI->has(Func) && Func == LibFunc_exp2)
-    return B.CreateFMul(
-        OpC->getArgOperand(0),
-        emitUnaryFloatFnCall(ConstantFP::get(CI->getType(), 2.0),
-                             Callee->getName(), B, Callee->getAttributes()),
-        "logmul");
   return Ret;
 }
 
@@ -2137,6 +2408,7 @@ Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilder<> &B) {
     return New;
   }
 
+  annotateNonNullBasedOnAccess(CI, 0);
   return nullptr;
 }
 
@@ -2231,21 +2503,21 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) {
     return New;
   }
 
+  annotateNonNullBasedOnAccess(CI, {0, 1});
   return nullptr;
 }
 
 Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) {
-  // Check for a fixed format string.
-  StringRef FormatStr;
-  if (!getConstantStringInfo(CI->getArgOperand(2), FormatStr))
-    return nullptr;
-
   // Check for size
   ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1));
   if (!Size)
     return nullptr;
+
   uint64_t N = Size->getZExtValue();
+  // Check for a fixed format string.
+  StringRef FormatStr;
+  if (!getConstantStringInfo(CI->getArgOperand(2), FormatStr))
+    return nullptr;
 
   // If we just have a format string (nothing else crazy) transform it.
   if (CI->getNumArgOperands() == 3) {
@@ -2318,6 +2590,8 @@ Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilder<> &B) {
     return V;
   }
 
+  if (isKnownNonZero(CI->getOperand(1), DL))
+    annotateNonNullBasedOnAccess(CI, 0);
   return nullptr;
 }
 
@@ -2503,6 +2777,7 @@ Value *LibCallSimplifier::optimizeFRead(CallInst *CI, IRBuilder<> &B) {
 }
 
 Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
+  annotateNonNullBasedOnAccess(CI, 0);
   if (!CI->use_empty())
     return nullptr;
 
@@ -2515,6 +2790,12 @@ Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
   return nullptr;
 }
 
+Value *LibCallSimplifier::optimizeBCopy(CallInst *CI, IRBuilder<> &B) {
+  // bcopy(src, dst, n) -> llvm.memmove(dst, src, n)
+  return B.CreateMemMove(CI->getArgOperand(1), 1, CI->getArgOperand(0), 1,
+                         CI->getArgOperand(2));
+}
+
 bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) {
   LibFunc Func;
   SmallString<20> FloatFuncName = FuncName;
@@ -2557,6 +2838,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
     return optimizeStrLen(CI, Builder);
   case LibFunc_strpbrk:
     return optimizeStrPBrk(CI, Builder);
+  case LibFunc_strndup:
+    return optimizeStrNDup(CI, Builder);
   case LibFunc_strtol:
   case LibFunc_strtod:
   case LibFunc_strtof:
@@ -2573,12 +2856,16 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
     return optimizeStrStr(CI, Builder);
   case LibFunc_memchr:
     return optimizeMemChr(CI, Builder);
+  case LibFunc_memrchr:
+    return optimizeMemRChr(CI, Builder);
   case LibFunc_bcmp:
     return optimizeBCmp(CI, Builder);
   case LibFunc_memcmp:
     return optimizeMemCmp(CI, Builder);
   case LibFunc_memcpy:
     return optimizeMemCpy(CI, Builder);
+  case LibFunc_mempcpy:
+    return optimizeMemPCpy(CI, Builder);
   case LibFunc_memmove:
     return optimizeMemMove(CI, Builder);
   case LibFunc_memset:
@@ -2587,6 +2874,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
     return optimizeRealloc(CI, Builder);
   case LibFunc_wcslen:
     return optimizeWcslen(CI, Builder);
+  case LibFunc_bcopy:
+    return optimizeBCopy(CI, Builder);
   default:
     break;
   }
@@ -2626,11 +2915,21 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
   case LibFunc_sqrt:
   case LibFunc_sqrtl:
     return optimizeSqrt(CI, Builder);
+  case LibFunc_logf:
   case LibFunc_log:
+  case LibFunc_logl:
+  case LibFunc_log10f:
   case LibFunc_log10:
+  case LibFunc_log10l:
+  case LibFunc_log1pf:
   case LibFunc_log1p:
+  case LibFunc_log1pl:
+  case LibFunc_log2f:
   case LibFunc_log2:
+  case LibFunc_log2l:
+  case LibFunc_logbf:
   case LibFunc_logb:
+  case LibFunc_logbl:
     return optimizeLog(CI, Builder);
   case LibFunc_tan:
   case LibFunc_tanf:
@@ -2721,10 +3020,18 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
     case Intrinsic::exp2:
       return optimizeExp2(CI, Builder);
     case Intrinsic::log:
+    case Intrinsic::log2:
+    case Intrinsic::log10:
       return optimizeLog(CI, Builder);
    case Intrinsic::sqrt:
       return optimizeSqrt(CI, Builder);
     // TODO: Use foldMallocMemset() with memset intrinsic.
+    case Intrinsic::memset:
+      return optimizeMemSet(CI, Builder);
+    case Intrinsic::memcpy:
+      return optimizeMemCpy(CI, Builder);
+    case Intrinsic::memmove:
+      return optimizeMemMove(CI, Builder);
     default:
       return nullptr;
     }
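The optimizeBCopy hunk above hinges on one legacy detail: BSD bcopy takes (src, dst, n), the reverse of memmove's (dst, src, n), and it permits overlapping buffers, hence llvm.memmove rather than llvm.memcpy. A sketch using a local stand-in (my_bcopy is an invented name, so no <strings.h> dependency):

#include <cassert>
#include <cstring>

// Legacy BSD bcopy(src, dst, n): src comes first, unlike memmove,
// and overlap is allowed -- which is why the lowering swaps the
// first two operands and uses memmove semantics.
static void my_bcopy(const void *Src, void *Dst, std::size_t N) {
  std::memmove(Dst, Src, N);
}

int main() {
  char Buf[] = "abcdef";
  my_bcopy(Buf, Buf + 2, 4); // overlapping shift: "ababcd"
  assert(std::strcmp(Buf, "ababcd") == 0);
  return 0;
}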
@@ -2740,8 +3047,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
       IRBuilder<> TmpBuilder(SimplifiedCI);
       if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, TmpBuilder)) {
         // If we were able to further simplify, remove the now redundant call.
-        SimplifiedCI->replaceAllUsesWith(V);
-        eraseFromParent(SimplifiedCI);
+        substituteInParent(SimplifiedCI, V);
         return V;
       }
     }
@@ -2898,7 +3204,9 @@ FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
       uint64_t Len = GetStringLength(CI->getArgOperand(*StrOp));
       // If the length is 0 we don't know how long it is and so we can't
       // remove the check.
-      if (Len == 0)
+      if (Len)
+        annotateDereferenceableBytes(CI, *StrOp, Len);
+      else
         return false;
       return ObjSizeCI->getZExtValue() >= Len;
     }
@@ -2915,8 +3223,9 @@ FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
 Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
                                                      IRBuilder<> &B) {
   if (isFortifiedCallFoldable(CI, 3, 2)) {
-    B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
-                   CI->getArgOperand(2));
+    CallInst *NewCI = B.CreateMemCpy(
+        CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, CI->getArgOperand(2));
+    NewCI->setAttributes(CI->getAttributes());
     return CI->getArgOperand(0);
   }
   return nullptr;
@@ -2925,8 +3234,9 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
 Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
                                                       IRBuilder<> &B) {
   if (isFortifiedCallFoldable(CI, 3, 2)) {
-    B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
-                    CI->getArgOperand(2));
+    CallInst *NewCI = B.CreateMemMove(
+        CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, CI->getArgOperand(2));
+    NewCI->setAttributes(CI->getAttributes());
     return CI->getArgOperand(0);
   }
   return nullptr;
@@ -2938,7 +3248,9 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
   if (isFortifiedCallFoldable(CI, 3, 2)) {
     Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
-    B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+    CallInst *NewCI =
+        B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+    NewCI->setAttributes(CI->getAttributes());
     return CI->getArgOperand(0);
   }
   return nullptr;
@@ -2974,7 +3286,9 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
 
   // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk.
   uint64_t Len = GetStringLength(Src);
-  if (Len == 0)
+  if (Len)
+    annotateDereferenceableBytes(CI, 1, Len);
+  else
     return nullptr;
 
   Type *SizeTTy = DL.getIntPtrType(CI->getContext());
diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp
index 456724779b43..5d380dcf231c 100644
--- a/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -380,11 +380,11 @@ parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
   // TODO see if there is a more elegant solution to selecting the rewrite
   // descriptor type
   if (!Target.empty())
-    DL->push_back(llvm::make_unique<ExplicitRewriteFunctionDescriptor>(
+    DL->push_back(std::make_unique<ExplicitRewriteFunctionDescriptor>(
         Source, Target, Naked));
   else
     DL->push_back(
-        llvm::make_unique<PatternRewriteFunctionDescriptor>(Source, Transform));
+        std::make_unique<PatternRewriteFunctionDescriptor>(Source, Transform));
 
   return true;
 }
@@ -442,11 +442,11 @@ parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
   }
 
   if (!Target.empty())
-    DL->push_back(llvm::make_unique<ExplicitRewriteGlobalVariableDescriptor>(
+    DL->push_back(std::make_unique<ExplicitRewriteGlobalVariableDescriptor>(
         Source, Target,
         /*Naked*/ false));
   else
-    DL->push_back(llvm::make_unique<PatternRewriteGlobalVariableDescriptor>(
+    DL->push_back(std::make_unique<PatternRewriteGlobalVariableDescriptor>(
         Source, Transform));
 
   return true;
@@ -505,11 +505,11 @@ parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
   }
 
   if (!Target.empty())
-    DL->push_back(llvm::make_unique<ExplicitRewriteNamedAliasDescriptor>(
+    DL->push_back(std::make_unique<ExplicitRewriteNamedAliasDescriptor>(
        Source, Target,
        /*Naked*/ false));
   else
-    DL->push_back(llvm::make_unique<PatternRewriteNamedAliasDescriptor>(
+    DL->push_back(std::make_unique<PatternRewriteNamedAliasDescriptor>(
        Source, Transform));
 
   return true;
diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
index a77bf50fe10b..591e1fd2dbee 100644
--- a/lib/Transforms/Utils/VNCoercion.cpp
+++ b/lib/Transforms/Utils/VNCoercion.cpp
@@ -431,7 +431,7 @@ Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
     PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
     LoadInst *NewLoad = Builder.CreateLoad(DestTy, PtrVal);
     NewLoad->takeName(SrcVal);
-    NewLoad->setAlignment(SrcVal->getAlignment());
+    NewLoad->setAlignment(MaybeAlign(SrcVal->getAlignment()));
 
     LLVM_DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
     LLVM_DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index fbc3407c301f..da68d3713b40 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -27,8 +27,8 @@
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalIndirectSymbol.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instruction.h"
@@ -66,7 +66,7 @@ struct WorklistEntry {
   enum EntryKind {
     MapGlobalInit,
     MapAppendingVar,
-    MapGlobalAliasee,
+    MapGlobalIndirectSymbol,
     RemapFunction
   };
   struct GVInitTy {
     GlobalVariable *GV;
     Constant *Init;
   };
   struct AppendingGVTy {
     GlobalVariable *GV;
     Constant *InitPrefix;
   };
-  struct GlobalAliaseeTy {
-    GlobalAlias *GA;
-    Constant *Aliasee;
+  struct GlobalIndirectSymbolTy {
+    GlobalIndirectSymbol *GIS;
+    Constant *Target;
   };
 
   unsigned Kind : 2;
@@ -89,7 +89,7 @@ struct WorklistEntry {
   union {
     GVInitTy GVInit;
     AppendingGVTy AppendingGV;
-    GlobalAliaseeTy GlobalAliasee;
+    GlobalIndirectSymbolTy GlobalIndirectSymbol;
     Function *RemapF;
   } Data;
 };
@@ -161,8 +161,8 @@ public:
                                     bool IsOldCtorDtor,
                                     ArrayRef<Constant *> NewMembers,
                                     unsigned MCID);
-  void scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee,
-                                unsigned MCID);
+  void scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS, Constant &Target,
+                                       unsigned MCID);
   void scheduleRemapFunction(Function &F, unsigned MCID);
 
   void flush();
@@ -172,7 +172,7 @@ private:
   void mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix,
                             bool IsOldCtorDtor,
                             ArrayRef<Constant *> NewMembers);
-  void mapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee);
+  void mapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS, Constant &Target);
   void remapFunction(Function &F, ValueToValueMapTy &VM);
 
   ValueToValueMapTy &getVM() { return *MCs[CurrentMCID].VM; }
@@ -774,20 +774,6 @@ Metadata *MDNodeMapper::mapTopLevelUniquedNode(const MDNode &FirstN) {
   return *getMappedOp(&FirstN);
 }
 
-namespace {
-
-struct MapMetadataDisabler {
-  ValueToValueMapTy &VM;
-
-  MapMetadataDisabler(ValueToValueMapTy &VM) : VM(VM) {
-    VM.disableMapMetadata();
-  }
-
-  ~MapMetadataDisabler() { VM.enableMapMetadata(); }
-};
-
-} // end anonymous namespace
-
 Optional<Metadata *> Mapper::mapSimpleMetadata(const Metadata *MD) {
   // If the value already exists in the map, use it.
   if (Optional<Metadata *> NewMD = getVM().getMappedMD(MD))
@@ -802,9 +788,6 @@ Optional<Metadata *> Mapper::mapSimpleMetadata(const Metadata *MD) {
     return const_cast<Metadata *>(MD);
 
   if (auto *CMD = dyn_cast<ConstantAsMetadata>(MD)) {
-    // Disallow recursion into metadata mapping through mapValue.
-    MapMetadataDisabler MMD(getVM());
-
     // Don't memoize ConstantAsMetadata.  Instead of lasting until the
     // LLVMContext is destroyed, they can be deleted when the GlobalValue they
     // reference is destructed.  These aren't super common, so the extra
@@ -846,9 +829,9 @@ void Mapper::flush() {
       AppendingInits.resize(PrefixSize);
       break;
     }
-    case WorklistEntry::MapGlobalAliasee:
-      E.Data.GlobalAliasee.GA->setAliasee(
-          mapConstant(E.Data.GlobalAliasee.Aliasee));
+    case WorklistEntry::MapGlobalIndirectSymbol:
+      E.Data.GlobalIndirectSymbol.GIS->setIndirectSymbol(
+          mapConstant(E.Data.GlobalIndirectSymbol.Target));
       break;
     case WorklistEntry::RemapFunction:
       remapFunction(*E.Data.RemapF);
@@ -1041,16 +1024,16 @@ void Mapper::scheduleMapAppendingVariable(GlobalVariable &GV,
   AppendingInits.append(NewMembers.begin(), NewMembers.end());
 }
 
-void Mapper::scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee,
-                                      unsigned MCID) {
-  assert(AlreadyScheduled.insert(&GA).second && "Should not reschedule");
+void Mapper::scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS,
+                                             Constant &Target, unsigned MCID) {
+  assert(AlreadyScheduled.insert(&GIS).second && "Should not reschedule");
   assert(MCID < MCs.size() && "Invalid mapping context");
 
   WorklistEntry WE;
-  WE.Kind = WorklistEntry::MapGlobalAliasee;
+  WE.Kind = WorklistEntry::MapGlobalIndirectSymbol;
   WE.MCID = MCID;
-  WE.Data.GlobalAliasee.GA = &GA;
-  WE.Data.GlobalAliasee.Aliasee = &Aliasee;
+  WE.Data.GlobalIndirectSymbol.GIS = &GIS;
+  WE.Data.GlobalIndirectSymbol.Target = &Target;
   Worklist.push_back(WE);
 }
 
@@ -1147,9 +1130,10 @@ void ValueMapper::scheduleMapAppendingVariable(GlobalVariable &GV,
       GV, InitPrefix, IsOldCtorDtor, NewMembers, MCID);
 }
 
-void ValueMapper::scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee,
-                                           unsigned MCID) {
-  getAsMapper(pImpl)->scheduleMapGlobalAliasee(GA, Aliasee, MCID);
+void ValueMapper::scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS,
+                                                  Constant &Target,
+                                                  unsigned MCID) {
+  getAsMapper(pImpl)->scheduleMapGlobalIndirectSymbol(GIS, Target, MCID);
 }
 
 void ValueMapper::scheduleRemapFunction(Function &F, unsigned MCID) {
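The ValueMapper change above generalizes the aliasee slot to any GlobalIndirectSymbol (aliases and ifuncs alike) while keeping the same deferred-worklist shape: a 2-bit kind tag plus a union payload, dispatched later in flush(). A self-contained miniature of that pattern, with invented names throughout:

#include <cassert>
#include <vector>

// Miniature of ValueMapper's WorklistEntry: a small tagged union whose
// kind fits a 2-bit field, queued now and processed later in flush order.
struct Entry {
  enum Kind { MapInit, MapIndirectSymbol, Remap }; // <= 4 kinds fit 2 bits
  unsigned TheKind : 2;
  union {
    int InitPayload;
    int SymbolPayload;
  } Data;
};

int main() {
  std::vector<Entry> Worklist;
  Entry E;
  E.TheKind = Entry::MapIndirectSymbol;
  E.Data.SymbolPayload = 42;
  Worklist.push_back(E);
  // flush(): switch on the tag; exactly one union member is live per kind.
  for (const Entry &W : Worklist)
    if (W.TheKind == Entry::MapIndirectSymbol)
      assert(W.Data.SymbolPayload == 42);
  return 0;
}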