summaryrefslogtreecommitdiff
path: root/lib/Transforms/Utils
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Utils')
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp64
-rw-r--r--lib/Transforms/Utils/BuildLibCalls.cpp94
-rw-r--r--lib/Transforms/Utils/BypassSlowDivision.cpp8
-rw-r--r--lib/Transforms/Utils/CanonicalizeAliases.cpp1
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp15
-rw-r--r--lib/Transforms/Utils/CloneModule.cpp18
-rw-r--r--lib/Transforms/Utils/CodeExtractor.cpp309
-rw-r--r--lib/Transforms/Utils/EntryExitInstrumenter.cpp2
-rw-r--r--lib/Transforms/Utils/Evaluator.cpp2
-rw-r--r--lib/Transforms/Utils/FlattenCFG.cpp20
-rw-r--r--lib/Transforms/Utils/FunctionImportUtils.cpp2
-rw-r--r--lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp6
-rw-r--r--lib/Transforms/Utils/LibCallsShrinkWrap.cpp2
-rw-r--r--lib/Transforms/Utils/Local.cpp209
-rw-r--r--lib/Transforms/Utils/LoopRotationUtils.cpp27
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp15
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp12
-rw-r--r--lib/Transforms/Utils/LoopUnrollAndJam.cpp6
-rw-r--r--lib/Transforms/Utils/LoopUnrollPeel.cpp161
-rw-r--r--lib/Transforms/Utils/LoopUtils.cpp56
-rw-r--r--lib/Transforms/Utils/LoopVersioning.cpp4
-rw-r--r--lib/Transforms/Utils/MetaRenamer.cpp5
-rw-r--r--lib/Transforms/Utils/MisExpect.cpp177
-rw-r--r--lib/Transforms/Utils/ModuleUtils.cpp2
-rw-r--r--lib/Transforms/Utils/PredicateInfo.cpp80
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp250
-rw-r--r--lib/Transforms/Utils/SimplifyLibCalls.cpp688
-rw-r--r--lib/Transforms/Utils/SymbolRewriter.cpp12
-rw-r--r--lib/Transforms/Utils/VNCoercion.cpp2
-rw-r--r--lib/Transforms/Utils/ValueMapper.cpp60
30 files changed, 1579 insertions, 730 deletions
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 5fa371377c85..d85cc40c372a 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -170,7 +170,8 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
LoopInfo *LI, MemorySSAUpdater *MSSAU,
- MemoryDependenceResults *MemDep) {
+ MemoryDependenceResults *MemDep,
+ bool PredecessorWithTwoSuccessors) {
if (BB->hasAddressTaken())
return false;
@@ -185,9 +186,24 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
return false;
// Can't merge if there are multiple distinct successors.
- if (PredBB->getUniqueSuccessor() != BB)
+ if (!PredecessorWithTwoSuccessors && PredBB->getUniqueSuccessor() != BB)
return false;
+ // Currently only allow PredBB to have two predecessors, one being BB.
+ // Update BI to branch to BB's only successor instead of BB.
+ BranchInst *PredBB_BI;
+ BasicBlock *NewSucc = nullptr;
+ unsigned FallThruPath;
+ if (PredecessorWithTwoSuccessors) {
+ if (!(PredBB_BI = dyn_cast<BranchInst>(PredBB->getTerminator())))
+ return false;
+ BranchInst *BB_JmpI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BB_JmpI || !BB_JmpI->isUnconditional())
+ return false;
+ NewSucc = BB_JmpI->getSuccessor(0);
+ FallThruPath = PredBB_BI->getSuccessor(0) == BB ? 0 : 1;
+ }
+
// Can't merge if there is PHI loop.
for (PHINode &PN : BB->phis())
for (Value *IncValue : PN.incoming_values())
@@ -227,18 +243,39 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
Updates.push_back({DominatorTree::Delete, PredBB, BB});
}
- if (MSSAU)
- MSSAU->moveAllAfterMergeBlocks(BB, PredBB, &*(BB->begin()));
+ Instruction *PTI = PredBB->getTerminator();
+ Instruction *STI = BB->getTerminator();
+ Instruction *Start = &*BB->begin();
+ // If there's nothing to move, mark the starting instruction as the last
+ // instruction in the block.
+ if (Start == STI)
+ Start = PTI;
+
+ // Move all definitions in the successor to the predecessor...
+ PredBB->getInstList().splice(PTI->getIterator(), BB->getInstList(),
+ BB->begin(), STI->getIterator());
- // Delete the unconditional branch from the predecessor...
- PredBB->getInstList().pop_back();
+ if (MSSAU)
+ MSSAU->moveAllAfterMergeBlocks(BB, PredBB, Start);
// Make all PHI nodes that referred to BB now refer to Pred as their
// source...
BB->replaceAllUsesWith(PredBB);
- // Move all definitions in the successor to the predecessor...
- PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+ if (PredecessorWithTwoSuccessors) {
+ // Delete the unconditional branch from BB.
+ BB->getInstList().pop_back();
+
+ // Update branch in the predecessor.
+ PredBB_BI->setSuccessor(FallThruPath, NewSucc);
+ } else {
+ // Delete the unconditional branch from the predecessor.
+ PredBB->getInstList().pop_back();
+
+ // Move terminator instruction.
+ PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+ }
+ // Add unreachable to now empty BB.
new UnreachableInst(BB->getContext(), BB);
// Eliminate duplicate dbg.values describing the entry PHI node post-splice.
@@ -274,11 +311,10 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
"applying corresponding DTU updates.");
DTU->applyUpdatesPermissive(Updates);
DTU->deleteBB(BB);
- }
-
- else {
+ } else {
BB->eraseFromParent(); // Nuke BB if DTU is nullptr.
}
+
return true;
}
@@ -365,11 +401,13 @@ llvm::SplitAllCriticalEdges(Function &F,
BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
DominatorTree *DT, LoopInfo *LI,
- MemorySSAUpdater *MSSAU) {
+ MemorySSAUpdater *MSSAU, const Twine &BBName) {
BasicBlock::iterator SplitIt = SplitPt->getIterator();
while (isa<PHINode>(SplitIt) || SplitIt->isEHPad())
++SplitIt;
- BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split");
+ std::string Name = BBName.str();
+ BasicBlock *New = Old->splitBasicBlock(
+ SplitIt, Name.empty() ? Old->getName() + ".split" : Name);
// The new block lives in whichever loop the old one did. This preserves
// LCSSA as well, because we force the split point to be after any PHI nodes.
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 27f110e24f9c..71316ce8f758 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -88,6 +88,14 @@ static bool setDoesNotCapture(Function &F, unsigned ArgNo) {
return true;
}
+static bool setDoesNotAlias(Function &F, unsigned ArgNo) {
+ if (F.hasParamAttribute(ArgNo, Attribute::NoAlias))
+ return false;
+ F.addParamAttr(ArgNo, Attribute::NoAlias);
+ ++NumNoAlias;
+ return true;
+}
+
static bool setOnlyReadsMemory(Function &F, unsigned ArgNo) {
if (F.hasParamAttribute(ArgNo, Attribute::ReadOnly))
return false;
@@ -175,6 +183,9 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_strcpy:
case LibFunc_strncpy:
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotAlias(F, 1);
+ LLVM_FALLTHROUGH;
case LibFunc_strcat:
case LibFunc_strncat:
Changed |= setReturnedArg(F, 0);
@@ -249,12 +260,14 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_sprintf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotAlias(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_snprintf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotAlias(F, 0);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
@@ -291,11 +304,23 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_memcpy:
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotAlias(F, 1);
+ Changed |= setReturnedArg(F, 0);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
case LibFunc_memmove:
Changed |= setReturnedArg(F, 0);
- LLVM_FALLTHROUGH;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
case LibFunc_mempcpy:
case LibFunc_memccpy:
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotAlias(F, 1);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
@@ -760,9 +785,8 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
}
}
-bool llvm::hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
- LibFunc DoubleFn, LibFunc FloatFn,
- LibFunc LongDoubleFn) {
+bool llvm::hasFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
+ LibFunc DoubleFn, LibFunc FloatFn, LibFunc LongDoubleFn) {
switch (Ty->getTypeID()) {
case Type::HalfTyID:
return false;
@@ -775,10 +799,10 @@ bool llvm::hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
}
}
-StringRef llvm::getUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
- LibFunc DoubleFn, LibFunc FloatFn,
- LibFunc LongDoubleFn) {
- assert(hasUnaryFloatFn(TLI, Ty, DoubleFn, FloatFn, LongDoubleFn) &&
+StringRef llvm::getFloatFnName(const TargetLibraryInfo *TLI, Type *Ty,
+ LibFunc DoubleFn, LibFunc FloatFn,
+ LibFunc LongDoubleFn) {
+ assert(hasFloatFn(TLI, Ty, DoubleFn, FloatFn, LongDoubleFn) &&
"Cannot get name for unavailable function!");
switch (Ty->getTypeID()) {
@@ -827,6 +851,12 @@ Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
B.getInt8PtrTy(), castToCStr(Ptr, B), B, TLI);
}
+Value *llvm::emitStrDup(Value *Ptr, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
+ return emitLibCall(LibFunc_strdup, B.getInt8PtrTy(), B.getInt8PtrTy(),
+ castToCStr(Ptr, B), B, TLI);
+}
+
Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
Type *I8Ptr = B.getInt8PtrTy();
@@ -1045,24 +1075,28 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI,
LibFunc LongDoubleFn, IRBuilder<> &B,
const AttributeList &Attrs) {
// Get the name of the function according to TLI.
- StringRef Name = getUnaryFloatFn(TLI, Op->getType(),
- DoubleFn, FloatFn, LongDoubleFn);
+ StringRef Name = getFloatFnName(TLI, Op->getType(),
+ DoubleFn, FloatFn, LongDoubleFn);
return emitUnaryFloatFnCallHelper(Op, Name, B, Attrs);
}
-Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
- IRBuilder<> &B, const AttributeList &Attrs) {
+static Value *emitBinaryFloatFnCallHelper(Value *Op1, Value *Op2,
+ StringRef Name, IRBuilder<> &B,
+ const AttributeList &Attrs) {
assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall");
- SmallString<20> NameBuffer;
- appendTypeSuffix(Op1, Name, NameBuffer);
-
Module *M = B.GetInsertBlock()->getModule();
- FunctionCallee Callee = M->getOrInsertFunction(
- Name, Op1->getType(), Op1->getType(), Op2->getType());
- CallInst *CI = B.CreateCall(Callee, {Op1, Op2}, Name);
- CI->setAttributes(Attrs);
+ FunctionCallee Callee = M->getOrInsertFunction(Name, Op1->getType(),
+ Op1->getType(), Op2->getType());
+ CallInst *CI = B.CreateCall(Callee, { Op1, Op2 }, Name);
+
+ // The incoming attribute set may have come from a speculatable intrinsic, but
+ // is being replaced with a library call which is not allowed to be
+ // speculatable.
+ CI->setAttributes(Attrs.removeAttribute(B.getContext(),
+ AttributeList::FunctionIndex,
+ Attribute::Speculatable));
if (const Function *F =
dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -1070,6 +1104,28 @@ Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
return CI;
}
+Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
+ IRBuilder<> &B, const AttributeList &Attrs) {
+ assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall");
+
+ SmallString<20> NameBuffer;
+ appendTypeSuffix(Op1, Name, NameBuffer);
+
+ return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs);
+}
+
+Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2,
+ const TargetLibraryInfo *TLI,
+ LibFunc DoubleFn, LibFunc FloatFn,
+ LibFunc LongDoubleFn, IRBuilder<> &B,
+ const AttributeList &Attrs) {
+ // Get the name of the function according to TLI.
+ StringRef Name = getFloatFnName(TLI, Op1->getType(),
+ DoubleFn, FloatFn, LongDoubleFn);
+
+ return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs);
+}
+
Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc_putchar))
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index df299f673f65..9a6761040bd8 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -448,13 +448,17 @@ bool llvm::bypassSlowDivision(BasicBlock *BB,
DivCacheTy PerBBDivCache;
bool MadeChange = false;
- Instruction* Next = &*BB->begin();
+ Instruction *Next = &*BB->begin();
while (Next != nullptr) {
// We may add instructions immediately after I, but we want to skip over
// them.
- Instruction* I = Next;
+ Instruction *I = Next;
Next = Next->getNextNode();
+ // Ignore dead code to save time and avoid bugs.
+ if (I->hasNUses(0))
+ continue;
+
FastDivInsertionTask Task(I, BypassWidths);
if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
I->replaceAllUsesWith(Replacement);
diff --git a/lib/Transforms/Utils/CanonicalizeAliases.cpp b/lib/Transforms/Utils/CanonicalizeAliases.cpp
index 455fcbb1cf98..3c7c8d872595 100644
--- a/lib/Transforms/Utils/CanonicalizeAliases.cpp
+++ b/lib/Transforms/Utils/CanonicalizeAliases.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
using namespace llvm;
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 1026c9d37038..75e8963303c2 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -210,6 +210,21 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
RemapInstruction(&II, VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
TypeMapper, Materializer);
+
+ // Register all DICompileUnits of the old parent module in the new parent module
+ auto* OldModule = OldFunc->getParent();
+ auto* NewModule = NewFunc->getParent();
+ if (OldModule && NewModule && OldModule != NewModule && DIFinder.compile_unit_count()) {
+ auto* NMD = NewModule->getOrInsertNamedMetadata("llvm.dbg.cu");
+ // Avoid multiple insertions of the same DICompileUnit to NMD.
+ SmallPtrSet<const void*, 8> Visited;
+ for (auto* Operand : NMD->operands())
+ Visited.insert(Operand);
+ for (auto* Unit : DIFinder.compile_units())
+ // VMap.MD()[Unit] == Unit
+ if (Visited.insert(Unit).second)
+ NMD->addOperand(Unit);
+ }
}
/// Return a copy of the specified function and add it to that function's
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 7ddf59becba9..2c8c3abb2922 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -48,7 +48,7 @@ std::unique_ptr<Module> llvm::CloneModule(
function_ref<bool(const GlobalValue *)> ShouldCloneDefinition) {
// First off, we need to create the new module.
std::unique_ptr<Module> New =
- llvm::make_unique<Module>(M.getModuleIdentifier(), M.getContext());
+ std::make_unique<Module>(M.getModuleIdentifier(), M.getContext());
New->setSourceFileName(M.getSourceFileName());
New->setDataLayout(M.getDataLayout());
New->setTargetTriple(M.getTargetTriple());
@@ -181,13 +181,25 @@ std::unique_ptr<Module> llvm::CloneModule(
}
// And named metadata....
+ const auto* LLVM_DBG_CU = M.getNamedMetadata("llvm.dbg.cu");
for (Module::const_named_metadata_iterator I = M.named_metadata_begin(),
E = M.named_metadata_end();
I != E; ++I) {
const NamedMDNode &NMD = *I;
NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
- for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
- NewNMD->addOperand(MapMetadata(NMD.getOperand(i), VMap));
+ if (&NMD == LLVM_DBG_CU) {
+ // Do not insert duplicate operands.
+ SmallPtrSet<const void*, 8> Visited;
+ for (const auto* Operand : NewNMD->operands())
+ Visited.insert(Operand);
+ for (const auto* Operand : NMD.operands()) {
+ auto* MappedOperand = MapMetadata(Operand, VMap);
+ if (Visited.insert(MappedOperand).second)
+ NewNMD->addOperand(MappedOperand);
+ }
+ } else
+ for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
+ NewNMD->addOperand(MapMetadata(NMD.getOperand(i), VMap));
}
return New;
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index fa6d3f8ae873..0298ff9a395f 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -293,10 +293,8 @@ static BasicBlock *getCommonExitBlock(const SetVector<BasicBlock *> &Blocks) {
CommonExitBlock = Succ;
continue;
}
- if (CommonExitBlock == Succ)
- continue;
-
- return true;
+ if (CommonExitBlock != Succ)
+ return true;
}
return false;
};
@@ -307,52 +305,79 @@ static BasicBlock *getCommonExitBlock(const SetVector<BasicBlock *> &Blocks) {
return CommonExitBlock;
}
-bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
- Instruction *Addr) const {
- AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets());
- Function *Func = (*Blocks.begin())->getParent();
- for (BasicBlock &BB : *Func) {
- if (Blocks.count(&BB))
- continue;
- for (Instruction &II : BB) {
- if (isa<DbgInfoIntrinsic>(II))
- continue;
+CodeExtractorAnalysisCache::CodeExtractorAnalysisCache(Function &F) {
+ for (BasicBlock &BB : F) {
+ for (Instruction &II : BB.instructionsWithoutDebug())
+ if (auto *AI = dyn_cast<AllocaInst>(&II))
+ Allocas.push_back(AI);
- unsigned Opcode = II.getOpcode();
- Value *MemAddr = nullptr;
- switch (Opcode) {
- case Instruction::Store:
- case Instruction::Load: {
- if (Opcode == Instruction::Store) {
- StoreInst *SI = cast<StoreInst>(&II);
- MemAddr = SI->getPointerOperand();
- } else {
- LoadInst *LI = cast<LoadInst>(&II);
- MemAddr = LI->getPointerOperand();
- }
- // Global variable can not be aliased with locals.
- if (dyn_cast<Constant>(MemAddr))
- break;
- Value *Base = MemAddr->stripInBoundsConstantOffsets();
- if (!isa<AllocaInst>(Base) || Base == AI)
- return false;
+ findSideEffectInfoForBlock(BB);
+ }
+}
+
+void CodeExtractorAnalysisCache::findSideEffectInfoForBlock(BasicBlock &BB) {
+ for (Instruction &II : BB.instructionsWithoutDebug()) {
+ unsigned Opcode = II.getOpcode();
+ Value *MemAddr = nullptr;
+ switch (Opcode) {
+ case Instruction::Store:
+ case Instruction::Load: {
+ if (Opcode == Instruction::Store) {
+ StoreInst *SI = cast<StoreInst>(&II);
+ MemAddr = SI->getPointerOperand();
+ } else {
+ LoadInst *LI = cast<LoadInst>(&II);
+ MemAddr = LI->getPointerOperand();
+ }
+ // Global variable can not be aliased with locals.
+ if (dyn_cast<Constant>(MemAddr))
break;
+ Value *Base = MemAddr->stripInBoundsConstantOffsets();
+ if (!isa<AllocaInst>(Base)) {
+ SideEffectingBlocks.insert(&BB);
+ return;
}
- default: {
- IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II);
- if (IntrInst) {
- if (IntrInst->isLifetimeStartOrEnd())
- break;
- return false;
- }
- // Treat all the other cases conservatively if it has side effects.
- if (II.mayHaveSideEffects())
- return false;
+ BaseMemAddrs[&BB].insert(Base);
+ break;
+ }
+ default: {
+ IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II);
+ if (IntrInst) {
+ if (IntrInst->isLifetimeStartOrEnd())
+ break;
+ SideEffectingBlocks.insert(&BB);
+ return;
}
+ // Treat all the other cases conservatively if it has side effects.
+ if (II.mayHaveSideEffects()) {
+ SideEffectingBlocks.insert(&BB);
+ return;
}
}
+ }
}
+}
+bool CodeExtractorAnalysisCache::doesBlockContainClobberOfAddr(
+ BasicBlock &BB, AllocaInst *Addr) const {
+ if (SideEffectingBlocks.count(&BB))
+ return true;
+ auto It = BaseMemAddrs.find(&BB);
+ if (It != BaseMemAddrs.end())
+ return It->second.count(Addr);
+ return false;
+}
+
+bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
+ const CodeExtractorAnalysisCache &CEAC, Instruction *Addr) const {
+ AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets());
+ Function *Func = (*Blocks.begin())->getParent();
+ for (BasicBlock &BB : *Func) {
+ if (Blocks.count(&BB))
+ continue;
+ if (CEAC.doesBlockContainClobberOfAddr(BB, AI))
+ return false;
+ }
return true;
}
@@ -415,7 +440,8 @@ CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) {
// outline region. If there are not other untracked uses of the address, return
// the pair of markers if found; otherwise return a pair of nullptr.
CodeExtractor::LifetimeMarkerInfo
-CodeExtractor::getLifetimeMarkers(Instruction *Addr,
+CodeExtractor::getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC,
+ Instruction *Addr,
BasicBlock *ExitBlock) const {
LifetimeMarkerInfo Info;
@@ -447,7 +473,7 @@ CodeExtractor::getLifetimeMarkers(Instruction *Addr,
Info.HoistLifeEnd = !definedInRegion(Blocks, Info.LifeEnd);
// Do legality check.
if ((Info.SinkLifeStart || Info.HoistLifeEnd) &&
- !isLegalToShrinkwrapLifetimeMarkers(Addr))
+ !isLegalToShrinkwrapLifetimeMarkers(CEAC, Addr))
return {};
// Check to see if we have a place to do hoisting, if not, bail.
@@ -457,7 +483,8 @@ CodeExtractor::getLifetimeMarkers(Instruction *Addr,
return Info;
}
-void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
+void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache &CEAC,
+ ValueSet &SinkCands, ValueSet &HoistCands,
BasicBlock *&ExitBlock) const {
Function *Func = (*Blocks.begin())->getParent();
ExitBlock = getCommonExitBlock(Blocks);
@@ -478,74 +505,104 @@ void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
return true;
};
- for (BasicBlock &BB : *Func) {
- if (Blocks.count(&BB))
+ // Look up allocas in the original function in CodeExtractorAnalysisCache, as
+ // this is much faster than walking all the instructions.
+ for (AllocaInst *AI : CEAC.getAllocas()) {
+ BasicBlock *BB = AI->getParent();
+ if (Blocks.count(BB))
continue;
- for (Instruction &II : BB) {
- auto *AI = dyn_cast<AllocaInst>(&II);
- if (!AI)
- continue;
- LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(AI, ExitBlock);
- bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo);
- if (Moved) {
- LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n");
- SinkCands.insert(AI);
- continue;
- }
+ // As a prior call to extractCodeRegion() may have shrinkwrapped the alloca,
+ // check whether it is actually still in the original function.
+ Function *AIFunc = BB->getParent();
+ if (AIFunc != Func)
+ continue;
- // Follow any bitcasts.
- SmallVector<Instruction *, 2> Bitcasts;
- SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo;
- for (User *U : AI->users()) {
- if (U->stripInBoundsConstantOffsets() == AI) {
- Instruction *Bitcast = cast<Instruction>(U);
- LifetimeMarkerInfo LMI = getLifetimeMarkers(Bitcast, ExitBlock);
- if (LMI.LifeStart) {
- Bitcasts.push_back(Bitcast);
- BitcastLifetimeInfo.push_back(LMI);
- continue;
- }
- }
+ LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(CEAC, AI, ExitBlock);
+ bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo);
+ if (Moved) {
+ LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n");
+ SinkCands.insert(AI);
+ continue;
+ }
- // Found unknown use of AI.
- if (!definedInRegion(Blocks, U)) {
- Bitcasts.clear();
- break;
+ // Follow any bitcasts.
+ SmallVector<Instruction *, 2> Bitcasts;
+ SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo;
+ for (User *U : AI->users()) {
+ if (U->stripInBoundsConstantOffsets() == AI) {
+ Instruction *Bitcast = cast<Instruction>(U);
+ LifetimeMarkerInfo LMI = getLifetimeMarkers(CEAC, Bitcast, ExitBlock);
+ if (LMI.LifeStart) {
+ Bitcasts.push_back(Bitcast);
+ BitcastLifetimeInfo.push_back(LMI);
+ continue;
}
}
- // Either no bitcasts reference the alloca or there are unknown uses.
- if (Bitcasts.empty())
- continue;
+ // Found unknown use of AI.
+ if (!definedInRegion(Blocks, U)) {
+ Bitcasts.clear();
+ break;
+ }
+ }
- LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n");
- SinkCands.insert(AI);
- for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) {
- Instruction *BitcastAddr = Bitcasts[I];
- const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I];
- assert(LMI.LifeStart &&
- "Unsafe to sink bitcast without lifetime markers");
- moveOrIgnoreLifetimeMarkers(LMI);
- if (!definedInRegion(Blocks, BitcastAddr)) {
- LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr
- << "\n");
- SinkCands.insert(BitcastAddr);
- }
+ // Either no bitcasts reference the alloca or there are unknown uses.
+ if (Bitcasts.empty())
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n");
+ SinkCands.insert(AI);
+ for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) {
+ Instruction *BitcastAddr = Bitcasts[I];
+ const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I];
+ assert(LMI.LifeStart &&
+ "Unsafe to sink bitcast without lifetime markers");
+ moveOrIgnoreLifetimeMarkers(LMI);
+ if (!definedInRegion(Blocks, BitcastAddr)) {
+ LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr
+ << "\n");
+ SinkCands.insert(BitcastAddr);
}
}
}
}
+bool CodeExtractor::isEligible() const {
+ if (Blocks.empty())
+ return false;
+ BasicBlock *Header = *Blocks.begin();
+ Function *F = Header->getParent();
+
+ // For functions with varargs, check that varargs handling is only done in the
+ // outlined function, i.e vastart and vaend are only used in outlined blocks.
+ if (AllowVarArgs && F->getFunctionType()->isVarArg()) {
+ auto containsVarArgIntrinsic = [](const Instruction &I) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
+ if (const Function *Callee = CI->getCalledFunction())
+ return Callee->getIntrinsicID() == Intrinsic::vastart ||
+ Callee->getIntrinsicID() == Intrinsic::vaend;
+ return false;
+ };
+
+ for (auto &BB : *F) {
+ if (Blocks.count(&BB))
+ continue;
+ if (llvm::any_of(BB, containsVarArgIntrinsic))
+ return false;
+ }
+ }
+ return true;
+}
+
void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
const ValueSet &SinkCands) const {
for (BasicBlock *BB : Blocks) {
// If a used value is defined outside the region, it's an input. If an
// instruction is used outside the region, it's an output.
for (Instruction &II : *BB) {
- for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE;
- ++OI) {
- Value *V = *OI;
+ for (auto &OI : II.operands()) {
+ Value *V = OI;
if (!SinkCands.count(V) && definedInCaller(Blocks, V))
Inputs.insert(V);
}
@@ -904,12 +961,12 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
// within the new function. This must be done before we lose track of which
// blocks were originally in the code region.
std::vector<User *> Users(header->user_begin(), header->user_end());
- for (unsigned i = 0, e = Users.size(); i != e; ++i)
+ for (auto &U : Users)
// The BasicBlock which contains the branch is not in the region
// modify the branch target to a new block
- if (Instruction *I = dyn_cast<Instruction>(Users[i]))
- if (I->isTerminator() && !Blocks.count(I->getParent()) &&
- I->getParent()->getParent() == oldFunction)
+ if (Instruction *I = dyn_cast<Instruction>(U))
+ if (I->isTerminator() && I->getFunction() == oldFunction &&
+ !Blocks.count(I->getParent()))
I->replaceUsesOfWith(header, newHeader);
return newFunction;
@@ -1277,13 +1334,6 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) {
// Insert this basic block into the new function
newBlocks.push_back(Block);
-
- // Remove @llvm.assume calls that were moved to the new function from the
- // old function's assumption cache.
- if (AC)
- for (auto &I : *Block)
- if (match(&I, m_Intrinsic<Intrinsic::assume>()))
- AC->unregisterAssumption(cast<CallInst>(&I));
}
}
@@ -1332,7 +1382,8 @@ void CodeExtractor::calculateNewCallTerminatorWeights(
MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
}
-Function *CodeExtractor::extractCodeRegion() {
+Function *
+CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) {
if (!isEligible())
return nullptr;
@@ -1341,27 +1392,6 @@ Function *CodeExtractor::extractCodeRegion() {
BasicBlock *header = *Blocks.begin();
Function *oldFunction = header->getParent();
- // For functions with varargs, check that varargs handling is only done in the
- // outlined function, i.e vastart and vaend are only used in outlined blocks.
- if (AllowVarArgs && oldFunction->getFunctionType()->isVarArg()) {
- auto containsVarArgIntrinsic = [](Instruction &I) {
- if (const CallInst *CI = dyn_cast<CallInst>(&I))
- if (const Function *F = CI->getCalledFunction())
- return F->getIntrinsicID() == Intrinsic::vastart ||
- F->getIntrinsicID() == Intrinsic::vaend;
- return false;
- };
-
- for (auto &BB : *oldFunction) {
- if (Blocks.count(&BB))
- continue;
- if (llvm::any_of(BB, containsVarArgIntrinsic))
- return nullptr;
- }
- }
- ValueSet inputs, outputs, SinkingCands, HoistingCands;
- BasicBlock *CommonExit = nullptr;
-
// Calculate the entry frequency of the new function before we change the root
// block.
BlockFrequency EntryFreq;
@@ -1375,6 +1405,15 @@ Function *CodeExtractor::extractCodeRegion() {
}
}
+ if (AC) {
+ // Remove @llvm.assume calls that were moved to the new function from the
+ // old function's assumption cache.
+ for (BasicBlock *Block : Blocks)
+ for (auto &I : *Block)
+ if (match(&I, m_Intrinsic<Intrinsic::assume>()))
+ AC->unregisterAssumption(cast<CallInst>(&I));
+ }
+
// If we have any return instructions in the region, split those blocks so
// that the return is not in the region.
splitReturnBlocks();
@@ -1428,7 +1467,9 @@ Function *CodeExtractor::extractCodeRegion() {
}
newFuncRoot->getInstList().push_back(BranchI);
- findAllocas(SinkingCands, HoistingCands, CommonExit);
+ ValueSet inputs, outputs, SinkingCands, HoistingCands;
+ BasicBlock *CommonExit = nullptr;
+ findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
assert(HoistingCands.empty() || CommonExit);
// Find inputs to, outputs from the code region.
@@ -1563,5 +1604,17 @@ Function *CodeExtractor::extractCodeRegion() {
});
LLVM_DEBUG(if (verifyFunction(*oldFunction))
report_fatal_error("verification of oldFunction failed!"));
+ LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, AC))
+ report_fatal_error("Stale Asumption cache for old Function!"));
return newFunction;
}
+
+bool CodeExtractor::verifyAssumptionCache(const Function& F,
+ AssumptionCache *AC) {
+ for (auto AssumeVH : AC->assumptions()) {
+ CallInst *I = cast<CallInst>(AssumeVH);
+ if (I->getFunction() != &F)
+ return true;
+ }
+ return false;
+}
diff --git a/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index 4aa40eeadda4..57e2ff0251a9 100644
--- a/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -24,7 +24,7 @@ static void insertCall(Function &CurFn, StringRef Func,
if (Func == "mcount" ||
Func == ".mcount" ||
- Func == "\01__gnu_mcount_nc" ||
+ Func == "llvm.arm.gnu.eabi.mcount" ||
Func == "\01_mcount" ||
Func == "\01mcount" ||
Func == "__mcount" ||
diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp
index 0e203f4e075d..ad36790b8c6a 100644
--- a/lib/Transforms/Utils/Evaluator.cpp
+++ b/lib/Transforms/Utils/Evaluator.cpp
@@ -469,7 +469,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
return false; // Cannot handle array allocs.
}
Type *Ty = AI->getAllocatedType();
- AllocaTmps.push_back(llvm::make_unique<GlobalVariable>(
+ AllocaTmps.push_back(std::make_unique<GlobalVariable>(
Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty),
AI->getName(), /*TLMode=*/GlobalValue::NotThreadLocal,
AI->getType()->getPointerAddressSpace()));
diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp
index 0c52e6f3703b..893f23eb6048 100644
--- a/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/lib/Transforms/Utils/FlattenCFG.cpp
@@ -67,7 +67,7 @@ public:
/// Before:
/// ......
/// %cmp10 = fcmp une float %tmp1, %tmp2
-/// br i1 %cmp1, label %if.then, label %lor.rhs
+/// br i1 %cmp10, label %if.then, label %lor.rhs
///
/// lor.rhs:
/// ......
@@ -251,8 +251,8 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
bool EverChanged = false;
for (; CurrBlock != FirstCondBlock;
CurrBlock = CurrBlock->getSinglePredecessor()) {
- BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator());
- CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
+ auto *BI = cast<BranchInst>(CurrBlock->getTerminator());
+ auto *CI = dyn_cast<CmpInst>(BI->getCondition());
if (!CI)
continue;
@@ -278,7 +278,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
// Do the transformation.
BasicBlock *CB;
- BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator());
+ BranchInst *PBI = cast<BranchInst>(FirstCondBlock->getTerminator());
bool Iteration = true;
IRBuilder<>::InsertPointGuard Guard(Builder);
Value *PC = PBI->getCondition();
@@ -444,7 +444,7 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
FirstEntryBlock->getInstList().pop_back();
FirstEntryBlock->getInstList()
.splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList());
- BranchInst *PBI = dyn_cast<BranchInst>(FirstEntryBlock->getTerminator());
+ BranchInst *PBI = cast<BranchInst>(FirstEntryBlock->getTerminator());
Value *CC = PBI->getCondition();
BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
@@ -453,6 +453,16 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
PBI->replaceUsesOfWith(CC, NC);
Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
+ // Handle PHI node to replace its predecessors to FirstEntryBlock.
+ for (BasicBlock *Succ : successors(PBI)) {
+ for (PHINode &Phi : Succ->phis()) {
+ for (unsigned i = 0, e = Phi.getNumIncomingValues(); i != e; ++i) {
+ if (Phi.getIncomingBlock(i) == SecondEntryBlock)
+ Phi.setIncomingBlock(i, FirstEntryBlock);
+ }
+ }
+ }
+
// Remove IfTrue1
if (IfTrue1 != FirstEntryBlock) {
IfTrue1->dropAllReferences();
diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp
index c9cc0990f237..76b4635ad501 100644
--- a/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -210,7 +210,7 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
if (Function *F = dyn_cast<Function>(&GV)) {
if (!F->isDeclaration()) {
for (auto &S : VI.getSummaryList()) {
- FunctionSummary *FS = dyn_cast<FunctionSummary>(S->getBaseObject());
+ auto *FS = cast<FunctionSummary>(S->getBaseObject());
if (FS->modulePath() == M.getModuleIdentifier()) {
F->setEntryCount(Function::ProfileCount(FS->entryCount(),
Function::PCT_Synthetic));
diff --git a/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
index 8041e66e6c4c..ea93f99d69e3 100644
--- a/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
+++ b/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
@@ -25,8 +25,8 @@ ImportedFunctionsInliningStatistics::createInlineGraphNode(const Function &F) {
auto &ValueLookup = NodesMap[F.getName()];
if (!ValueLookup) {
- ValueLookup = llvm::make_unique<InlineGraphNode>();
- ValueLookup->Imported = F.getMetadata("thinlto_src_module") != nullptr;
+ ValueLookup = std::make_unique<InlineGraphNode>();
+ ValueLookup->Imported = F.hasMetadata("thinlto_src_module");
}
return *ValueLookup;
}
@@ -64,7 +64,7 @@ void ImportedFunctionsInliningStatistics::setModuleInfo(const Module &M) {
if (F.isDeclaration())
continue;
AllFunctions++;
- ImportedFunctions += int(F.getMetadata("thinlto_src_module") != nullptr);
+ ImportedFunctions += int(F.hasMetadata("thinlto_src_module"));
}
}
static std::string getStatString(const char *Msg, int32_t Fraction, int32_t All,
diff --git a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index 8c67d1dc6eb3..ed28fffc22b5 100644
--- a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -533,7 +533,7 @@ static bool runImpl(Function &F, const TargetLibraryInfo &TLI,
}
bool LibCallsShrinkWrapLegacyPass::runOnFunction(Function &F) {
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
return runImpl(F, TLI, DT);
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 39b6b889f91c..5bcd05757ec1 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -324,8 +324,14 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
Value *Address = IBI->getAddress();
IBI->eraseFromParent();
if (DeleteDeadConditions)
+ // Delete pointer cast instructions.
RecursivelyDeleteTriviallyDeadInstructions(Address, TLI);
+ // Also zap the blockaddress constant if there are no users remaining,
+ // otherwise the destination is still marked as having its address taken.
+ if (BA->use_empty())
+ BA->destroyConstant();
+
// If we didn't find our destination in the IBI successor list, then we
// have undefined behavior. Replace the unconditional branch with an
// 'unreachable' instruction.
@@ -633,17 +639,6 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB,
// Control Flow Graph Restructuring.
//
-/// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this
-/// method is called when we're about to delete Pred as a predecessor of BB. If
-/// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred.
-///
-/// Unlike the removePredecessor method, this attempts to simplify uses of PHI
-/// nodes that collapse into identity values. For example, if we have:
-/// x = phi(1, 0, 0, 0)
-/// y = and x, z
-///
-/// .. and delete the predecessor corresponding to the '1', this will attempt to
-/// recursively fold the and to 0.
void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
DomTreeUpdater *DTU) {
// This only adjusts blocks with PHI nodes.
@@ -672,10 +667,6 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
DTU->applyUpdatesPermissive({{DominatorTree::Delete, Pred, BB}});
}
-/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
-/// predecessor is known to have one successor (DestBB!). Eliminate the edge
-/// between them, moving the instructions in the predecessor into DestBB and
-/// deleting the predecessor block.
void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB,
DomTreeUpdater *DTU) {
@@ -755,15 +746,14 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB,
}
}
-/// CanMergeValues - Return true if we can choose one of these values to use
-/// in place of the other. Note that we will always choose the non-undef
-/// value to keep.
+/// Return true if we can choose one of these values to use in place of the
+/// other. Note that we will always choose the non-undef value to keep.
static bool CanMergeValues(Value *First, Value *Second) {
return First == Second || isa<UndefValue>(First) || isa<UndefValue>(Second);
}
-/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
-/// almost-empty BB ending in an unconditional branch to Succ, into Succ.
+/// Return true if we can fold BB, an almost-empty BB ending in an unconditional
+/// branch to Succ, into Succ.
///
/// Assumption: Succ is the single successor for BB.
static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
@@ -956,11 +946,6 @@ static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB,
replaceUndefValuesInPhi(PN, IncomingValues);
}
-/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
-/// unconditional branch, and contains no instructions other than PHI nodes,
-/// potential side-effect free intrinsics and the branch. If possible,
-/// eliminate BB by rewriting all the predecessors to branch to the successor
-/// block and return true. If we can't transform, return false.
bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
DomTreeUpdater *DTU) {
assert(BB != &BB->getParent()->getEntryBlock() &&
@@ -1088,10 +1073,6 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
return true;
}
-/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI
-/// nodes in this block. This doesn't try to be clever about PHI nodes
-/// which differ only in the order of the incoming values, but instcombine
-/// orders them so it usually won't matter.
bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
// This implementation doesn't currently consider undef operands
// specially. Theoretically, two phis which are identical except for
@@ -1151,10 +1132,10 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
/// often possible though. If alignment is important, a more reliable approach
/// is to simply align all global variables and allocation instructions to
/// their preferred alignment from the beginning.
-static unsigned enforceKnownAlignment(Value *V, unsigned Align,
+static unsigned enforceKnownAlignment(Value *V, unsigned Alignment,
unsigned PrefAlign,
const DataLayout &DL) {
- assert(PrefAlign > Align);
+ assert(PrefAlign > Alignment);
V = V->stripPointerCasts();
@@ -1165,36 +1146,36 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
// stripPointerCasts recurses through infinite layers of bitcasts,
// while computeKnownBits is not allowed to traverse more than 6
// levels.
- Align = std::max(AI->getAlignment(), Align);
- if (PrefAlign <= Align)
- return Align;
+ Alignment = std::max(AI->getAlignment(), Alignment);
+ if (PrefAlign <= Alignment)
+ return Alignment;
// If the preferred alignment is greater than the natural stack alignment
// then don't round up. This avoids dynamic stack realignment.
- if (DL.exceedsNaturalStackAlignment(PrefAlign))
- return Align;
- AI->setAlignment(PrefAlign);
+ if (DL.exceedsNaturalStackAlignment(Align(PrefAlign)))
+ return Alignment;
+ AI->setAlignment(MaybeAlign(PrefAlign));
return PrefAlign;
}
if (auto *GO = dyn_cast<GlobalObject>(V)) {
// TODO: as above, this shouldn't be necessary.
- Align = std::max(GO->getAlignment(), Align);
- if (PrefAlign <= Align)
- return Align;
+ Alignment = std::max(GO->getAlignment(), Alignment);
+ if (PrefAlign <= Alignment)
+ return Alignment;
// If there is a large requested alignment and we can, bump up the alignment
// of the global. If the memory we set aside for the global may not be the
// memory used by the final program then it is impossible for us to reliably
// enforce the preferred alignment.
if (!GO->canIncreaseAlignment())
- return Align;
+ return Alignment;
- GO->setAlignment(PrefAlign);
+ GO->setAlignment(MaybeAlign(PrefAlign));
return PrefAlign;
}
- return Align;
+ return Alignment;
}
unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
@@ -1397,7 +1378,12 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
/// Determine whether this alloca is either a VLA or an array.
static bool isArray(AllocaInst *AI) {
return AI->isArrayAllocation() ||
- AI->getType()->getElementType()->isArrayTy();
+ (AI->getAllocatedType() && AI->getAllocatedType()->isArrayTy());
+}
+
+/// Determine whether this alloca is a structure.
+static bool isStructure(AllocaInst *AI) {
+ return AI->getAllocatedType() && AI->getAllocatedType()->isStructTy();
}
/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set
@@ -1422,7 +1408,7 @@ bool llvm::LowerDbgDeclare(Function &F) {
// stored on the stack, while the dbg.declare can only describe
// the stack slot (and at a lexical-scope granularity). Later
// passes will attempt to elide the stack slot.
- if (!AI || isArray(AI))
+ if (!AI || isArray(AI) || isStructure(AI))
continue;
// A volatile load/store means that the alloca can't be elided anyway.
@@ -1591,15 +1577,10 @@ static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
DIExpr->getElement(0) != dwarf::DW_OP_deref)
return;
- // Insert the offset immediately after the first deref.
+ // Insert the offset before the first deref.
// We could just change the offset argument of dbg.value, but it's unsigned...
- if (Offset) {
- SmallVector<uint64_t, 4> Ops;
- Ops.push_back(dwarf::DW_OP_deref);
- DIExpression::appendOffset(Ops, Offset);
- Ops.append(DIExpr->elements_begin() + 1, DIExpr->elements_end());
- DIExpr = Builder.createExpression(Ops);
- }
+ if (Offset)
+ DIExpr = DIExpression::prepend(DIExpr, 0, Offset);
Builder.insertDbgValueIntrinsic(NewAddress, DIVar, DIExpr, Loc, DVI);
DVI->eraseFromParent();
@@ -1957,18 +1938,24 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
return NumInstrsRemoved;
}
-/// changeToCall - Convert the specified invoke into a normal call.
-static void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr) {
- SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());
+CallInst *llvm::createCallMatchingInvoke(InvokeInst *II) {
+ SmallVector<Value *, 8> Args(II->arg_begin(), II->arg_end());
SmallVector<OperandBundleDef, 1> OpBundles;
II->getOperandBundlesAsDefs(OpBundles);
- CallInst *NewCall = CallInst::Create(
- II->getFunctionType(), II->getCalledValue(), Args, OpBundles, "", II);
- NewCall->takeName(II);
+ CallInst *NewCall = CallInst::Create(II->getFunctionType(),
+ II->getCalledValue(), Args, OpBundles);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
NewCall->setDebugLoc(II->getDebugLoc());
NewCall->copyMetadata(*II);
+ return NewCall;
+}
+
+/// changeToCall - Convert the specified invoke into a normal call.
+void llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) {
+ CallInst *NewCall = createCallMatchingInvoke(II);
+ NewCall->takeName(II);
+ NewCall->insertBefore(II);
II->replaceAllUsesWith(NewCall);
// Follow the call by a branch to the normal destination.
@@ -2223,12 +2210,10 @@ void llvm::removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU) {
/// removeUnreachableBlocks - Remove blocks that are not reachable, even
/// if they are in a dead cycle. Return true if a change was made, false
-/// otherwise. If `LVI` is passed, this function preserves LazyValueInfo
-/// after modifying the CFG.
-bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
- DomTreeUpdater *DTU,
+/// otherwise.
+bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
MemorySSAUpdater *MSSAU) {
- SmallPtrSet<BasicBlock*, 16> Reachable;
+ SmallPtrSet<BasicBlock *, 16> Reachable;
bool Changed = markAliveBlocks(F, Reachable, DTU);
// If there are unreachable blocks in the CFG...
@@ -2236,21 +2221,21 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
return Changed;
assert(Reachable.size() < F.size());
- NumRemoved += F.size()-Reachable.size();
+ NumRemoved += F.size() - Reachable.size();
SmallSetVector<BasicBlock *, 8> DeadBlockSet;
- for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) {
- auto *BB = &*I;
- if (Reachable.count(BB))
+ for (BasicBlock &BB : F) {
+ // Skip reachable basic blocks
+ if (Reachable.find(&BB) != Reachable.end())
continue;
- DeadBlockSet.insert(BB);
+ DeadBlockSet.insert(&BB);
}
if (MSSAU)
MSSAU->removeBlocks(DeadBlockSet);
// Loop over all of the basic blocks that are not reachable, dropping all of
- // their internal references. Update DTU and LVI if available.
+ // their internal references. Update DTU if available.
std::vector<DominatorTree::UpdateType> Updates;
for (auto *BB : DeadBlockSet) {
for (BasicBlock *Successor : successors(BB)) {
@@ -2259,26 +2244,18 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
if (DTU)
Updates.push_back({DominatorTree::Delete, BB, Successor});
}
- if (LVI)
- LVI->eraseBlock(BB);
BB->dropAllReferences();
- }
- for (Function::iterator I = ++F.begin(); I != F.end();) {
- auto *BB = &*I;
- if (Reachable.count(BB)) {
- ++I;
- continue;
- }
if (DTU) {
- // Remove the terminator of BB to clear the successor list of BB.
- if (BB->getTerminator())
- BB->getInstList().pop_back();
+ Instruction *TI = BB->getTerminator();
+ assert(TI && "Basic block should have a terminator");
+ // Terminators like invoke can have users. We have to replace their users,
+ // before removing them.
+ if (!TI->use_empty())
+ TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+ TI->eraseFromParent();
new UnreachableInst(BB->getContext(), BB);
assert(succ_empty(BB) && "The successor list of BB isn't empty before "
"applying corresponding DTU updates.");
- ++I;
- } else {
- I = F.getBasicBlockList().erase(I);
}
}
@@ -2294,7 +2271,11 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
}
if (!Deleted)
return false;
+ } else {
+ for (auto *BB : DeadBlockSet)
+ BB->eraseFromParent();
}
+
return true;
}
@@ -2363,6 +2344,9 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
K->setMetadata(Kind,
MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
break;
+ case LLVMContext::MD_preserve_access_index:
+ // Preserve !preserve.access.index in K.
+ break;
}
}
// Set !invariant.group from J if J has it. If both instructions have it
@@ -2385,10 +2369,61 @@ void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
LLVMContext::MD_invariant_group, LLVMContext::MD_align,
LLVMContext::MD_dereferenceable,
LLVMContext::MD_dereferenceable_or_null,
- LLVMContext::MD_access_group};
+ LLVMContext::MD_access_group, LLVMContext::MD_preserve_access_index};
combineMetadata(K, J, KnownIDs, KDominatesJ);
}
+void llvm::copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source) {
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
+ Source.getAllMetadata(MD);
+ MDBuilder MDB(Dest.getContext());
+ Type *NewType = Dest.getType();
+ const DataLayout &DL = Source.getModule()->getDataLayout();
+ for (const auto &MDPair : MD) {
+ unsigned ID = MDPair.first;
+ MDNode *N = MDPair.second;
+ // Note, essentially every kind of metadata should be preserved here! This
+ // routine is supposed to clone a load instruction changing *only its type*.
+ // The only metadata it makes sense to drop is metadata which is invalidated
+ // when the pointer type changes. This should essentially never be the case
+ // in LLVM, but we explicitly switch over only known metadata to be
+ // conservatively correct. If you are adding metadata to LLVM which pertains
+ // to loads, you almost certainly want to add it here.
+ switch (ID) {
+ case LLVMContext::MD_dbg:
+ case LLVMContext::MD_tbaa:
+ case LLVMContext::MD_prof:
+ case LLVMContext::MD_fpmath:
+ case LLVMContext::MD_tbaa_struct:
+ case LLVMContext::MD_invariant_load:
+ case LLVMContext::MD_alias_scope:
+ case LLVMContext::MD_noalias:
+ case LLVMContext::MD_nontemporal:
+ case LLVMContext::MD_mem_parallel_loop_access:
+ case LLVMContext::MD_access_group:
+ // All of these directly apply.
+ Dest.setMetadata(ID, N);
+ break;
+
+ case LLVMContext::MD_nonnull:
+ copyNonnullMetadata(Source, N, Dest);
+ break;
+
+ case LLVMContext::MD_align:
+ case LLVMContext::MD_dereferenceable:
+ case LLVMContext::MD_dereferenceable_or_null:
+ // These only directly apply if the new type is also a pointer.
+ if (NewType->isPointerTy())
+ Dest.setMetadata(ID, N);
+ break;
+
+ case LLVMContext::MD_range:
+ copyRangeMetadata(DL, Source, N, Dest);
+ break;
+ }
+ }
+}
+
void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) {
auto *ReplInst = dyn_cast<Instruction>(Repl);
if (!ReplInst)
@@ -2417,7 +2452,7 @@ void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) {
LLVMContext::MD_noalias, LLVMContext::MD_range,
LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
LLVMContext::MD_invariant_group, LLVMContext::MD_nonnull,
- LLVMContext::MD_access_group};
+ LLVMContext::MD_access_group, LLVMContext::MD_preserve_access_index};
combineMetadata(ReplInst, I, KnownIDs, false);
}
diff --git a/lib/Transforms/Utils/LoopRotationUtils.cpp b/lib/Transforms/Utils/LoopRotationUtils.cpp
index 37389a695b45..889ea5ca9970 100644
--- a/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -615,30 +615,9 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) {
LLVM_DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
<< LastExit->getName() << "\n");
- // Hoist the instructions from Latch into LastExit.
- Instruction *FirstLatchInst = &*(Latch->begin());
- LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(),
- Latch->begin(), Jmp->getIterator());
-
- // Update MemorySSA
- if (MSSAU)
- MSSAU->moveAllAfterMergeBlocks(Latch, LastExit, FirstLatchInst);
-
- unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1;
- BasicBlock *Header = Jmp->getSuccessor(0);
- assert(Header == L->getHeader() && "expected a backward branch");
-
- // Remove Latch from the CFG so that LastExit becomes the new Latch.
- BI->setSuccessor(FallThruPath, Header);
- Latch->replaceSuccessorsPhiUsesWith(LastExit);
- Jmp->eraseFromParent();
-
- // Nuke the Latch block.
- assert(Latch->empty() && "unable to evacuate Latch");
- LI->removeBlock(Latch);
- if (DT)
- DT->eraseNode(Latch);
- Latch->eraseFromParent();
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ MergeBlockIntoPredecessor(Latch, &DTU, LI, MSSAU, nullptr,
+ /*PredecessorWithTwoSuccessors=*/true);
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 7e6da02d5707..d0f89dc54bfb 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -808,7 +808,7 @@ bool LoopSimplify::runOnFunction(Function &F) {
auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
if (MSSAAnalysis) {
MSSA = &MSSAAnalysis->getMSSA();
- MSSAU = make_unique<MemorySSAUpdater>(MSSA);
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
}
}
@@ -835,12 +835,19 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F,
DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
ScalarEvolution *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F);
AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
+ auto *MSSAAnalysis = AM.getCachedResult<MemorySSAAnalysis>(F);
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (MSSAAnalysis) {
+ auto *MSSA = &MSSAAnalysis->getMSSA();
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
+ }
+
// Note that we don't preserve LCSSA in the new PM, if you need it run LCSSA
- // after simplifying the loops. MemorySSA is not preserved either.
+ // after simplifying the loops. MemorySSA is preserved if it exists.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
Changed |=
- simplifyLoop(*I, DT, LI, SE, AC, nullptr, /*PreserveLCSSA*/ false);
+ simplifyLoop(*I, DT, LI, SE, AC, MSSAU.get(), /*PreserveLCSSA*/ false);
if (!Changed)
return PreservedAnalyses::all();
@@ -853,6 +860,8 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F,
PA.preserve<SCEVAA>();
PA.preserve<ScalarEvolutionAnalysis>();
PA.preserve<DependenceAnalysis>();
+ if (MSSAAnalysis)
+ PA.preserve<MemorySSAAnalysis>();
// BPI maps conditional terminators to probabilities, LoopSimplify can insert
// blocks, but it does so only by splitting existing blocks and edges. This
// results in the interesting property that all new terminators inserted are
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index e39ade523714..a7590fc32545 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -711,7 +711,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
auto setDest = [LoopExit, ContinueOnTrue](BasicBlock *Src, BasicBlock *Dest,
ArrayRef<BasicBlock *> NextBlocks,
- BasicBlock *CurrentHeader,
+ BasicBlock *BlockInLoop,
bool NeedConditional) {
auto *Term = cast<BranchInst>(Src->getTerminator());
if (NeedConditional) {
@@ -723,7 +723,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (Dest != LoopExit) {
BasicBlock *BB = Src;
for (BasicBlock *Succ : successors(BB)) {
- if (Succ == CurrentHeader)
+ // Preserve the incoming value from BB if we are jumping to the block
+ // in the current loop.
+ if (Succ == BlockInLoop)
continue;
for (PHINode &Phi : Succ->phis())
Phi.removeIncomingValue(BB, false);
@@ -794,7 +796,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
// unconditional branch for some iterations.
NeedConditional = false;
- setDest(Headers[i], Dest, Headers, Headers[i], NeedConditional);
+ setDest(Headers[i], Dest, Headers, HeaderSucc[i], NeedConditional);
}
// Set up latches to branch to the new header in the unrolled iterations or
@@ -868,7 +870,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
assert(!DT || !UnrollVerifyDomtree ||
DT->verify(DominatorTree::VerificationLevel::Fast));
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
// Merge adjacent basic blocks, if possible.
for (BasicBlock *Latch : Latches) {
BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator());
@@ -888,6 +890,8 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
}
}
}
+ // Apply updates to the DomTree.
+ DT = &DTU.getDomTree();
// At this point, the code is well formed. We now simplify the unrolled loop,
// doing constant propagation and dead code elimination as we go.
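The Eager-to-Lazy switch above batches dominator-tree updates across the whole block-merging loop instead of applying each one immediately. A usage sketch, assuming the DomTreeUpdater API from llvm/Analysis/DomTreeUpdater.h; the loop body is abbreviated:

  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  for (BasicBlock *Latch : Latches)
    MergeBlockIntoPredecessor(Latch, &DTU, LI); // only queues CFG updates
  DT = &DTU.getDomTree(); // flushes the queue; DT is valid to query again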
diff --git a/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index ff49d83f25c5..bf2e87b0d49f 100644
--- a/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -517,6 +517,7 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]);
}
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
// Dominator Tree. Remove the old links between Fore, Sub and Aft, adding the
// new ones required.
if (Count != 1) {
@@ -530,7 +531,7 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
ForeBlocksLast.back(), SubLoopBlocksFirst[0]);
DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
SubLoopBlocksLast.back(), AftBlocksFirst[0]);
- DT->applyUpdates(DTUpdates);
+ DTU.applyUpdatesPermissive(DTUpdates);
}
// Merge adjacent basic blocks, if possible.
@@ -538,7 +539,6 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end());
MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end());
MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end());
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
while (!MergeBlocks.empty()) {
BasicBlock *BB = *MergeBlocks.begin();
BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
@@ -555,6 +555,8 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
} else
MergeBlocks.erase(BB);
}
+ // Apply updates to the DomTree.
+ DT = &DTU.getDomTree();
// At this point, the code is well formed. We now do a quick sweep over the
// inserted code, doing constant propagation and dead code elimination as we
diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 005306cf1898..58e42074f963 100644
--- a/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -62,9 +62,11 @@ static cl::opt<unsigned> UnrollForcePeelCount(
cl::desc("Force a peel count regardless of profiling information."));
static cl::opt<bool> UnrollPeelMultiDeoptExit(
- "unroll-peel-multi-deopt-exit", cl::init(false), cl::Hidden,
+ "unroll-peel-multi-deopt-exit", cl::init(true), cl::Hidden,
cl::desc("Allow peeling of loops with multiple deopt exits."));
+static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
+
// Designates that a Phi is estimated to become invariant after an "infinite"
// number of loop iterations (i.e. only may become an invariant if the loop is
// fully unrolled).
@@ -275,6 +277,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
<< " iterations.\n");
UP.PeelCount = UnrollForcePeelCount;
+ UP.PeelProfiledIterations = true;
return;
}
@@ -282,6 +285,13 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
if (!UP.AllowPeeling)
return;
+ unsigned AlreadyPeeled = 0;
+ if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
+ AlreadyPeeled = *Peeled;
+ // Stop if we already peeled off the maximum number of iterations.
+ if (AlreadyPeeled >= UnrollPeelMaxCount)
+ return;
+
// Here we try to get rid of Phis which become invariants after 1, 2, ..., N
// iterations of the loop. For this we compute the number for iterations after
// which every Phi is guaranteed to become an invariant, and try to peel the
@@ -317,11 +327,14 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
// Consider max peel count limitation.
assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
- LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
- << " iteration(s) to turn"
- << " some Phis into invariants.\n");
- UP.PeelCount = DesiredPeelCount;
- return;
+ if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
+ LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
+ << " iteration(s) to turn"
+ << " some Phis into invariants.\n");
+ UP.PeelCount = DesiredPeelCount;
+ UP.PeelProfiledIterations = false;
+ return;
+ }
}
}
@@ -330,6 +343,9 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
if (TripCount)
return;
+  // Do not apply profile-based peeling if it is disabled.
+ if (!UP.PeelProfiledIterations)
+ return;
// If we don't know the trip count, but have reason to believe the average
// trip count is low, peeling should be beneficial, since we will usually
// hit the peeled section.
@@ -344,7 +360,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
<< "\n");
if (*PeelCount) {
- if ((*PeelCount <= UnrollPeelMaxCount) &&
+ if ((*PeelCount + AlreadyPeeled <= UnrollPeelMaxCount) &&
(LoopSize * (*PeelCount + 1) <= UP.Threshold)) {
LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount
<< " iterations.\n");
@@ -352,6 +368,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
return;
}
LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
+  LLVM_DEBUG(dbgs() << "Already peeled count: " << AlreadyPeeled << "\n");
LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1)
<< "\n");
@@ -364,88 +381,77 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
/// iteration.
/// This sets the branch weights for the latch of the recently peeled off loop
/// iteration correctly.
-/// Our goal is to make sure that:
-/// a) The total weight of all the copies of the loop body is preserved.
-/// b) The total weight of the loop exit is preserved.
-/// c) The body weight is reasonably distributed between the peeled iterations.
+/// Let F be the weight of the edge from latch to header.
+/// Let E be the weight of the edge from latch to exit.
+/// F/(F+E) is then the probability of taking the backedge and E/(F+E) the
+/// probability of exiting, so the estimated trip count is TC = F / E.
+/// For the I-th peeled-off iteration (counting from 0) we set the weights of
+/// the peeled latch to (TC - I, 1). This gives a reasonable distribution:
+/// the probability of exiting, 1/(TC - I + 1), increases with each peeled
+/// iteration, while the estimated trip count of the remaining loop drops
+/// by I.
+/// To avoid dealing with division rounding we can just multiply both parts
+/// of the weights by E and use (F - I * E, E) instead.
///
/// \param Header The copy of the header block that belongs to next iteration.
/// \param LatchBR The copy of the latch branch that belongs to this iteration.
-/// \param IterNumber The serial number of the iteration that was just
-/// peeled off.
-/// \param AvgIters The average number of iterations we expect the loop to have.
-/// \param[in,out] PeeledHeaderWeight The total number of dynamic loop
-/// iterations that are unaccounted for. As an input, it represents the number
-/// of times we expect to enter the header of the iteration currently being
-/// peeled off. The output is the number of times we expect to enter the
-/// header of the next iteration.
+/// \param[in,out] FallThroughWeight The weight of the edge from latch to
+/// header before peeling (in) and after peeled off one iteration (out).
static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
- unsigned IterNumber, unsigned AvgIters,
- uint64_t &PeeledHeaderWeight) {
- if (!PeeledHeaderWeight)
+ uint64_t ExitWeight,
+ uint64_t &FallThroughWeight) {
+  // A FallThroughWeight of 0 means that there are no branch weights on the
+  // original latch block, or that the estimated trip count is zero.
+ if (!FallThroughWeight)
return;
- // FIXME: Pick a more realistic distribution.
- // Currently the proportion of weight we assign to the fall-through
- // side of the branch drops linearly with the iteration number, and we use
- // a 0.9 fudge factor to make the drop-off less sharp...
- uint64_t FallThruWeight =
- PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9);
- uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight;
- PeeledHeaderWeight -= ExitWeight;
unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
MDBuilder MDB(LatchBR->getContext());
MDNode *WeightNode =
- HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight)
- : MDB.createBranchWeights(FallThruWeight, ExitWeight);
+ HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)
+ : MDB.createBranchWeights(FallThroughWeight, ExitWeight);
LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
+ FallThroughWeight =
+ FallThroughWeight > ExitWeight ? FallThroughWeight - ExitWeight : 1;
}
/// Initialize the weights.
///
/// \param Header The header block.
/// \param LatchBR The latch branch.
-/// \param AvgIters The average number of iterations we expect the loop to have.
-/// \param[out] ExitWeight The # of times the edge from Latch to Exit is taken.
-/// \param[out] CurHeaderWeight The # of times the header is executed.
+/// \param[out] ExitWeight The weight of the edge from Latch to Exit.
+/// \param[out] FallThroughWeight The weight of the edge from Latch to Header.
static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
- unsigned AvgIters, uint64_t &ExitWeight,
- uint64_t &CurHeaderWeight) {
+ uint64_t &ExitWeight,
+ uint64_t &FallThroughWeight) {
uint64_t TrueWeight, FalseWeight;
if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight))
return;
unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
- // The # of times the loop body executes is the sum of the exit block
- // is taken and the # of times the backedges are taken.
- CurHeaderWeight = TrueWeight + FalseWeight;
+ FallThroughWeight = HeaderIdx ? FalseWeight : TrueWeight;
}
/// Update the weights of original Latch block after peeling off all iterations.
///
/// \param Header The header block.
/// \param LatchBR The latch branch.
-/// \param ExitWeight The weight of the edge from Latch to Exit block.
-/// \param CurHeaderWeight The # of time the header is executed.
+/// \param ExitWeight The weight of the edge from Latch to Exit.
+/// \param FallThroughWeight The weight of the edge from Latch to Header.
static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
- uint64_t ExitWeight, uint64_t CurHeaderWeight) {
- // Adjust the branch weights on the loop exit.
- if (!ExitWeight)
+ uint64_t ExitWeight,
+ uint64_t FallThroughWeight) {
+  // A FallThroughWeight of 0 means that there are no branch weights on the
+  // original latch block, or that the estimated trip count is zero.
+ if (!FallThroughWeight)
return;
- // The backedge count is the difference of current header weight and
- // current loop exit weight. If the current header weight is smaller than
- // the current loop exit weight, we mark the loop backedge weight as 1.
- uint64_t BackEdgeWeight = 0;
- if (ExitWeight < CurHeaderWeight)
- BackEdgeWeight = CurHeaderWeight - ExitWeight;
- else
- BackEdgeWeight = 1;
+ // Sets the branch weights on the loop exit.
MDBuilder MDB(LatchBR->getContext());
unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
MDNode *WeightNode =
- HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
- : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
+ HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)
+ : MDB.createBranchWeights(FallThroughWeight, ExitWeight);
LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
}
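A worked example of the (F - I * E, E) scheme described above, written as self-contained arithmetic with assumed starting weights:

  #include <cstdint>
  #include <cstdio>

  int main() {
    // Assumed initial latch weights: F = 100 (to header), E = 10 (to exit),
    // so the estimated trip count is F / E = 10.
    uint64_t ExitWeight = 10, FallThroughWeight = 100;
    for (unsigned Iter = 0; Iter < 3; ++Iter) {
      // The peeled latch of iteration Iter receives (F - Iter * E, E):
      // (100, 10), (90, 10), (80, 10); the exit probability rises each time.
      std::printf("iter %u: (%llu, %llu)\n", Iter,
                  (unsigned long long)FallThroughWeight,
                  (unsigned long long)ExitWeight);
      // Mirrors updateBranchWeights: consume one exit's worth of weight.
      FallThroughWeight =
          FallThroughWeight > ExitWeight ? FallThroughWeight - ExitWeight : 1;
    }
    // fixupBranchWeights then leaves (70, 10) on the original latch: the
    // remaining loop's estimated trip count fell from 10 to 7.
    std::printf("final: (%llu, %llu)\n",
                (unsigned long long)FallThroughWeight,
                (unsigned long long)ExitWeight);
  }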
@@ -586,11 +592,30 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
DenseMap<BasicBlock *, BasicBlock *> ExitIDom;
if (DT) {
+    // We'd like to determine the idom of the exit block after peeling one
+    // iteration.
+    // Let Exit be the exit block.
+    // Let ExitingSet be the set of predecessors of Exit; they are the
+    // exiting blocks.
+    // Let Latch' and ExitingSet' be the copies after one peeled iteration.
+    // We'd like to find idom'(Exit), the idom of Exit after peeling.
+    // It is evident that idom'(Exit) is the nearest common dominator of
+    // ExitingSet and ExitingSet'.
+    // idom(Exit) is the nearest common dominator of ExitingSet.
+    // idom(Exit)' is the nearest common dominator of ExitingSet'.
+    // Taking into account that we have a single Latch, Latch' will dominate
+    // Header and idom(Exit).
+    // So idom'(Exit) is the nearest common dominator of idom(Exit)' and
+    // Latch'.
+    // All these basic blocks are in the same loop, so what we find is
+    // (nearest common dominator of idom(Exit) and Latch)'.
+    // In the loop below we remember the nearest common dominator of
+    // idom(Exit) and Latch in order to update the idom of Exit later.
assert(L->hasDedicatedExits() && "No dedicated exits?");
for (auto Edge : ExitEdges) {
if (ExitIDom.count(Edge.second))
continue;
- BasicBlock *BB = DT->getNode(Edge.second)->getIDom()->getBlock();
+ BasicBlock *BB = DT->findNearestCommonDominator(
+ DT->getNode(Edge.second)->getIDom()->getBlock(), Latch);
assert(L->contains(BB) && "IDom is not in a loop");
ExitIDom[Edge.second] = BB;
}
@@ -659,23 +684,14 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// newly created branches.
BranchInst *LatchBR =
cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator());
- uint64_t ExitWeight = 0, CurHeaderWeight = 0;
- initBranchWeights(Header, LatchBR, PeelCount, ExitWeight, CurHeaderWeight);
+ uint64_t ExitWeight = 0, FallThroughWeight = 0;
+ initBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
// For each peeled-off iteration, make a copy of the loop.
for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
SmallVector<BasicBlock *, 8> NewBlocks;
ValueToValueMapTy VMap;
- // Subtract the exit weight from the current header weight -- the exit
- // weight is exactly the weight of the previous iteration's header.
- // FIXME: due to the way the distribution is constructed, we need a
- // guard here to make sure we don't end up with non-positive weights.
- if (ExitWeight < CurHeaderWeight)
- CurHeaderWeight -= ExitWeight;
- else
- CurHeaderWeight = 1;
-
cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
LoopBlocks, VMap, LVMap, DT, LI);
@@ -697,8 +713,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
}
auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
- updateBranchWeights(InsertBot, LatchBRCopy, Iter,
- PeelCount, ExitWeight);
+ updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight);
// Remove Loop metadata from the latch branch instruction
// because it is not the Loop's latch branch anymore.
LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr);
@@ -724,7 +739,13 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
}
- fixupBranchWeights(Header, LatchBR, ExitWeight, CurHeaderWeight);
+ fixupBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
+
+ // Update Metadata for count of peeled off iterations.
+ unsigned AlreadyPeeled = 0;
+ if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
+ AlreadyPeeled = *Peeled;
+ addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount);
if (Loop *ParentLoop = L->getParentLoop())
L = ParentLoop;
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index ec226e65f650..b4d7f35d2d9a 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -45,6 +46,7 @@ using namespace llvm::PatternMatch;
#define DEBUG_TYPE "loop-utils"
static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
+static const char *LLVMLoopDisableLICM = "llvm.licm.disable";
bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
MemorySSAUpdater *MSSAU,
@@ -169,6 +171,8 @@ void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
AU.addPreserved<SCEVAAWrapperPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
+ // FIXME: When all loop passes preserve MemorySSA, it can be required and
+ // preserved here instead of the individual handling in each pass.
}
/// Manually defined generic "LoopPass" dependency initialization. This is used
@@ -189,6 +193,54 @@ void llvm::initializeLoopPassPass(PassRegistry &Registry) {
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+}
+
+/// Create an MDNode for the input string key and unsigned value.
+static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) {
+ LLVMContext &Context = TheLoop->getHeader()->getContext();
+ Metadata *MDs[] = {
+ MDString::get(Context, Name),
+ ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))};
+ return MDNode::get(Context, MDs);
+}
+
+/// Set the given key/value pair in the loop metadata, keeping the other
+/// operands intact. If the key is already present, update its value only if
+/// it differs.
+void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD,
+ unsigned V) {
+ SmallVector<Metadata *, 4> MDs(1);
+ // If the loop already has metadata, retain it.
+ MDNode *LoopID = TheLoop->getLoopID();
+ if (LoopID) {
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
+ // If it is of form key = value, try to parse it.
+ if (Node->getNumOperands() == 2) {
+ MDString *S = dyn_cast<MDString>(Node->getOperand(0));
+ if (S && S->getString().equals(StringMD)) {
+ ConstantInt *IntMD =
+ mdconst::extract_or_null<ConstantInt>(Node->getOperand(1));
+ if (IntMD && IntMD->getSExtValue() == V)
+ // It is already in place. Do nothing.
+ return;
+          // We need to update the value, so just skip it here; it will
+          // be added back after the other existing nodes are copied.
+ continue;
+ }
+ }
+ MDs.push_back(Node);
+ }
+ }
+ // Add new metadata.
+ MDs.push_back(createStringMetadata(TheLoop, StringMD, V));
+ // Replace current metadata node with new one.
+ LLVMContext &Context = TheLoop->getHeader()->getContext();
+ MDNode *NewLoopID = MDNode::get(Context, MDs);
+ // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+ TheLoop->setLoopID(NewLoopID);
}
/// Find string metadata for loop
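A hedged usage sketch of the new helper; the key is the one LoopUnrollPeel.cpp registers above, and the IR rendering in the comments is illustrative rather than verbatim output:

  addStringMetadataToLoop(L, "llvm.loop.peeled.count", 2);
  // The loop ID now carries a key/value operand, roughly:
  //   !0 = distinct !{!0, !1}
  //   !1 = !{!"llvm.loop.peeled.count", i32 2}
  addStringMetadataToLoop(L, "llvm.loop.peeled.count", 2); // same value: no-op
  addStringMetadataToLoop(L, "llvm.loop.peeled.count", 5); // replaces i32 2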
@@ -332,6 +384,10 @@ bool llvm::hasDisableAllTransformsHint(const Loop *L) {
return getBooleanLoopAttribute(L, LLVMLoopDisableNonforced);
}
+bool llvm::hasDisableLICMTransformsHint(const Loop *L) {
+ return getBooleanLoopAttribute(L, LLVMLoopDisableLICM);
+}
+
TransformationMode llvm::hasUnrollTransformation(Loop *L) {
if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
return TM_SuppressedByUser;
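For reference, the new hint corresponds to loop metadata of the following shape (illustrative IR in the comments); the query itself is a plain boolean-attribute lookup:

  // br i1 %cond, label %header, label %exit, !llvm.loop !0
  // !0 = distinct !{!0, !1}
  // !1 = !{!"llvm.licm.disable"}
  if (hasDisableLICMTransformsHint(L))
    return false; // assumed caller behavior: LICM skips this loop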
diff --git a/lib/Transforms/Utils/LoopVersioning.cpp b/lib/Transforms/Utils/LoopVersioning.cpp
index a9a480a4b7f9..5d7759056c7d 100644
--- a/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/lib/Transforms/Utils/LoopVersioning.cpp
@@ -92,8 +92,8 @@ void LoopVersioning::versionLoop(
// Create empty preheader for the loop (and after cloning for the
// non-versioned loop).
BasicBlock *PH =
- SplitBlock(RuntimeCheckBB, RuntimeCheckBB->getTerminator(), DT, LI);
- PH->setName(VersionedLoop->getHeader()->getName() + ".ph");
+ SplitBlock(RuntimeCheckBB, RuntimeCheckBB->getTerminator(), DT, LI,
+ nullptr, VersionedLoop->getHeader()->getName() + ".ph");
// Clone the loop including the preheader.
//
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index c0b7edc547fd..60bb2775a194 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -121,15 +121,14 @@ namespace {
}
// Rename all functions
- const TargetLibraryInfo &TLI =
- getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
for (auto &F : M) {
StringRef Name = F.getName();
LibFunc Tmp;
// Leave library functions alone because their presence or absence could
// affect the behavior of other passes.
if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
- TLI.getLibFunc(F, Tmp))
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F).getLibFunc(
+ F, Tmp))
continue;
// Leave @main alone. The output of -metarenamer might be passed to
diff --git a/lib/Transforms/Utils/MisExpect.cpp b/lib/Transforms/Utils/MisExpect.cpp
new file mode 100644
index 000000000000..26d3402bd279
--- /dev/null
+++ b/lib/Transforms/Utils/MisExpect.cpp
@@ -0,0 +1,177 @@
+//===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This contains code to emit warnings for potentially incorrect usage of the
+// llvm.expect intrinsic. This utility extracts the threshold values from
+// metadata associated with the instrumented Branch or Switch instruction. The
+// threshold values are then used to determine if a warning should be emitted.
+//
+// MisExpect metadata is generated when llvm.expect intrinsics are lowered;
+// see LowerExpectIntrinsic.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/MisExpect.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormatVariadic.h"
+#include <cstdint>
+#include <functional>
+#include <numeric>
+
+#define DEBUG_TYPE "misexpect"
+
+using namespace llvm;
+using namespace misexpect;
+
+namespace llvm {
+
+// Command line option to enable/disable the warning when profile data suggests
+// a mismatch with the use of the llvm.expect intrinsic
+static cl::opt<bool> PGOWarnMisExpect(
+ "pgo-warn-misexpect", cl::init(false), cl::Hidden,
+ cl::desc("Use this option to turn on/off "
+ "warnings about incorrect usage of llvm.expect intrinsics."));
+
+} // namespace llvm
+
+namespace {
+
+Instruction *getOprndOrInst(Instruction *I) {
+ assert(I != nullptr && "MisExpect target Instruction cannot be nullptr");
+ Instruction *Ret = nullptr;
+ if (auto *B = dyn_cast<BranchInst>(I)) {
+ Ret = dyn_cast<Instruction>(B->getCondition());
+ }
+ // TODO: Find a way to resolve condition location for switches
+  // Using the condition of the switch seems to often resolve to an earlier
+  // point in the program, i.e. the calculation of the switch condition,
+  // rather than the switch's location in the source code. Thus, we should
+  // use the instruction to get source code locations rather than the
+  // condition, to improve diagnostic output such as the caret. If the same
+  // problem exists for branch instructions, then we should remove this
+  // function and use the instruction directly.
+ //
+ // else if (auto S = dyn_cast<SwitchInst>(I)) {
+ // Ret = I;
+ //}
+ return Ret ? Ret : I;
+}
+
+void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx,
+ uint64_t ProfCount, uint64_t TotalCount) {
+ double PercentageCorrect = (double)ProfCount / TotalCount;
+ auto PerString =
+ formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount);
+ auto RemStr = formatv(
+ "Potential performance regression from use of the llvm.expect intrinsic: "
+ "Annotation was correct on {0} of profiled executions.",
+ PerString);
+ Twine Msg(PerString);
+ Instruction *Cond = getOprndOrInst(I);
+ if (PGOWarnMisExpect)
+ Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Msg));
+ OptimizationRemarkEmitter ORE(I->getParent()->getParent());
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str());
+}
+
+} // namespace
+
+namespace llvm {
+namespace misexpect {
+
+void verifyMisExpect(Instruction *I, const SmallVector<uint32_t, 4> &Weights,
+ LLVMContext &Ctx) {
+ if (auto *MisExpectData = I->getMetadata(LLVMContext::MD_misexpect)) {
+ auto *MisExpectDataName = dyn_cast<MDString>(MisExpectData->getOperand(0));
+ if (MisExpectDataName &&
+ MisExpectDataName->getString().equals("misexpect")) {
+ LLVM_DEBUG(llvm::dbgs() << "------------------\n");
+ LLVM_DEBUG(llvm::dbgs()
+ << "Function: " << I->getFunction()->getName() << "\n");
+ LLVM_DEBUG(llvm::dbgs() << "Instruction: " << *I << ":\n");
+ LLVM_DEBUG(for (int Idx = 0, Size = Weights.size(); Idx < Size; ++Idx) {
+ llvm::dbgs() << "Weights[" << Idx << "] = " << Weights[Idx] << "\n";
+ });
+
+ // extract values from misexpect metadata
+ const auto *IndexCint =
+ mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(1));
+ const auto *LikelyCInt =
+ mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(2));
+ const auto *UnlikelyCInt =
+ mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(3));
+
+ if (!IndexCint || !LikelyCInt || !UnlikelyCInt)
+ return;
+
+ const uint64_t Index = IndexCint->getZExtValue();
+ const uint64_t LikelyBranchWeight = LikelyCInt->getZExtValue();
+ const uint64_t UnlikelyBranchWeight = UnlikelyCInt->getZExtValue();
+ const uint64_t ProfileCount = Weights[Index];
+ const uint64_t CaseTotal = std::accumulate(
+ Weights.begin(), Weights.end(), (uint64_t)0, std::plus<uint64_t>());
+ const uint64_t NumUnlikelyTargets = Weights.size() - 1;
+
+ const uint64_t TotalBranchWeight =
+ LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets);
+
+ const llvm::BranchProbability LikelyThreshold(LikelyBranchWeight,
+ TotalBranchWeight);
+ uint64_t ScaledThreshold = LikelyThreshold.scale(CaseTotal);
+
+ LLVM_DEBUG(llvm::dbgs()
+ << "Unlikely Targets: " << NumUnlikelyTargets << ":\n");
+ LLVM_DEBUG(llvm::dbgs() << "Profile Count: " << ProfileCount << ":\n");
+ LLVM_DEBUG(llvm::dbgs()
+ << "Scaled Threshold: " << ScaledThreshold << ":\n");
+ LLVM_DEBUG(llvm::dbgs() << "------------------\n");
+ if (ProfileCount < ScaledThreshold)
+ emitMisexpectDiagnostic(I, Ctx, ProfileCount, CaseTotal);
+ }
+ }
+}
+
+void checkFrontendInstrumentation(Instruction &I) {
+ if (auto *MD = I.getMetadata(LLVMContext::MD_prof)) {
+ unsigned NOps = MD->getNumOperands();
+
+ // Only emit misexpect diagnostics if at least 2 branch weights are present.
+ // Less than 2 branch weights means that the profiling metadata is:
+ // 1) incorrect/corrupted
+ // 2) not branch weight metadata
+ // 3) completely deterministic
+ // In these cases we should not emit any diagnostic related to misexpect.
+ if (NOps < 3)
+ return;
+
+ // Operand 0 is a string tag "branch_weights"
+ if (MDString *Tag = cast<MDString>(MD->getOperand(0))) {
+ if (Tag->getString().equals("branch_weights")) {
+ SmallVector<uint32_t, 4> RealWeights(NOps - 1);
+ for (unsigned i = 1; i < NOps; i++) {
+ ConstantInt *Value =
+ mdconst::dyn_extract<ConstantInt>(MD->getOperand(i));
+ RealWeights[i - 1] = Value->getZExtValue();
+ }
+ verifyMisExpect(&I, RealWeights, I.getContext());
+ }
+ }
+ }
+}
+
+} // namespace misexpect
+} // namespace llvm
+#undef DEBUG_TYPE
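A worked example of the threshold check in verifyMisExpect, as self-contained arithmetic. The 2000:1 likely/unlikely ratio is assumed here as the usual llvm.expect lowering default; it is not defined in this file:

  #include <cstdint>
  #include <cstdio>

  int main() {
    // Assumed metadata: !{!"misexpect", i64 0, i64 2000, i64 1} and profiled
    // branch_weights {40, 60}, with the expected target at index 0.
    uint64_t Weights[] = {40, 60};
    uint64_t Index = 0, Likely = 2000, Unlikely = 1;
    uint64_t CaseTotal = Weights[0] + Weights[1];                  // 100
    uint64_t NumUnlikelyTargets = 2 - 1;                           // 1
    uint64_t TotalWeight = Likely + Unlikely * NumUnlikelyTargets; // 2001
    // LikelyThreshold = 2000/2001, scaled onto the profile total:
    uint64_t ScaledThreshold = CaseTotal * Likely / TotalWeight;   // 99
    uint64_t ProfileCount = Weights[Index];                        // 40
    if (ProfileCount < ScaledThreshold) // 40 < 99: diagnostic fires
      std::printf("annotation correct on %llu of %llu executions\n",
                  (unsigned long long)ProfileCount,
                  (unsigned long long)CaseTotal);
  }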
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index c84beceee191..1ef3757017a8 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -73,7 +73,7 @@ static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *>
SmallPtrSet<Constant *, 16> InitAsSet;
SmallVector<Constant *, 16> Init;
if (GV) {
- ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
+ auto *CA = cast<ConstantArray>(GV->getInitializer());
for (auto &Op : CA->operands()) {
Constant *C = cast_or_null<Constant>(Op);
if (InitAsSet.insert(C).second)
diff --git a/lib/Transforms/Utils/PredicateInfo.cpp b/lib/Transforms/Utils/PredicateInfo.cpp
index bdf24d80bd17..44859eafb9c1 100644
--- a/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/lib/Transforms/Utils/PredicateInfo.cpp
@@ -125,8 +125,10 @@ static bool valueComesBefore(OrderedInstructions &OI, const Value *A,
// necessary to compare uses/defs in the same block. Doing so allows us to walk
// the minimum number of instructions necessary to compute our def/use ordering.
struct ValueDFS_Compare {
+ DominatorTree &DT;
OrderedInstructions &OI;
- ValueDFS_Compare(OrderedInstructions &OI) : OI(OI) {}
+ ValueDFS_Compare(DominatorTree &DT, OrderedInstructions &OI)
+ : DT(DT), OI(OI) {}
bool operator()(const ValueDFS &A, const ValueDFS &B) const {
if (&A == &B)
@@ -136,7 +138,9 @@ struct ValueDFS_Compare {
// comesbefore to see what the real ordering is, because they are in the
// same basic block.
- bool SameBlock = std::tie(A.DFSIn, A.DFSOut) == std::tie(B.DFSIn, B.DFSOut);
+ assert((A.DFSIn != B.DFSIn || A.DFSOut == B.DFSOut) &&
+ "Equal DFS-in numbers imply equal out numbers");
+ bool SameBlock = A.DFSIn == B.DFSIn;
// We want to put the def that will get used for a given set of phi uses,
// before those phi uses.
@@ -145,9 +149,11 @@ struct ValueDFS_Compare {
if (SameBlock && A.LocalNum == LN_Last && B.LocalNum == LN_Last)
return comparePHIRelated(A, B);
+ bool isADef = A.Def;
+ bool isBDef = B.Def;
if (!SameBlock || A.LocalNum != LN_Middle || B.LocalNum != LN_Middle)
- return std::tie(A.DFSIn, A.DFSOut, A.LocalNum, A.Def, A.U) <
- std::tie(B.DFSIn, B.DFSOut, B.LocalNum, B.Def, B.U);
+ return std::tie(A.DFSIn, A.LocalNum, isADef) <
+ std::tie(B.DFSIn, B.LocalNum, isBDef);
return localComesBefore(A, B);
}
@@ -164,10 +170,35 @@ struct ValueDFS_Compare {
// For two phi related values, return the ordering.
bool comparePHIRelated(const ValueDFS &A, const ValueDFS &B) const {
- auto &ABlockEdge = getBlockEdge(A);
- auto &BBlockEdge = getBlockEdge(B);
- // Now sort by block edge and then defs before uses.
- return std::tie(ABlockEdge, A.Def, A.U) < std::tie(BBlockEdge, B.Def, B.U);
+ BasicBlock *ASrc, *ADest, *BSrc, *BDest;
+ std::tie(ASrc, ADest) = getBlockEdge(A);
+ std::tie(BSrc, BDest) = getBlockEdge(B);
+
+#ifndef NDEBUG
+ // This function should only be used for values in the same BB, check that.
+ DomTreeNode *DomASrc = DT.getNode(ASrc);
+ DomTreeNode *DomBSrc = DT.getNode(BSrc);
+ assert(DomASrc->getDFSNumIn() == (unsigned)A.DFSIn &&
+ "DFS numbers for A should match the ones of the source block");
+ assert(DomBSrc->getDFSNumIn() == (unsigned)B.DFSIn &&
+ "DFS numbers for B should match the ones of the source block");
+ assert(A.DFSIn == B.DFSIn && "Values must be in the same block");
+#endif
+ (void)ASrc;
+ (void)BSrc;
+
+ // Use DFS numbers to compare destination blocks, to guarantee a
+ // deterministic order.
+ DomTreeNode *DomADest = DT.getNode(ADest);
+ DomTreeNode *DomBDest = DT.getNode(BDest);
+ unsigned AIn = DomADest->getDFSNumIn();
+ unsigned BIn = DomBDest->getDFSNumIn();
+ bool isADef = A.Def;
+ bool isBDef = B.Def;
+ assert((!A.Def || !A.U) && (!B.Def || !B.U) &&
+ "Def and U cannot be set at the same time");
+ // Now sort by edge destination and then defs before uses.
+ return std::tie(AIn, isADef) < std::tie(BIn, isBDef);
}
// Get the definition of an instruction that occurs in the middle of a block.
@@ -306,10 +337,11 @@ void collectCmpOps(CmpInst *Comparison, SmallVectorImpl<Value *> &CmpOperands) {
}
// Add Op, PB to the list of value infos for Op, and mark Op to be renamed.
-void PredicateInfo::addInfoFor(SmallPtrSetImpl<Value *> &OpsToRename, Value *Op,
+void PredicateInfo::addInfoFor(SmallVectorImpl<Value *> &OpsToRename, Value *Op,
PredicateBase *PB) {
- OpsToRename.insert(Op);
auto &OperandInfo = getOrCreateValueInfo(Op);
+ if (OperandInfo.Infos.empty())
+ OpsToRename.push_back(Op);
AllInfos.push_back(PB);
OperandInfo.Infos.push_back(PB);
}
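The SmallPtrSet-to-SmallVector switch trades pointer-keyed (run-to-run nondeterministic) iteration order for stable insertion order, and the empty() check keeps the vector duplicate-free without a separate set. The same pattern in a minimal self-contained form:

  #include <map>
  #include <vector>

  std::map<int, std::vector<int>> InfosByOp; // stand-in for the ValueInfo map
  std::vector<int> OpsToRename;              // stable, duplicate-free order

  void addInfoFor(int Op, int Info) {
    auto &Infos = InfosByOp[Op];
    if (Infos.empty())           // first info for Op: record Op exactly once,
      OpsToRename.push_back(Op); // in deterministic insertion order
    Infos.push_back(Info);
  }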
@@ -317,7 +349,7 @@ void PredicateInfo::addInfoFor(SmallPtrSetImpl<Value *> &OpsToRename, Value *Op,
// Process an assume instruction and place relevant operations we want to rename
// into OpsToRename.
void PredicateInfo::processAssume(IntrinsicInst *II, BasicBlock *AssumeBB,
- SmallPtrSetImpl<Value *> &OpsToRename) {
+ SmallVectorImpl<Value *> &OpsToRename) {
// See if we have a comparison we support
SmallVector<Value *, 8> CmpOperands;
SmallVector<Value *, 2> ConditionsToProcess;
@@ -357,7 +389,7 @@ void PredicateInfo::processAssume(IntrinsicInst *II, BasicBlock *AssumeBB,
// Process a block terminating branch, and place relevant operations to be
// renamed into OpsToRename.
void PredicateInfo::processBranch(BranchInst *BI, BasicBlock *BranchBB,
- SmallPtrSetImpl<Value *> &OpsToRename) {
+ SmallVectorImpl<Value *> &OpsToRename) {
BasicBlock *FirstBB = BI->getSuccessor(0);
BasicBlock *SecondBB = BI->getSuccessor(1);
SmallVector<BasicBlock *, 2> SuccsToProcess;
@@ -427,7 +459,7 @@ void PredicateInfo::processBranch(BranchInst *BI, BasicBlock *BranchBB,
// Process a block terminating switch, and place relevant operations to be
// renamed into OpsToRename.
void PredicateInfo::processSwitch(SwitchInst *SI, BasicBlock *BranchBB,
- SmallPtrSetImpl<Value *> &OpsToRename) {
+ SmallVectorImpl<Value *> &OpsToRename) {
Value *Op = SI->getCondition();
if ((!isa<Instruction>(Op) && !isa<Argument>(Op)) || Op->hasOneUse())
return;
@@ -457,7 +489,7 @@ void PredicateInfo::buildPredicateInfo() {
DT.updateDFSNumbers();
// Collect operands to rename from all conditional branch terminators, as well
// as assume statements.
- SmallPtrSet<Value *, 8> OpsToRename;
+ SmallVector<Value *, 8> OpsToRename;
for (auto DTN : depth_first(DT.getRootNode())) {
BasicBlock *BranchBB = DTN->getBlock();
if (auto *BI = dyn_cast<BranchInst>(BranchBB->getTerminator())) {
@@ -524,7 +556,7 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter,
if (isa<PredicateWithEdge>(ValInfo)) {
IRBuilder<> B(getBranchTerminator(ValInfo));
Function *IF = getCopyDeclaration(F.getParent(), Op->getType());
- if (empty(IF->users()))
+ if (IF->users().empty())
CreatedDeclarations.insert(IF);
CallInst *PIC =
B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++));
@@ -536,7 +568,7 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter,
"Should not have gotten here without it being an assume");
IRBuilder<> B(PAssume->AssumeInst);
Function *IF = getCopyDeclaration(F.getParent(), Op->getType());
- if (empty(IF->users()))
+ if (IF->users().empty())
CreatedDeclarations.insert(IF);
CallInst *PIC = B.CreateCall(IF, Op);
PredicateMap.insert({PIC, ValInfo});
@@ -565,14 +597,8 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter,
//
// TODO: Use this algorithm to perform fast single-variable renaming in
// promotememtoreg and memoryssa.
-void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {
- // Sort OpsToRename since we are going to iterate it.
- SmallVector<Value *, 8> OpsToRename(OpSet.begin(), OpSet.end());
- auto Comparator = [&](const Value *A, const Value *B) {
- return valueComesBefore(OI, A, B);
- };
- llvm::sort(OpsToRename, Comparator);
- ValueDFS_Compare Compare(OI);
+void PredicateInfo::renameUses(SmallVectorImpl<Value *> &OpsToRename) {
+ ValueDFS_Compare Compare(DT, OI);
// Compute liveness, and rename in O(uses) per Op.
for (auto *Op : OpsToRename) {
LLVM_DEBUG(dbgs() << "Visiting " << *Op << "\n");
@@ -772,7 +798,7 @@ static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) {
bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto PredInfo = make_unique<PredicateInfo>(F, DT, AC);
+ auto PredInfo = std::make_unique<PredicateInfo>(F, DT, AC);
PredInfo->print(dbgs());
if (VerifyPredicateInfo)
PredInfo->verifyPredicateInfo();
@@ -786,7 +812,7 @@ PreservedAnalyses PredicateInfoPrinterPass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
OS << "PredicateInfo for function: " << F.getName() << "\n";
- auto PredInfo = make_unique<PredicateInfo>(F, DT, AC);
+ auto PredInfo = std::make_unique<PredicateInfo>(F, DT, AC);
PredInfo->print(OS);
replaceCreatedSSACopys(*PredInfo, F);
@@ -845,7 +871,7 @@ PreservedAnalyses PredicateInfoVerifierPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
- make_unique<PredicateInfo>(F, DT, AC)->verifyPredicateInfo();
+ std::make_unique<PredicateInfo>(F, DT, AC)->verifyPredicateInfo();
return PreservedAnalyses::all();
}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 11651d040dc0..3a5e3293ed4f 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -94,6 +94,12 @@ static cl::opt<unsigned> PHINodeFoldingThreshold(
cl::desc(
"Control the amount of phi node folding to perform (default = 2)"));
+static cl::opt<unsigned> TwoEntryPHINodeFoldingThreshold(
+ "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
+ cl::desc("Control the maximal total instruction cost that we are willing "
+ "to speculatively execute to fold a 2-entry PHI node into a "
+ "select (default = 4)"));
+
static cl::opt<bool> DupRet(
"simplifycfg-dup-ret", cl::Hidden, cl::init(false),
cl::desc("Duplicate return instructions into unconditional branches"));
@@ -332,7 +338,7 @@ static unsigned ComputeSpeculationCost(const User *I,
/// CostRemaining, false is returned and CostRemaining is undefined.
static bool DominatesMergePoint(Value *V, BasicBlock *BB,
SmallPtrSetImpl<Instruction *> &AggressiveInsts,
- unsigned &CostRemaining,
+ int &BudgetRemaining,
const TargetTransformInfo &TTI,
unsigned Depth = 0) {
// It is possible to hit a zero-cost cycle (phi/gep instructions for example),
@@ -375,7 +381,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
if (!isSafeToSpeculativelyExecute(I))
return false;
- unsigned Cost = ComputeSpeculationCost(I, TTI);
+ BudgetRemaining -= ComputeSpeculationCost(I, TTI);
// Allow exactly one instruction to be speculated regardless of its cost
// (as long as it is safe to do so).
@@ -383,17 +389,14 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// or other expensive operation. The speculation of an expensive instruction
// is expected to be undone in CodeGenPrepare if the speculation has not
// enabled further IR optimizations.
- if (Cost > CostRemaining &&
+ if (BudgetRemaining < 0 &&
(!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0))
return false;
- // Avoid unsigned wrap.
- CostRemaining = (Cost > CostRemaining) ? 0 : CostRemaining - Cost;
-
// Okay, we can only really hoist these out if their operands do
// not take us over the cost threshold.
for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
- if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI,
+ if (!DominatesMergePoint(*i, BB, AggressiveInsts, BudgetRemaining, TTI,
Depth + 1))
return false;
// Okay, it's safe to do this! Remember this instruction.
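The move from an unsigned cost counter to a signed budget removes the wrap-around special case that the deleted lines had to handle; running out of budget is now simply a negative value. In miniature, with assumed costs:

  #include <cstdio>

  int main() {
    const int TCC_Basic = 1; // stands in for TargetTransformInfo::TCC_Basic
    int BudgetRemaining = 4 * TCC_Basic;
    for (int Cost : {1, 1, 6}) { // the third instruction is expensive
      BudgetRemaining -= Cost;
      if (BudgetRemaining < 0) { // no unsigned wrap to guard against
        std::printf("out of budget\n");
        return 0;
      }
    }
  }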
@@ -629,8 +632,7 @@ private:
/// vector.
/// One "Extra" case is allowed to differ from the other.
void gather(Value *V) {
- Instruction *I = dyn_cast<Instruction>(V);
- bool isEQ = (I->getOpcode() == Instruction::Or);
+ bool isEQ = (cast<Instruction>(V)->getOpcode() == Instruction::Or);
// Keep a stack (SmallVector for efficiency) for depth-first traversal
SmallVector<Value *, 8> DFT;
@@ -1313,7 +1315,8 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
LLVMContext::MD_dereferenceable,
LLVMContext::MD_dereferenceable_or_null,
LLVMContext::MD_mem_parallel_loop_access,
- LLVMContext::MD_access_group};
+ LLVMContext::MD_access_group,
+ LLVMContext::MD_preserve_access_index};
combineMetadata(I1, I2, KnownIDs, true);
// I1 and I2 are being combined into a single instruction. Its debug
@@ -1420,6 +1423,20 @@ HoistTerminator:
return true;
}
+// Check lifetime markers.
+static bool isLifeTimeMarker(const Instruction *I) {
+ if (auto II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ return true;
+ }
+ }
+ return false;
+}
+
// All instructions in Insts belong to different blocks that all unconditionally
// branch to a common successor. Analyze each instruction and return true if it
// would be possible to sink them into their successor, creating one common
@@ -1474,20 +1491,25 @@ static bool canSinkInstructions(
return false;
}
- // Because SROA can't handle speculating stores of selects, try not
- // to sink loads or stores of allocas when we'd have to create a PHI for
- // the address operand. Also, because it is likely that loads or stores
- // of allocas will disappear when Mem2Reg/SROA is run, don't sink them.
+ // Because SROA can't handle speculating stores of selects, try not to sink
+ // loads, stores or lifetime markers of allocas when we'd have to create a
+ // PHI for the address operand. Also, because it is likely that loads or
+ // stores of allocas will disappear when Mem2Reg/SROA is run, don't sink
+ // them.
// This can cause code churn which can have unintended consequences down
// the line - see https://llvm.org/bugs/show_bug.cgi?id=30244.
// FIXME: This is a workaround for a deficiency in SROA - see
// https://llvm.org/bugs/show_bug.cgi?id=30188
if (isa<StoreInst>(I0) && any_of(Insts, [](const Instruction *I) {
- return isa<AllocaInst>(I->getOperand(1));
+ return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
}))
return false;
if (isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) {
- return isa<AllocaInst>(I->getOperand(0));
+ return isa<AllocaInst>(I->getOperand(0)->stripPointerCasts());
+ }))
+ return false;
+ if (isLifeTimeMarker(I0) && any_of(Insts, [](const Instruction *I) {
+ return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
}))
return false;
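The stripPointerCasts() calls widen the alloca check to see through pointer casts. A sketch of the effect; SI names an arbitrary StoreInst here, and the IR in the comment is illustrative:

  // stripPointerCasts walks through bitcasts, addrspacecasts, and all-zero
  // GEPs, so a store such as
  //   %p = bitcast i64* %a to i8*   ; %a = alloca i64
  //   store i8 0, i8* %p
  // is now recognized as a store to an alloca and is not sunk.
  if (isa<AllocaInst>(SI->getPointerOperand()->stripPointerCasts()))
    return false; // leave it to Mem2Reg/SROA instead of churning the CFG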
@@ -1959,7 +1981,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
- unsigned SpeculationCost = 0;
+ unsigned SpeculatedInstructions = 0;
Value *SpeculatedStoreValue = nullptr;
StoreInst *SpeculatedStore = nullptr;
for (BasicBlock::iterator BBI = ThenBB->begin(),
@@ -1974,8 +1996,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
// Only speculatively execute a single instruction (not counting the
// terminator) for now.
- ++SpeculationCost;
- if (SpeculationCost > 1)
+ ++SpeculatedInstructions;
+ if (SpeculatedInstructions > 1)
return false;
// Don't hoist the instruction if it's unsafe or expensive.
@@ -2012,8 +2034,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
E = SinkCandidateUseCounts.end();
I != E; ++I)
if (I->first->hasNUses(I->second)) {
- ++SpeculationCost;
- if (SpeculationCost > 1)
+ ++SpeculatedInstructions;
+ if (SpeculatedInstructions > 1)
return false;
}
@@ -2053,8 +2075,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
// getting expanded into Instructions.
// FIXME: This doesn't account for how many operations are combined in the
// constant expression.
- ++SpeculationCost;
- if (SpeculationCost > 1)
+ ++SpeculatedInstructions;
+ if (SpeculatedInstructions > 1)
return false;
}
@@ -2302,10 +2324,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// instructions. While we are at it, keep track of the instructions
// that need to be moved to the dominating block.
SmallPtrSet<Instruction *, 4> AggressiveInsts;
- unsigned MaxCostVal0 = PHINodeFoldingThreshold,
- MaxCostVal1 = PHINodeFoldingThreshold;
- MaxCostVal0 *= TargetTransformInfo::TCC_Basic;
- MaxCostVal1 *= TargetTransformInfo::TCC_Basic;
+ int BudgetRemaining =
+ TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
PHINode *PN = cast<PHINode>(II++);
@@ -2316,9 +2336,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
}
if (!DominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts,
- MaxCostVal0, TTI) ||
+ BudgetRemaining, TTI) ||
!DominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
- MaxCostVal1, TTI))
+ BudgetRemaining, TTI))
return false;
}
@@ -2328,12 +2348,24 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
if (!PN)
return true;
- // Don't fold i1 branches on PHIs which contain binary operators. These can
- // often be turned into switches and other things.
+ // Return true if at least one of these is a 'not', and another is either
+ // a 'not' too, or a constant.
+ auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
+ if (!match(V0, m_Not(m_Value())))
+ std::swap(V0, V1);
+ auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
+ return match(V0, m_Not(m_Value())) && match(V1, Invertible);
+ };
+
+ // Don't fold i1 branches on PHIs which contain binary operators, unless one
+ // of the incoming values is an 'not' and another one is freely invertible.
+ // These can often be turned into switches and other things.
if (PN->getType()->isIntegerTy(1) &&
(isa<BinaryOperator>(PN->getIncomingValue(0)) ||
isa<BinaryOperator>(PN->getIncomingValue(1)) ||
- isa<BinaryOperator>(IfCond)))
+ isa<BinaryOperator>(IfCond)) &&
+ !CanHoistNotFromBothValues(PN->getIncomingValue(0),
+ PN->getIncomingValue(1)))
return false;
// If all PHI nodes are promotable, check to make sure that all instructions
@@ -2368,6 +2400,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
return false;
}
}
+ assert(DomBlock && "Failed to find root DomBlock");
LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond
<< " T: " << IfTrue->getName()
@@ -2913,42 +2946,8 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
BasicBlock *QTB, BasicBlock *QFB,
BasicBlock *PostBB, Value *Address,
bool InvertPCond, bool InvertQCond,
- const DataLayout &DL) {
- auto IsaBitcastOfPointerType = [](const Instruction &I) {
- return Operator::getOpcode(&I) == Instruction::BitCast &&
- I.getType()->isPointerTy();
- };
-
- // If we're not in aggressive mode, we only optimize if we have some
- // confidence that by optimizing we'll allow P and/or Q to be if-converted.
- auto IsWorthwhile = [&](BasicBlock *BB) {
- if (!BB)
- return true;
- // Heuristic: if the block can be if-converted/phi-folded and the
- // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
- // thread this store.
- unsigned N = 0;
- for (auto &I : BB->instructionsWithoutDebug()) {
- // Cheap instructions viable for folding.
- if (isa<BinaryOperator>(I) || isa<GetElementPtrInst>(I) ||
- isa<StoreInst>(I))
- ++N;
- // Free instructions.
- else if (I.isTerminator() || IsaBitcastOfPointerType(I))
- continue;
- else
- return false;
- }
- // The store we want to merge is counted in N, so add 1 to make sure
- // we're counting the instructions that would be left.
- return N <= (PHINodeFoldingThreshold + 1);
- };
-
- if (!MergeCondStoresAggressively &&
- (!IsWorthwhile(PTB) || !IsWorthwhile(PFB) || !IsWorthwhile(QTB) ||
- !IsWorthwhile(QFB)))
- return false;
-
+ const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
// For every pointer, there must be exactly two stores, one coming from
// PTB or PFB, and the other from QTB or QFB. We don't support more than one
// store (to any address) in PTB,PFB or QTB,QFB.
@@ -2989,6 +2988,46 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
if (&*I != PStore && I->mayReadOrWriteMemory())
return false;
+ // If we're not in aggressive mode, we only optimize if we have some
+ // confidence that by optimizing we'll allow P and/or Q to be if-converted.
+ auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
+ if (!BB)
+ return true;
+ // Heuristic: if the block can be if-converted/phi-folded and the
+ // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
+ // thread this store.
+ int BudgetRemaining =
+ PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
+ for (auto &I : BB->instructionsWithoutDebug()) {
+ // Consider terminator instruction to be free.
+ if (I.isTerminator())
+ continue;
+      // If this is one of the stores that we want to speculate out of this
+      // BB, then don't count its cost; consider it to be free.
+      if (auto *S = dyn_cast<StoreInst>(&I))
+        if (llvm::is_contained(FreeStores, S))
+          continue;
+      // Else, we have a whitelist of instructions that we are okay with
+      // speculating.
+      if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I))
+        return false; // Not in the whitelist - not worthwhile folding.
+ // And finally, if this is a non-free instruction that we are okay
+ // speculating, ensure that we consider the speculation budget.
+ BudgetRemaining -= TTI.getUserCost(&I);
+ if (BudgetRemaining < 0)
+ return false; // Eagerly refuse to fold as soon as we're out of budget.
+ }
+ assert(BudgetRemaining >= 0 &&
+ "When we run out of budget we will eagerly return from within the "
+ "per-instruction loop.");
+ return true;
+ };
+
+ const SmallVector<StoreInst *, 2> FreeStores = {PStore, QStore};
+ if (!MergeCondStoresAggressively &&
+ (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
+ !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
+ return false;
+
// If PostBB has more than two predecessors, we need to split it so we can
// sink the store.
if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
@@ -3048,15 +3087,15 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
// store that doesn't execute.
if (MinAlignment != 0) {
// Choose the minimum of all non-zero alignments.
- SI->setAlignment(MinAlignment);
+ SI->setAlignment(Align(MinAlignment));
} else if (MaxAlignment != 0) {
// Choose the minimal alignment between the non-zero alignment and the ABI
// default alignment for the type of the stored value.
- SI->setAlignment(std::min(MaxAlignment, TypeAlignment));
+ SI->setAlignment(Align(std::min(MaxAlignment, TypeAlignment)));
} else {
// If both alignments are zero, use ABI default alignment for the type of
// the stored value.
- SI->setAlignment(TypeAlignment);
+ SI->setAlignment(Align(TypeAlignment));
}
QStore->eraseFromParent();
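The Align(...) wrappers reflect the then-new llvm::Align type, which asserts a non-zero power of two at construction. A hedged sketch of the contract, assuming the llvm/Support/Alignment.h API:

  #include "llvm/Support/Alignment.h"
  using llvm::Align;

  SI->setAlignment(Align(8));     // fine: 8 is a non-zero power of two
  // SI->setAlignment(Align(0));  // would assert; MaybeAlign models "unknown"
  // SI->setAlignment(Align(12)); // would assert: not a power of two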
@@ -3066,7 +3105,8 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
}
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
- const DataLayout &DL) {
+ const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
// The intention here is to find diamonds or triangles (see below) where each
// conditional block contains a store to the same address. Both of these
// stores are conditional, so they can't be unconditionally sunk. But it may
@@ -3168,7 +3208,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
bool Changed = false;
for (auto *Address : CommonAddresses)
Changed |= mergeConditionalStoreToAddress(
- PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL);
+ PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL, TTI);
return Changed;
}
@@ -3177,7 +3217,8 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
/// that PBI and BI are both conditional branches, and BI is in one of the
/// successor blocks of PBI - PBI branches to BI.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
- const DataLayout &DL) {
+ const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
assert(PBI->isConditional() && BI->isConditional());
BasicBlock *BB = BI->getParent();
@@ -3233,7 +3274,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// If both branches are conditional and both contain stores to the same
// address, remove the stores from the conditionals and create a conditional
// merged store at the end.
- if (MergeCondStores && mergeConditionalStores(PBI, BI, DL))
+ if (MergeCondStores && mergeConditionalStores(PBI, BI, DL, TTI))
return true;
// If this is a conditional branch in an empty block, and if any
@@ -3697,12 +3738,17 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
BasicBlock *BB = BI->getParent();
+  // MSan does not like undefs as a branch condition, which can be introduced
+  // by the "explicit branch" below.
+ if (ExtraCase && BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))
+ return false;
+
LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
<< " cases into SWITCH. BB is:\n"
<< *BB);
// If there are any extra values that couldn't be folded into the switch
- // then we evaluate them with an explicit branch first. Split the block
+ // then we evaluate them with an explicit branch first. Split the block
// right before the condbr to handle it.
if (ExtraCase) {
BasicBlock *NewBB =
@@ -3851,7 +3897,7 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) {
// Simplify resume that is only used by a single (non-phi) landing pad.
bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) {
BasicBlock *BB = RI->getParent();
- LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI());
+ auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHI());
assert(RI->getValue() == LPInst &&
"Resume must unwind the exception that caused control to here");
@@ -4178,23 +4224,22 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
IRBuilder<> Builder(TI);
if (auto *BI = dyn_cast<BranchInst>(TI)) {
if (BI->isUnconditional()) {
- if (BI->getSuccessor(0) == BB) {
- new UnreachableInst(TI->getContext(), TI);
- TI->eraseFromParent();
- Changed = true;
- }
+ assert(BI->getSuccessor(0) == BB && "Incorrect CFG");
+ new UnreachableInst(TI->getContext(), TI);
+ TI->eraseFromParent();
+ Changed = true;
} else {
Value* Cond = BI->getCondition();
if (BI->getSuccessor(0) == BB) {
Builder.CreateAssumption(Builder.CreateNot(Cond));
Builder.CreateBr(BI->getSuccessor(1));
- EraseTerminatorAndDCECond(BI);
- } else if (BI->getSuccessor(1) == BB) {
+ } else {
+ assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
Builder.CreateAssumption(Cond);
Builder.CreateBr(BI->getSuccessor(0));
- EraseTerminatorAndDCECond(BI);
- Changed = true;
}
+ EraseTerminatorAndDCECond(BI);
+ Changed = true;
}
} else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
SwitchInstProfUpdateWrapper SU(*SI);
@@ -4276,6 +4321,17 @@ static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
return true;
}
+static void createUnreachableSwitchDefault(SwitchInst *Switch) {
+ LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
+ BasicBlock *NewDefaultBlock =
+ SplitBlockPredecessors(Switch->getDefaultDest(), Switch->getParent(), "");
+ Switch->setDefaultDest(&*NewDefaultBlock);
+ SplitBlock(&*NewDefaultBlock, &NewDefaultBlock->front());
+ auto *NewTerminator = NewDefaultBlock->getTerminator();
+ new UnreachableInst(Switch->getContext(), NewTerminator);
+ EraseTerminatorAndDCECond(NewTerminator);
+}
+
/// Turn a switch with two reachable destinations into an integer range
/// comparison and branch.
static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
@@ -4384,6 +4440,11 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
}
+ // Clean up the default block - it may have phis or other instructions before
+ // the unreachable terminator.
+ if (!HasDefault)
+ createUnreachableSwitchDefault(SI);
+
// Drop the switch.
SI->eraseFromParent();
@@ -4428,14 +4489,7 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
if (HasDefault && DeadCases.empty() &&
NumUnknownBits < 64 /* avoid overflow */ &&
SI->getNumCases() == (1ULL << NumUnknownBits)) {
- LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
- BasicBlock *NewDefault =
- SplitBlockPredecessors(SI->getDefaultDest(), SI->getParent(), "");
- SI->setDefaultDest(&*NewDefault);
- SplitBlock(&*NewDefault, &NewDefault->front());
- auto *OldTI = NewDefault->getTerminator();
- new UnreachableInst(SI->getContext(), OldTI);
- EraseTerminatorAndDCECond(OldTI);
+ createUnreachableSwitchDefault(SI);
return true;
}
@@ -5031,7 +5085,7 @@ SwitchLookupTable::SwitchLookupTable(
Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    // Set the alignment to that of an array element. We will only be loading one
// value out of it.
- Array->setAlignment(DL.getPrefTypeAlignment(ValueType));
+ Array->setAlignment(Align(DL.getPrefTypeAlignment(ValueType)));
Kind = ArrayKind;
}
@@ -5260,7 +5314,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Figure out the corresponding result for each case value and phi node in the
// common destination, as well as the min and max case values.
- assert(!empty(SI->cases()));
+ assert(!SI->cases().empty());
SwitchInst::CaseIt CI = SI->case_begin();
ConstantInt *MinCaseVal = CI->getCaseValue();
ConstantInt *MaxCaseVal = CI->getCaseValue();
@@ -5892,7 +5946,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
if (PBI != BI && PBI->isConditional())
- if (SimplifyCondBranchToCondBranch(PBI, BI, DL))
+ if (SimplifyCondBranchToCondBranch(PBI, BI, DL, TTI))
return requestResimplify();
// Look for diamond patterns.
@@ -5900,7 +5954,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
if (PBI != BI && PBI->isConditional())
- if (mergeConditionalStores(PBI, BI, DL))
+ if (mergeConditionalStores(PBI, BI, DL, TTI))
return requestResimplify();
return false;
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index e0def81d5eee..0324993a8203 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
@@ -47,7 +48,6 @@ static cl::opt<bool>
cl::desc("Enable unsafe double to float "
"shrinking for math lib calls"));
-
//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//
@@ -177,7 +177,8 @@ static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,
if (!isOnlyUsedInComparisonWithZero(CI))
return false;
- if (!isDereferenceableAndAlignedPointer(Str, 1, APInt(64, Len), DL))
+ if (!isDereferenceableAndAlignedPointer(Str, Align::None(), APInt(64, Len),
+ DL))
return false;
if (CI->getFunction()->hasFnAttribute(Attribute::SanitizeMemory))
@@ -186,6 +187,67 @@ static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,
return true;
}
+static void annotateDereferenceableBytes(CallInst *CI,
+ ArrayRef<unsigned> ArgNos,
+ uint64_t DereferenceableBytes) {
+ const Function *F = CI->getCaller();
+ if (!F)
+ return;
+ for (unsigned ArgNo : ArgNos) {
+ uint64_t DerefBytes = DereferenceableBytes;
+ unsigned AS = CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace();
+ if (!llvm::NullPointerIsDefined(F, AS) ||
+ CI->paramHasAttr(ArgNo, Attribute::NonNull))
+ DerefBytes = std::max(CI->getDereferenceableOrNullBytes(
+ ArgNo + AttributeList::FirstArgIndex),
+ DereferenceableBytes);
+
+ if (CI->getDereferenceableBytes(ArgNo + AttributeList::FirstArgIndex) <
+ DerefBytes) {
+ CI->removeParamAttr(ArgNo, Attribute::Dereferenceable);
+ if (!llvm::NullPointerIsDefined(F, AS) ||
+ CI->paramHasAttr(ArgNo, Attribute::NonNull))
+ CI->removeParamAttr(ArgNo, Attribute::DereferenceableOrNull);
+ CI->addParamAttr(ArgNo, Attribute::getWithDereferenceableBytes(
+ CI->getContext(), DerefBytes));
+ }
+ }
+}
+
+static void annotateNonNullBasedOnAccess(CallInst *CI,
+ ArrayRef<unsigned> ArgNos) {
+ Function *F = CI->getCaller();
+ if (!F)
+ return;
+
+ for (unsigned ArgNo : ArgNos) {
+ if (CI->paramHasAttr(ArgNo, Attribute::NonNull))
+ continue;
+ unsigned AS = CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace();
+ if (llvm::NullPointerIsDefined(F, AS))
+ continue;
+
+ CI->addParamAttr(ArgNo, Attribute::NonNull);
+ annotateDereferenceableBytes(CI, ArgNo, 1);
+ }
+}
+
+static void annotateNonNullAndDereferenceable(CallInst *CI,
+                                              ArrayRef<unsigned> ArgNos,
+                                              Value *Size,
+                                              const DataLayout &DL) {
+ if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size)) {
+ annotateNonNullBasedOnAccess(CI, ArgNos);
+ annotateDereferenceableBytes(CI, ArgNos, LenC->getZExtValue());
+ } else if (isKnownNonZero(Size, DL)) {
+ annotateNonNullBasedOnAccess(CI, ArgNos);
+ const APInt *X, *Y;
+ uint64_t DerefMin = 1;
+ if (match(Size, m_Select(m_Value(), m_APInt(X), m_APInt(Y)))) {
+ DerefMin = std::min(X->getZExtValue(), Y->getZExtValue());
+ annotateDereferenceableBytes(CI, ArgNos, DerefMin);
+ }
+ }
+}
+
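The three helpers above let every string/memory simplification record what it learned even when no rewrite happens. A minimal sketch of the core attribute mechanics (assumes the LLVM-9-era C++ API; annotateExample is a made-up name, and the real helpers additionally respect NullPointerIsDefined and any stronger existing dereferenceable_or_null attributes):

  #include "llvm/IR/Attributes.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // Hypothetical reduced form: mark argument ArgNo of CI as nonnull and
  // known dereferenceable for Bytes bytes.
  static void annotateExample(CallInst *CI, unsigned ArgNo, uint64_t Bytes) {
    if (!CI->paramHasAttr(ArgNo, Attribute::NonNull))
      CI->addParamAttr(ArgNo, Attribute::NonNull);
    CI->addParamAttr(ArgNo, Attribute::getWithDereferenceableBytes(
                                CI->getContext(), Bytes));
  }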
//===----------------------------------------------------------------------===//
// String and Memory Library Call Optimizations
//===----------------------------------------------------------------------===//
@@ -194,10 +256,13 @@ Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilder<> &B) {
// Extract some information from the instruction
Value *Dst = CI->getArgOperand(0);
Value *Src = CI->getArgOperand(1);
+ annotateNonNullBasedOnAccess(CI, {0, 1});
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
- if (Len == 0)
+ if (Len)
+ annotateDereferenceableBytes(CI, 1, Len);
+ else
return nullptr;
--Len; // Unbias length.
@@ -232,24 +297,34 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) {
// Extract some information from the instruction.
Value *Dst = CI->getArgOperand(0);
Value *Src = CI->getArgOperand(1);
+ Value *Size = CI->getArgOperand(2);
uint64_t Len;
+ annotateNonNullBasedOnAccess(CI, 0);
+ if (isKnownNonZero(Size, DL))
+ annotateNonNullBasedOnAccess(CI, 1);
// We don't do anything if length is not constant.
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size);
+ if (LengthArg) {
Len = LengthArg->getZExtValue();
- else
+ // strncat(x, c, 0) -> x
+ if (!Len)
+ return Dst;
+ } else {
return nullptr;
+ }
// See if we can get the length of the input string.
uint64_t SrcLen = GetStringLength(Src);
- if (SrcLen == 0)
+ if (SrcLen) {
+ annotateDereferenceableBytes(CI, 1, SrcLen);
+ --SrcLen; // Unbias length.
+ } else {
return nullptr;
- --SrcLen; // Unbias length.
+ }
- // Handle the simple, do-nothing cases:
// strncat(x, "", c) -> x
- // strncat(x, c, 0) -> x
- if (SrcLen == 0 || Len == 0)
+ if (SrcLen == 0)
return Dst;
// We don't optimize this case.
@@ -265,13 +340,18 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
FunctionType *FT = Callee->getFunctionType();
Value *SrcStr = CI->getArgOperand(0);
+ annotateNonNullBasedOnAccess(CI, 0);
// If the second operand is non-constant, see if we can compute the length
// of the input string and turn this into memchr.
ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
if (!CharC) {
uint64_t Len = GetStringLength(SrcStr);
- if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
+ if (Len)
+ annotateDereferenceableBytes(CI, 0, Len);
+ else
+ return nullptr;
+ if (!FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
return nullptr;
return emitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
@@ -304,6 +384,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
Value *SrcStr = CI->getArgOperand(0);
ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ annotateNonNullBasedOnAccess(CI, 0);
// Cannot fold anything if we're not looking for a constant.
if (!CharC)
@@ -351,7 +432,12 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
// strcmp(P, "x") -> memcmp(P, "x", 2)
uint64_t Len1 = GetStringLength(Str1P);
+ if (Len1)
+ annotateDereferenceableBytes(CI, 0, Len1);
uint64_t Len2 = GetStringLength(Str2P);
+ if (Len2)
+ annotateDereferenceableBytes(CI, 1, Len2);
+
if (Len1 && Len2) {
return emitMemCmp(Str1P, Str2P,
ConstantInt::get(DL.getIntPtrType(CI->getContext()),
@@ -374,17 +460,22 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
TLI);
}
+ annotateNonNullBasedOnAccess(CI, {0, 1});
return nullptr;
}
Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
- Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+ Value *Str1P = CI->getArgOperand(0);
+ Value *Str2P = CI->getArgOperand(1);
+ Value *Size = CI->getArgOperand(2);
if (Str1P == Str2P) // strncmp(x,x,n) -> 0
return ConstantInt::get(CI->getType(), 0);
+ if (isKnownNonZero(Size, DL))
+ annotateNonNullBasedOnAccess(CI, {0, 1});
// Get the length argument if it is constant.
uint64_t Length;
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size))
Length = LengthArg->getZExtValue();
else
return nullptr;
@@ -393,7 +484,7 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
return ConstantInt::get(CI->getType(), 0);
if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
- return emitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI);
+ return emitMemCmp(Str1P, Str2P, Size, B, DL, TLI);
StringRef Str1, Str2;
bool HasStr1 = getConstantStringInfo(Str1P, Str1);
@@ -415,7 +506,11 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
CI->getType());
uint64_t Len1 = GetStringLength(Str1P);
+ if (Len1)
+ annotateDereferenceableBytes(CI, 0, Len1);
uint64_t Len2 = GetStringLength(Str2P);
+ if (Len2)
+ annotateDereferenceableBytes(CI, 1, Len2);
// strncmp to memcmp
if (!HasStr1 && HasStr2) {
@@ -437,20 +532,38 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
+Value *LibCallSimplifier::optimizeStrNDup(CallInst *CI, IRBuilder<> &B) {
+ Value *Src = CI->getArgOperand(0);
+ ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ uint64_t SrcLen = GetStringLength(Src);
+ if (SrcLen && Size) {
+ annotateDereferenceableBytes(CI, 0, SrcLen);
+ if (SrcLen <= Size->getZExtValue() + 1)
+ return emitStrDup(Src, B, TLI);
+ }
+
+ return nullptr;
+}
+
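optimizeStrNDup relies on GetStringLength returning the length including the terminating NUL, so SrcLen <= Size + 1 is equivalent to strlen(Src) <= Size, i.e. the bound never truncates. A small illustration of the POSIX semantics being exploited (not from the patch):

  #include <string.h>

  // Illustration only: with n >= strlen(s) the bound never truncates,
  // so strndup(s, n) degenerates to strdup(s).
  char *copy_all(const char *s) { return strndup(s, strlen(s)); }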
Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) // strcpy(x,x) -> x
return Src;
-
+
+ annotateNonNullBasedOnAccess(CI, {0, 1});
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
- if (Len == 0)
+ if (Len)
+ annotateDereferenceableBytes(CI, 1, Len);
+ else
return nullptr;
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
- B.CreateMemCpy(Dst, 1, Src, 1,
- ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));
+ CallInst *NewCI =
+ B.CreateMemCpy(Dst, 1, Src, 1,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));
+ NewCI->setAttributes(CI->getAttributes());
return Dst;
}
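The NewCI->setAttributes(CI->getAttributes()) pattern introduced here, and repeated below for stpcpy, strncpy, memcpy, memmove, and memset, keeps caller-visible facts such as nonnull and dereferenceable arguments alive across the rewrite. A reduced sketch of the pattern (LLVM-9-era API; it assumes an IRBuilder<> B, the original CallInst *CI, Value *Dst and *Src, and a computed constant Len, as in the hunk above):

  CallInst *NewCI =
      B.CreateMemCpy(Dst, /*DstAlign=*/1, Src, /*SrcAlign=*/1, Len);
  NewCI->setAttributes(CI->getAttributes()); // carry the old attributes over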
@@ -464,7 +577,9 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
- if (Len == 0)
+ if (Len)
+ annotateDereferenceableBytes(CI, 1, Len);
+ else
return nullptr;
Type *PT = Callee->getFunctionType()->getParamType(0);
@@ -474,7 +589,8 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
- B.CreateMemCpy(Dst, 1, Src, 1, LenV);
+ CallInst *NewCI = B.CreateMemCpy(Dst, 1, Src, 1, LenV);
+ NewCI->setAttributes(CI->getAttributes());
return DstEnd;
}
@@ -482,37 +598,47 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
Value *Dst = CI->getArgOperand(0);
Value *Src = CI->getArgOperand(1);
- Value *LenOp = CI->getArgOperand(2);
+ Value *Size = CI->getArgOperand(2);
+ annotateNonNullBasedOnAccess(CI, 0);
+ if (isKnownNonZero(Size, DL))
+ annotateNonNullBasedOnAccess(CI, 1);
+
+ uint64_t Len;
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size))
+ Len = LengthArg->getZExtValue();
+ else
+ return nullptr;
+
+ // strncpy(x, y, 0) -> x
+ if (Len == 0)
+ return Dst;
// See if we can get the length of the input string.
uint64_t SrcLen = GetStringLength(Src);
- if (SrcLen == 0)
+ if (SrcLen) {
+ annotateDereferenceableBytes(CI, 1, SrcLen);
+ --SrcLen; // Unbias length.
+ } else {
return nullptr;
- --SrcLen;
+ }
if (SrcLen == 0) {
// strncpy(x, "", y) -> memset(align 1 x, '\0', y)
- B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1);
+ CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, 1);
+ AttrBuilder ArgAttrs(CI->getAttributes().getParamAttributes(0));
+ NewCI->setAttributes(NewCI->getAttributes().addParamAttributes(
+ CI->getContext(), 0, ArgAttrs));
return Dst;
}
- uint64_t Len;
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp))
- Len = LengthArg->getZExtValue();
- else
- return nullptr;
-
- if (Len == 0)
- return Dst; // strncpy(x, y, 0) -> x
-
// Let strncpy handle the zero padding
if (Len > SrcLen + 1)
return nullptr;
Type *PT = Callee->getFunctionType()->getParamType(0);
// strncpy(x, s, c) -> memcpy(align 1 x, align 1 s, c) [s and c are constant]
- B.CreateMemCpy(Dst, 1, Src, 1, ConstantInt::get(DL.getIntPtrType(PT), Len));
-
+  CallInst *NewCI = B.CreateMemCpy(
+      Dst, 1, Src, 1, ConstantInt::get(DL.getIntPtrType(PT), Len));
+ NewCI->setAttributes(CI->getAttributes());
return Dst;
}
@@ -608,7 +734,10 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B,
}
Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
- return optimizeStringLength(CI, B, 8);
+ if (Value *V = optimizeStringLength(CI, B, 8))
+ return V;
+ annotateNonNullBasedOnAccess(CI, 0);
+ return nullptr;
}
Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilder<> &B) {
@@ -756,21 +885,35 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) {
Value *StrChr = emitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI);
return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr;
}
+
+ annotateNonNullBasedOnAccess(CI, {0, 1});
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeMemRChr(CallInst *CI, IRBuilder<> &B) {
+ if (isKnownNonZero(CI->getOperand(2), DL))
+ annotateNonNullBasedOnAccess(CI, 0);
return nullptr;
}
Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
Value *SrcStr = CI->getArgOperand(0);
+ Value *Size = CI->getArgOperand(2);
+ annotateNonNullAndDereferenceable(CI, 0, Size, DL);
ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
- ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ ConstantInt *LenC = dyn_cast<ConstantInt>(Size);
// memchr(x, y, 0) -> null
- if (LenC && LenC->isZero())
- return Constant::getNullValue(CI->getType());
+ if (LenC) {
+ if (LenC->isZero())
+ return Constant::getNullValue(CI->getType());
+ } else {
+ // From now on we need at least constant length and string.
+ return nullptr;
+ }
- // From now on we need at least constant length and string.
StringRef Str;
- if (!LenC || !getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false))
+ if (!getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false))
return nullptr;
// Truncate the string to LenC. If Str is smaller than LenC we will still only
@@ -913,6 +1056,7 @@ static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,
Ret = 1;
return ConstantInt::get(CI->getType(), Ret);
}
+
return nullptr;
}
@@ -925,12 +1069,19 @@ Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI,
if (LHS == RHS) // memcmp(s,s,x) -> 0
return Constant::getNullValue(CI->getType());
+ annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
// Handle constant lengths.
- if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size))
- if (Value *Res = optimizeMemCmpConstantSize(CI, LHS, RHS,
- LenC->getZExtValue(), B, DL))
- return Res;
+ ConstantInt *LenC = dyn_cast<ConstantInt>(Size);
+ if (!LenC)
+ return nullptr;
+ // memcmp(d,s,0) -> 0
+ if (LenC->getZExtValue() == 0)
+ return Constant::getNullValue(CI->getType());
+
+ if (Value *Res =
+ optimizeMemCmpConstantSize(CI, LHS, RHS, LenC->getZExtValue(), B, DL))
+ return Res;
return nullptr;
}
@@ -939,9 +1090,9 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
return V;
// memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0
- // `bcmp` can be more efficient than memcmp because it only has to know that
- // there is a difference, not where it is.
- if (isOnlyUsedInZeroEqualityComparison(CI) && TLI->has(LibFunc_bcmp)) {
+  // bcmp can be more efficient than memcmp because it only has to know that
+  // there is a difference, not where it is or how large.
+ if (TLI->has(LibFunc_bcmp) && isOnlyUsedInZeroEqualityComparison(CI)) {
Value *LHS = CI->getArgOperand(0);
Value *RHS = CI->getArgOperand(1);
Value *Size = CI->getArgOperand(2);
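The reordered condition above also checks TLI->has(LibFunc_bcmp) first, which is the cheaper test. The fold itself is sound because the callers only test the result against zero; a hedged illustration:

  #include <stddef.h>
  #include <strings.h> // POSIX bcmp

  // Only the ==0 truth value is consumed, so an ordering is never needed and
  // bcmp (which may stop at the first mismatching byte) is a valid stand-in.
  static bool equal_bytes(const void *a, const void *b, size_t n) {
    return bcmp(a, b, n) == 0; // same truth value as memcmp(a, b, n) == 0
  }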
@@ -956,16 +1107,37 @@ Value *LibCallSimplifier::optimizeBCmp(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
+ Value *Size = CI->getArgOperand(2);
+ annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
+ if (isa<IntrinsicInst>(CI))
+ return nullptr;
+
// memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n)
- B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
- CI->getArgOperand(2));
+ CallInst *NewCI =
+ B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, Size);
+ NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
+Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilder<> &B) {
+ Value *Dst = CI->getArgOperand(0);
+ Value *N = CI->getArgOperand(2);
+ // mempcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n), x + n
+ CallInst *NewCI = B.CreateMemCpy(Dst, 1, CI->getArgOperand(1), 1, N);
+ NewCI->setAttributes(CI->getAttributes());
+ return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N);
+}
+
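The new mempcpy handler mirrors the function's documented contract: behave like memcpy but return the end of the destination, which is exactly the memcpy plus inbounds GEP the simplifier emits. Equivalent C++ as a hedged reference (my_mempcpy is an illustrative name):

  #include <cstring>

  // mempcpy(dst, src, n) == memcpy(dst, src, n) followed by returning dst + n.
  static void *my_mempcpy(void *dst, const void *src, size_t n) {
    std::memcpy(dst, src, n);
    return static_cast<char *>(dst) + n;
  }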
Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
+ Value *Size = CI->getArgOperand(2);
+ annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
+ if (isa<IntrinsicInst>(CI))
+ return nullptr;
+
// memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n)
- B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
- CI->getArgOperand(2));
+ CallInst *NewCI =
+ B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, Size);
+ NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
@@ -1003,25 +1175,29 @@ Value *LibCallSimplifier::foldMallocMemset(CallInst *Memset, IRBuilder<> &B) {
B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator());
const DataLayout &DL = Malloc->getModule()->getDataLayout();
IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext());
- Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1),
- Malloc->getArgOperand(0), Malloc->getAttributes(),
- B, *TLI);
- if (!Calloc)
- return nullptr;
-
- Malloc->replaceAllUsesWith(Calloc);
- eraseFromParent(Malloc);
+ if (Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1),
+ Malloc->getArgOperand(0),
+ Malloc->getAttributes(), B, *TLI)) {
+ substituteInParent(Malloc, Calloc);
+ return Calloc;
+ }
- return Calloc;
+ return nullptr;
}
Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
+ Value *Size = CI->getArgOperand(2);
+ annotateNonNullAndDereferenceable(CI, 0, Size, DL);
+ if (isa<IntrinsicInst>(CI))
+ return nullptr;
+
if (auto *Calloc = foldMallocMemset(CI, B))
return Calloc;
// memset(p, v, n) -> llvm.memset(align 1 p, v, n)
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
- B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+ CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, 1);
+ NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
@@ -1096,21 +1272,18 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B,
if (!V[0] || (isBinary && !V[1]))
return nullptr;
- StringRef CalleeNm = CalleeFn->getName();
- AttributeList CalleeAt = CalleeFn->getAttributes();
- bool CalleeIn = CalleeFn->isIntrinsic();
-
// If call isn't an intrinsic, check that it isn't within a function with the
// same name as the float version of this call, otherwise the result is an
// infinite loop. For example, from MinGW-w64:
//
// float expf(float val) { return (float) exp((double) val); }
- if (!CalleeIn) {
- const Function *Fn = CI->getFunction();
- StringRef FnName = Fn->getName();
- if (FnName.back() == 'f' &&
- FnName.size() == (CalleeNm.size() + 1) &&
- FnName.startswith(CalleeNm))
+ StringRef CalleeName = CalleeFn->getName();
+ bool IsIntrinsic = CalleeFn->isIntrinsic();
+ if (!IsIntrinsic) {
+ StringRef CallerName = CI->getFunction()->getName();
+ if (!CallerName.empty() && CallerName.back() == 'f' &&
+ CallerName.size() == (CalleeName.size() + 1) &&
+ CallerName.startswith(CalleeName))
return nullptr;
}
@@ -1120,16 +1293,16 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B,
// g((double) float) -> (double) gf(float)
Value *R;
- if (CalleeIn) {
+ if (IsIntrinsic) {
Module *M = CI->getModule();
Intrinsic::ID IID = CalleeFn->getIntrinsicID();
Function *Fn = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
R = isBinary ? B.CreateCall(Fn, V) : B.CreateCall(Fn, V[0]);
+ } else {
+ AttributeList CalleeAttrs = CalleeFn->getAttributes();
+ R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], CalleeName, B, CalleeAttrs)
+ : emitUnaryFloatFnCall(V[0], CalleeName, B, CalleeAttrs);
}
- else
- R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], CalleeNm, B, CalleeAt)
- : emitUnaryFloatFnCall(V[0], CalleeNm, B, CalleeAt);
-
return B.CreateFPExt(R, B.getDoubleTy());
}
@@ -1234,9 +1407,25 @@ static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) {
return InnerChain[Exp];
}
+// Return a properly extended 32-bit integer if the operation is an itofp.
+static Value *getIntToFPVal(Value *I2F, IRBuilder<> &B) {
+ if (isa<SIToFPInst>(I2F) || isa<UIToFPInst>(I2F)) {
+ Value *Op = cast<Instruction>(I2F)->getOperand(0);
+ // Make sure that the exponent fits inside an int32_t,
+    // thus avoiding any range issues that FP does not have.
+ unsigned BitWidth = Op->getType()->getPrimitiveSizeInBits();
+ if (BitWidth < 32 ||
+ (BitWidth == 32 && isa<SIToFPInst>(I2F)))
+ return isa<SIToFPInst>(I2F) ? B.CreateSExt(Op, B.getInt32Ty())
+ : B.CreateZExt(Op, B.getInt32Ty());
+ }
+
+ return nullptr;
+}
+
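The helper centralizes the width check previously open-coded in optimizePow: the integer must fit an int32_t after extension, so signed sources may be exactly 32 bits wide while unsigned sources must be strictly narrower. The predicate as a standalone sketch (fitsInt32Exponent is a hypothetical name):

  // A 32-bit signed value sign-extends losslessly into int32_t; a 32-bit
  // unsigned value may exceed INT32_MAX, so it must be < 32 bits wide.
  static bool fitsInt32Exponent(unsigned BitWidth, bool IsSigned) {
    return BitWidth < 32 || (BitWidth == 32 && IsSigned);
  }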
/// Use exp{,2}(x * y) for pow(exp{,2}(x), y);
-/// exp2(n * x) for pow(2.0 ** n, x); exp10(x) for pow(10.0, x);
-/// exp2(log2(n) * x) for pow(n, x).
+/// ldexp(1.0, x) for pow(2.0, itofp(x)); exp2(n * x) for pow(2.0 ** n, x);
+/// exp10(x) for pow(10.0, x); exp2(log2(n) * x) for pow(n, x).
Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
AttributeList Attrs = Pow->getCalledFunction()->getAttributes();
@@ -1269,9 +1458,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
StringRef ExpName;
Intrinsic::ID ID;
Value *ExpFn;
- LibFunc LibFnFloat;
- LibFunc LibFnDouble;
- LibFunc LibFnLongDouble;
+ LibFunc LibFnFloat, LibFnDouble, LibFnLongDouble;
switch (LibFn) {
default:
@@ -1305,9 +1492,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
// elimination cannot be trusted to remove it, since it may have side
// effects (e.g., errno). When the only consumer for the original
// exp{,2}() is pow(), then it has to be explicitly erased.
- BaseFn->replaceAllUsesWith(ExpFn);
- eraseFromParent(BaseFn);
-
+ substituteInParent(BaseFn, ExpFn);
return ExpFn;
}
}
@@ -1318,8 +1503,18 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
if (!match(Pow->getArgOperand(0), m_APFloat(BaseF)))
return nullptr;
+ // pow(2.0, itofp(x)) -> ldexp(1.0, x)
+ if (match(Base, m_SpecificFP(2.0)) &&
+ (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo)) &&
+ hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {
+ if (Value *ExpoI = getIntToFPVal(Expo, B))
+ return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), ExpoI, TLI,
+ LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl,
+ B, Attrs);
+ }
+
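The identity behind the new fold: for an integer n, 2 raised to n computed through pow equals scaling 1.0 by 2^n, which is precisely what ldexp does. Hedged C++ showing the two sides:

  #include <cmath>

  // Both compute 2^n exactly (for n within ldexp's supported exponent range);
  // ldexp avoids the general-purpose pow machinery.
  static double via_pow(int n) { return std::pow(2.0, static_cast<double>(n)); }
  static double via_ldexp(int n) { return std::ldexp(1.0, n); }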
// pow(2.0 ** n, x) -> exp2(n * x)
- if (hasUnaryFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) {
+ if (hasFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) {
APFloat BaseR = APFloat(1.0);
BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
BaseR = BaseR / *BaseF;
@@ -1344,7 +1539,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
// pow(10.0, x) -> exp10(x)
// TODO: There is no exp10() intrinsic yet, but some day there shall be one.
if (match(Base, m_SpecificFP(10.0)) &&
- hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
+ hasFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
return emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f,
LibFunc_exp10l, B, Attrs);
@@ -1359,17 +1554,15 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
if (Log) {
Value *FMul = B.CreateFMul(Log, Expo, "mul");
- if (Pow->doesNotAccessMemory()) {
+ if (Pow->doesNotAccessMemory())
return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty),
FMul, "exp2");
- } else {
- if (hasUnaryFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f,
- LibFunc_exp2l))
- return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f,
- LibFunc_exp2l, B, Attrs);
- }
+ else if (hasFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l))
+ return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f,
+ LibFunc_exp2l, B, Attrs);
}
}
+
return nullptr;
}
@@ -1384,8 +1577,7 @@ static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno,
}
// Otherwise, use the libcall for sqrt().
- if (hasUnaryFloatFn(TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf,
- LibFunc_sqrtl))
+ if (hasFloatFn(TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl))
// TODO: We also should check that the target can in fact lower the sqrt()
// libcall. We currently have no way to ask this question, so we ask if
// the target has a sqrt() libcall, which is not exactly the same.
@@ -1452,7 +1644,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
bool Ignored;
// Bail out if simplifying libcalls to pow() is disabled.
- if (!hasUnaryFloatFn(TLI, Ty, LibFunc_pow, LibFunc_powf, LibFunc_powl))
+ if (!hasFloatFn(TLI, Ty, LibFunc_pow, LibFunc_powf, LibFunc_powl))
return nullptr;
// Propagate the math semantics from the call to any created instructions.
@@ -1480,8 +1672,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
if (match(Expo, m_SpecificFP(-1.0)))
return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal");
- // pow(x, 0.0) -> 1.0
- if (match(Expo, m_SpecificFP(0.0)))
+ // pow(x, +/-0.0) -> 1.0
+ if (match(Expo, m_AnyZeroFP()))
return ConstantFP::get(Ty, 1.0);
// pow(x, 1.0) -> x
@@ -1558,16 +1750,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
// powf(x, itofp(y)) -> powi(x, y)
if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) {
- Value *IntExpo = cast<Instruction>(Expo)->getOperand(0);
- Value *NewExpo = nullptr;
- unsigned BitWidth = IntExpo->getType()->getPrimitiveSizeInBits();
- if (isa<SIToFPInst>(Expo) && BitWidth == 32)
- NewExpo = IntExpo;
- else if (BitWidth < 32)
- NewExpo = isa<SIToFPInst>(Expo) ? B.CreateSExt(IntExpo, B.getInt32Ty())
- : B.CreateZExt(IntExpo, B.getInt32Ty());
- if (NewExpo)
- return createPowWithIntegerExponent(Base, NewExpo, M, B);
+ if (Value *ExpoI = getIntToFPVal(Expo, B))
+ return createPowWithIntegerExponent(Base, ExpoI, M, B);
}
return Shrunk;
@@ -1575,45 +1759,25 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- Value *Ret = nullptr;
StringRef Name = Callee->getName();
- if (UnsafeFPShrink && Name == "exp2" && hasFloatVersion(Name))
+ Value *Ret = nullptr;
+ if (UnsafeFPShrink && Name == TLI->getName(LibFunc_exp2) &&
+ hasFloatVersion(Name))
Ret = optimizeUnaryDoubleFP(CI, B, true);
+ Type *Ty = CI->getType();
Value *Op = CI->getArgOperand(0);
+
// Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
// Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
- LibFunc LdExp = LibFunc_ldexpl;
- if (Op->getType()->isFloatTy())
- LdExp = LibFunc_ldexpf;
- else if (Op->getType()->isDoubleTy())
- LdExp = LibFunc_ldexp;
-
- if (TLI->has(LdExp)) {
- Value *LdExpArg = nullptr;
- if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
- if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
- LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
- } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
- if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
- LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty());
- }
-
- if (LdExpArg) {
- Constant *One = ConstantFP::get(CI->getContext(), APFloat(1.0f));
- if (!Op->getType()->isFloatTy())
- One = ConstantExpr::getFPExtend(One, Op->getType());
-
- Module *M = CI->getModule();
- FunctionCallee NewCallee = M->getOrInsertFunction(
- TLI->getName(LdExp), Op->getType(), Op->getType(), B.getInt32Ty());
- CallInst *CI = B.CreateCall(NewCallee, {One, LdExpArg});
- if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
-
- return CI;
- }
+ if ((isa<SIToFPInst>(Op) || isa<UIToFPInst>(Op)) &&
+ hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {
+ if (Value *Exp = getIntToFPVal(Op, B))
+ return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), Exp, TLI,
+ LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl,
+ B, CI->getCalledFunction()->getAttributes());
}
+
return Ret;
}
@@ -1644,48 +1808,155 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
return B.CreateCall(F, { CI->getArgOperand(0), CI->getArgOperand(1) });
}
-Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
+Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilder<> &B) {
+ Function *LogFn = Log->getCalledFunction();
+ AttributeList Attrs = LogFn->getAttributes();
+ StringRef LogNm = LogFn->getName();
+ Intrinsic::ID LogID = LogFn->getIntrinsicID();
+ Module *Mod = Log->getModule();
+ Type *Ty = Log->getType();
Value *Ret = nullptr;
- StringRef Name = Callee->getName();
- if (UnsafeFPShrink && hasFloatVersion(Name))
- Ret = optimizeUnaryDoubleFP(CI, B, true);
- if (!CI->isFast())
- return Ret;
- Value *Op1 = CI->getArgOperand(0);
- auto *OpC = dyn_cast<CallInst>(Op1);
+ if (UnsafeFPShrink && hasFloatVersion(LogNm))
+ Ret = optimizeUnaryDoubleFP(Log, B, true);
// The earlier call must also be 'fast' in order to do these transforms.
- if (!OpC || !OpC->isFast())
+ CallInst *Arg = dyn_cast<CallInst>(Log->getArgOperand(0));
+ if (!Log->isFast() || !Arg || !Arg->isFast() || !Arg->hasOneUse())
return Ret;
- // log(pow(x,y)) -> y*log(x)
- // This is only applicable to log, log2, log10.
- if (Name != "log" && Name != "log2" && Name != "log10")
+ LibFunc LogLb, ExpLb, Exp2Lb, Exp10Lb, PowLb;
+
+ // This is only applicable to log(), log2(), log10().
+ if (TLI->getLibFunc(LogNm, LogLb))
+ switch (LogLb) {
+ case LibFunc_logf:
+ LogID = Intrinsic::log;
+ ExpLb = LibFunc_expf;
+ Exp2Lb = LibFunc_exp2f;
+ Exp10Lb = LibFunc_exp10f;
+ PowLb = LibFunc_powf;
+ break;
+ case LibFunc_log:
+ LogID = Intrinsic::log;
+ ExpLb = LibFunc_exp;
+ Exp2Lb = LibFunc_exp2;
+ Exp10Lb = LibFunc_exp10;
+ PowLb = LibFunc_pow;
+ break;
+ case LibFunc_logl:
+ LogID = Intrinsic::log;
+ ExpLb = LibFunc_expl;
+ Exp2Lb = LibFunc_exp2l;
+ Exp10Lb = LibFunc_exp10l;
+ PowLb = LibFunc_powl;
+ break;
+ case LibFunc_log2f:
+ LogID = Intrinsic::log2;
+ ExpLb = LibFunc_expf;
+ Exp2Lb = LibFunc_exp2f;
+ Exp10Lb = LibFunc_exp10f;
+ PowLb = LibFunc_powf;
+ break;
+ case LibFunc_log2:
+ LogID = Intrinsic::log2;
+ ExpLb = LibFunc_exp;
+ Exp2Lb = LibFunc_exp2;
+ Exp10Lb = LibFunc_exp10;
+ PowLb = LibFunc_pow;
+ break;
+ case LibFunc_log2l:
+ LogID = Intrinsic::log2;
+ ExpLb = LibFunc_expl;
+ Exp2Lb = LibFunc_exp2l;
+ Exp10Lb = LibFunc_exp10l;
+ PowLb = LibFunc_powl;
+ break;
+ case LibFunc_log10f:
+ LogID = Intrinsic::log10;
+ ExpLb = LibFunc_expf;
+ Exp2Lb = LibFunc_exp2f;
+ Exp10Lb = LibFunc_exp10f;
+ PowLb = LibFunc_powf;
+ break;
+ case LibFunc_log10:
+ LogID = Intrinsic::log10;
+ ExpLb = LibFunc_exp;
+ Exp2Lb = LibFunc_exp2;
+ Exp10Lb = LibFunc_exp10;
+ PowLb = LibFunc_pow;
+ break;
+ case LibFunc_log10l:
+ LogID = Intrinsic::log10;
+ ExpLb = LibFunc_expl;
+ Exp2Lb = LibFunc_exp2l;
+ Exp10Lb = LibFunc_exp10l;
+ PowLb = LibFunc_powl;
+ break;
+ default:
+ return Ret;
+ }
+ else if (LogID == Intrinsic::log || LogID == Intrinsic::log2 ||
+ LogID == Intrinsic::log10) {
+ if (Ty->getScalarType()->isFloatTy()) {
+ ExpLb = LibFunc_expf;
+ Exp2Lb = LibFunc_exp2f;
+ Exp10Lb = LibFunc_exp10f;
+ PowLb = LibFunc_powf;
+ } else if (Ty->getScalarType()->isDoubleTy()) {
+ ExpLb = LibFunc_exp;
+ Exp2Lb = LibFunc_exp2;
+ Exp10Lb = LibFunc_exp10;
+ PowLb = LibFunc_pow;
+ } else
+ return Ret;
+ } else
return Ret;
IRBuilder<>::FastMathFlagGuard Guard(B);
- FastMathFlags FMF;
- FMF.setFast();
- B.setFastMathFlags(FMF);
+ B.setFastMathFlags(FastMathFlags::getFast());
+
+ Intrinsic::ID ArgID = Arg->getIntrinsicID();
+ LibFunc ArgLb = NotLibFunc;
+ TLI->getLibFunc(Arg, ArgLb);
+
+ // log(pow(x,y)) -> y*log(x)
+ if (ArgLb == PowLb || ArgID == Intrinsic::pow) {
+ Value *LogX =
+ Log->doesNotAccessMemory()
+ ? B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty),
+ Arg->getOperand(0), "log")
+ : emitUnaryFloatFnCall(Arg->getOperand(0), LogNm, B, Attrs);
+ Value *MulY = B.CreateFMul(Arg->getArgOperand(1), LogX, "mul");
+    // Since pow() may have side effects, e.g. setting errno,
+ // dead code elimination may not be trusted to remove it.
+ substituteInParent(Arg, MulY);
+ return MulY;
+ }
+
+ // log(exp{,2,10}(y)) -> y*log({e,2,10})
+ // TODO: There is no exp10() intrinsic yet.
+ if (ArgLb == ExpLb || ArgLb == Exp2Lb || ArgLb == Exp10Lb ||
+ ArgID == Intrinsic::exp || ArgID == Intrinsic::exp2) {
+ Constant *Eul;
+ if (ArgLb == ExpLb || ArgID == Intrinsic::exp)
+ // FIXME: Add more precise value of e for long double.
+ Eul = ConstantFP::get(Log->getType(), numbers::e);
+ else if (ArgLb == Exp2Lb || ArgID == Intrinsic::exp2)
+ Eul = ConstantFP::get(Log->getType(), 2.0);
+ else
+ Eul = ConstantFP::get(Log->getType(), 10.0);
+ Value *LogE = Log->doesNotAccessMemory()
+ ? B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty),
+ Eul, "log")
+ : emitUnaryFloatFnCall(Eul, LogNm, B, Attrs);
+ Value *MulY = B.CreateFMul(Arg->getArgOperand(0), LogE, "mul");
+    // Since exp() may have side effects, e.g. setting errno,
+ // dead code elimination may not be trusted to remove it.
+ substituteInParent(Arg, MulY);
+ return MulY;
+ }
- LibFunc Func;
- Function *F = OpC->getCalledFunction();
- if (F && ((TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
- Func == LibFunc_pow) || F->getIntrinsicID() == Intrinsic::pow))
- return B.CreateFMul(OpC->getArgOperand(1),
- emitUnaryFloatFnCall(OpC->getOperand(0), Callee->getName(), B,
- Callee->getAttributes()), "mul");
-
- // log(exp2(y)) -> y*log(2)
- if (F && Name == "log" && TLI->getLibFunc(F->getName(), Func) &&
- TLI->has(Func) && Func == LibFunc_exp2)
- return B.CreateFMul(
- OpC->getArgOperand(0),
- emitUnaryFloatFnCall(ConstantFP::get(CI->getType(), 2.0),
- Callee->getName(), B, Callee->getAttributes()),
- "logmul");
return Ret;
}
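The rewritten optimizeLog covers the whole float/double/long-double family plus the log/log2/log10 intrinsics, applying two fast-math identities: log(pow(x, y)) == y * log(x), and log(b^y) == y * log(b) for b in {e, 2, 10}. Numerically (valid for x > 0, and only licensed here because both calls carry fast-math flags):

  #include <cmath>

  // e.g. log2(exp2(y)) folds all the way to y, since log2(2) == 1.
  static double lhs(double x, double y) { return std::log(std::pow(x, y)); }
  static double rhs(double x, double y) { return y * std::log(x); }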
@@ -2137,6 +2408,7 @@ Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilder<> &B) {
return New;
}
+ annotateNonNullBasedOnAccess(CI, 0);
return nullptr;
}
@@ -2231,21 +2503,21 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) {
return New;
}
+ annotateNonNullBasedOnAccess(CI, {0, 1});
return nullptr;
}
Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) {
- // Check for a fixed format string.
- StringRef FormatStr;
- if (!getConstantStringInfo(CI->getArgOperand(2), FormatStr))
- return nullptr;
-
// Check for size
ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1));
if (!Size)
return nullptr;
uint64_t N = Size->getZExtValue();
+ // Check for a fixed format string.
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(2), FormatStr))
+ return nullptr;
// If we just have a format string (nothing else crazy) transform it.
if (CI->getNumArgOperands() == 3) {
@@ -2318,6 +2590,8 @@ Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilder<> &B) {
return V;
}
+ if (isKnownNonZero(CI->getOperand(1), DL))
+ annotateNonNullBasedOnAccess(CI, 0);
return nullptr;
}
@@ -2503,6 +2777,7 @@ Value *LibCallSimplifier::optimizeFRead(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
+ annotateNonNullBasedOnAccess(CI, 0);
if (!CI->use_empty())
return nullptr;
@@ -2515,6 +2790,12 @@ Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
+Value *LibCallSimplifier::optimizeBCopy(CallInst *CI, IRBuilder<> &B) {
+ // bcopy(src, dst, n) -> llvm.memmove(dst, src, n)
+ return B.CreateMemMove(CI->getArgOperand(1), 1, CI->getArgOperand(0), 1,
+ CI->getArgOperand(2));
+}
+
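bcopy's legacy signature puts the source first, and like memmove it must handle overlapping buffers, so the fold above is just memmove with the operands swapped. Reference semantics, hedged (my_bcopy is an illustrative name):

  #include <cstring>

  // bcopy(src, dst, n): note the reversed argument order relative to memmove,
  // and that overlap is permitted, which is why memcpy would not be a valid
  // replacement.
  static void my_bcopy(const void *src, void *dst, size_t n) {
    std::memmove(dst, src, n);
  }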
bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) {
LibFunc Func;
SmallString<20> FloatFuncName = FuncName;
@@ -2557,6 +2838,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
return optimizeStrLen(CI, Builder);
case LibFunc_strpbrk:
return optimizeStrPBrk(CI, Builder);
+ case LibFunc_strndup:
+ return optimizeStrNDup(CI, Builder);
case LibFunc_strtol:
case LibFunc_strtod:
case LibFunc_strtof:
@@ -2573,12 +2856,16 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
return optimizeStrStr(CI, Builder);
case LibFunc_memchr:
return optimizeMemChr(CI, Builder);
+ case LibFunc_memrchr:
+ return optimizeMemRChr(CI, Builder);
case LibFunc_bcmp:
return optimizeBCmp(CI, Builder);
case LibFunc_memcmp:
return optimizeMemCmp(CI, Builder);
case LibFunc_memcpy:
return optimizeMemCpy(CI, Builder);
+ case LibFunc_mempcpy:
+ return optimizeMemPCpy(CI, Builder);
case LibFunc_memmove:
return optimizeMemMove(CI, Builder);
case LibFunc_memset:
@@ -2587,6 +2874,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
return optimizeRealloc(CI, Builder);
case LibFunc_wcslen:
return optimizeWcslen(CI, Builder);
+ case LibFunc_bcopy:
+ return optimizeBCopy(CI, Builder);
default:
break;
}
@@ -2626,11 +2915,21 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
case LibFunc_sqrt:
case LibFunc_sqrtl:
return optimizeSqrt(CI, Builder);
+ case LibFunc_logf:
case LibFunc_log:
+ case LibFunc_logl:
+ case LibFunc_log10f:
case LibFunc_log10:
+ case LibFunc_log10l:
+ case LibFunc_log1pf:
case LibFunc_log1p:
+ case LibFunc_log1pl:
+ case LibFunc_log2f:
case LibFunc_log2:
+ case LibFunc_log2l:
+ case LibFunc_logbf:
case LibFunc_logb:
+ case LibFunc_logbl:
return optimizeLog(CI, Builder);
case LibFunc_tan:
case LibFunc_tanf:
@@ -2721,10 +3020,18 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
case Intrinsic::exp2:
return optimizeExp2(CI, Builder);
case Intrinsic::log:
+ case Intrinsic::log2:
+ case Intrinsic::log10:
return optimizeLog(CI, Builder);
case Intrinsic::sqrt:
return optimizeSqrt(CI, Builder);
// TODO: Use foldMallocMemset() with memset intrinsic.
+ case Intrinsic::memset:
+ return optimizeMemSet(CI, Builder);
+ case Intrinsic::memcpy:
+ return optimizeMemCpy(CI, Builder);
+ case Intrinsic::memmove:
+ return optimizeMemMove(CI, Builder);
default:
return nullptr;
}
@@ -2740,8 +3047,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
IRBuilder<> TmpBuilder(SimplifiedCI);
if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, TmpBuilder)) {
// If we were able to further simplify, remove the now redundant call.
- SimplifiedCI->replaceAllUsesWith(V);
- eraseFromParent(SimplifiedCI);
+ substituteInParent(SimplifiedCI, V);
return V;
}
}
@@ -2898,7 +3204,9 @@ FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
uint64_t Len = GetStringLength(CI->getArgOperand(*StrOp));
// If the length is 0 we don't know how long it is and so we can't
// remove the check.
- if (Len == 0)
+ if (Len)
+ annotateDereferenceableBytes(CI, *StrOp, Len);
+ else
return false;
return ObjSizeCI->getZExtValue() >= Len;
}
@@ -2915,8 +3223,9 @@ FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
IRBuilder<> &B) {
if (isFortifiedCallFoldable(CI, 3, 2)) {
- B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
- CI->getArgOperand(2));
+ CallInst *NewCI = B.CreateMemCpy(
+ CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, CI->getArgOperand(2));
+ NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
return nullptr;
@@ -2925,8 +3234,9 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
IRBuilder<> &B) {
if (isFortifiedCallFoldable(CI, 3, 2)) {
- B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
- CI->getArgOperand(2));
+ CallInst *NewCI = B.CreateMemMove(
+ CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, CI->getArgOperand(2));
+ NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
return nullptr;
@@ -2938,7 +3248,9 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
if (isFortifiedCallFoldable(CI, 3, 2)) {
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
- B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+ CallInst *NewCI =
+ B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+ NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
return nullptr;
@@ -2974,7 +3286,9 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
  // Maybe we can still fold __st[rp]cpy_chk to __memcpy_chk.
uint64_t Len = GetStringLength(Src);
- if (Len == 0)
+ if (Len)
+ annotateDereferenceableBytes(CI, 1, Len);
+ else
return nullptr;
Type *SizeTTy = DL.getIntPtrType(CI->getContext());
diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp
index 456724779b43..5d380dcf231c 100644
--- a/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -380,11 +380,11 @@ parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
// TODO see if there is a more elegant solution to selecting the rewrite
// descriptor type
if (!Target.empty())
- DL->push_back(llvm::make_unique<ExplicitRewriteFunctionDescriptor>(
+ DL->push_back(std::make_unique<ExplicitRewriteFunctionDescriptor>(
Source, Target, Naked));
else
DL->push_back(
- llvm::make_unique<PatternRewriteFunctionDescriptor>(Source, Transform));
+ std::make_unique<PatternRewriteFunctionDescriptor>(Source, Transform));
return true;
}
@@ -442,11 +442,11 @@ parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
}
if (!Target.empty())
- DL->push_back(llvm::make_unique<ExplicitRewriteGlobalVariableDescriptor>(
+ DL->push_back(std::make_unique<ExplicitRewriteGlobalVariableDescriptor>(
Source, Target,
/*Naked*/ false));
else
- DL->push_back(llvm::make_unique<PatternRewriteGlobalVariableDescriptor>(
+ DL->push_back(std::make_unique<PatternRewriteGlobalVariableDescriptor>(
Source, Transform));
return true;
@@ -505,11 +505,11 @@ parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
}
if (!Target.empty())
- DL->push_back(llvm::make_unique<ExplicitRewriteNamedAliasDescriptor>(
+ DL->push_back(std::make_unique<ExplicitRewriteNamedAliasDescriptor>(
Source, Target,
/*Naked*/ false));
else
- DL->push_back(llvm::make_unique<PatternRewriteNamedAliasDescriptor>(
+ DL->push_back(std::make_unique<PatternRewriteNamedAliasDescriptor>(
Source, Transform));
return true;
diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
index a77bf50fe10b..591e1fd2dbee 100644
--- a/lib/Transforms/Utils/VNCoercion.cpp
+++ b/lib/Transforms/Utils/VNCoercion.cpp
@@ -431,7 +431,7 @@ Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
LoadInst *NewLoad = Builder.CreateLoad(DestTy, PtrVal);
NewLoad->takeName(SrcVal);
- NewLoad->setAlignment(SrcVal->getAlignment());
+ NewLoad->setAlignment(MaybeAlign(SrcVal->getAlignment()));
LLVM_DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
LLVM_DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index fbc3407c301f..da68d3713b40 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -27,8 +27,8 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalIndirectSymbol.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instruction.h"
@@ -66,7 +66,7 @@ struct WorklistEntry {
enum EntryKind {
MapGlobalInit,
MapAppendingVar,
- MapGlobalAliasee,
+ MapGlobalIndirectSymbol,
RemapFunction
};
struct GVInitTy {
@@ -77,9 +77,9 @@ struct WorklistEntry {
GlobalVariable *GV;
Constant *InitPrefix;
};
- struct GlobalAliaseeTy {
- GlobalAlias *GA;
- Constant *Aliasee;
+ struct GlobalIndirectSymbolTy {
+ GlobalIndirectSymbol *GIS;
+ Constant *Target;
};
unsigned Kind : 2;
@@ -89,7 +89,7 @@ struct WorklistEntry {
union {
GVInitTy GVInit;
AppendingGVTy AppendingGV;
- GlobalAliaseeTy GlobalAliasee;
+ GlobalIndirectSymbolTy GlobalIndirectSymbol;
Function *RemapF;
} Data;
};
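The rename from MapGlobalAliasee to MapGlobalIndirectSymbol generalizes the worklist entry from aliases to the common base class shared by GlobalAlias and GlobalIFunc, so ifunc resolvers get remapped through the same path. A sketch of the dispatch this enables (LLVM-9-era API, before GlobalIndirectSymbol was later removed):

  #include "llvm/IR/GlobalIndirectSymbol.h"
  using namespace llvm;

  // Both aliases (alias -> aliasee) and ifuncs (ifunc -> resolver) store
  // their target through the GlobalIndirectSymbol base class.
  static void retarget(GlobalValue *GV, Constant *NewTarget) {
    if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(GV))
      GIS->setIndirectSymbol(NewTarget);
  }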
@@ -161,8 +161,8 @@ public:
bool IsOldCtorDtor,
ArrayRef<Constant *> NewMembers,
unsigned MCID);
- void scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee,
- unsigned MCID);
+  void scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS,
+                                       Constant &Target, unsigned MCID);
void scheduleRemapFunction(Function &F, unsigned MCID);
void flush();
@@ -172,7 +172,7 @@ private:
void mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix,
bool IsOldCtorDtor,
ArrayRef<Constant *> NewMembers);
- void mapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee);
+ void mapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS, Constant &Target);
void remapFunction(Function &F, ValueToValueMapTy &VM);
ValueToValueMapTy &getVM() { return *MCs[CurrentMCID].VM; }
@@ -774,20 +774,6 @@ Metadata *MDNodeMapper::mapTopLevelUniquedNode(const MDNode &FirstN) {
return *getMappedOp(&FirstN);
}
-namespace {
-
-struct MapMetadataDisabler {
- ValueToValueMapTy &VM;
-
- MapMetadataDisabler(ValueToValueMapTy &VM) : VM(VM) {
- VM.disableMapMetadata();
- }
-
- ~MapMetadataDisabler() { VM.enableMapMetadata(); }
-};
-
-} // end anonymous namespace
-
Optional<Metadata *> Mapper::mapSimpleMetadata(const Metadata *MD) {
// If the value already exists in the map, use it.
if (Optional<Metadata *> NewMD = getVM().getMappedMD(MD))
@@ -802,9 +788,6 @@ Optional<Metadata *> Mapper::mapSimpleMetadata(const Metadata *MD) {
return const_cast<Metadata *>(MD);
if (auto *CMD = dyn_cast<ConstantAsMetadata>(MD)) {
- // Disallow recursion into metadata mapping through mapValue.
- MapMetadataDisabler MMD(getVM());
-
// Don't memoize ConstantAsMetadata. Instead of lasting until the
// LLVMContext is destroyed, they can be deleted when the GlobalValue they
// reference is destructed. These aren't super common, so the extra
@@ -846,9 +829,9 @@ void Mapper::flush() {
AppendingInits.resize(PrefixSize);
break;
}
- case WorklistEntry::MapGlobalAliasee:
- E.Data.GlobalAliasee.GA->setAliasee(
- mapConstant(E.Data.GlobalAliasee.Aliasee));
+ case WorklistEntry::MapGlobalIndirectSymbol:
+ E.Data.GlobalIndirectSymbol.GIS->setIndirectSymbol(
+ mapConstant(E.Data.GlobalIndirectSymbol.Target));
break;
case WorklistEntry::RemapFunction:
remapFunction(*E.Data.RemapF);
@@ -1041,16 +1024,16 @@ void Mapper::scheduleMapAppendingVariable(GlobalVariable &GV,
AppendingInits.append(NewMembers.begin(), NewMembers.end());
}
-void Mapper::scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee,
- unsigned MCID) {
- assert(AlreadyScheduled.insert(&GA).second && "Should not reschedule");
+void Mapper::scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS,
+ Constant &Target, unsigned MCID) {
+ assert(AlreadyScheduled.insert(&GIS).second && "Should not reschedule");
assert(MCID < MCs.size() && "Invalid mapping context");
WorklistEntry WE;
- WE.Kind = WorklistEntry::MapGlobalAliasee;
+ WE.Kind = WorklistEntry::MapGlobalIndirectSymbol;
WE.MCID = MCID;
- WE.Data.GlobalAliasee.GA = &GA;
- WE.Data.GlobalAliasee.Aliasee = &Aliasee;
+ WE.Data.GlobalIndirectSymbol.GIS = &GIS;
+ WE.Data.GlobalIndirectSymbol.Target = &Target;
Worklist.push_back(WE);
}
@@ -1147,9 +1130,10 @@ void ValueMapper::scheduleMapAppendingVariable(GlobalVariable &GV,
GV, InitPrefix, IsOldCtorDtor, NewMembers, MCID);
}
-void ValueMapper::scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee,
- unsigned MCID) {
- getAsMapper(pImpl)->scheduleMapGlobalAliasee(GA, Aliasee, MCID);
+void ValueMapper::scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS,
+ Constant &Target,
+ unsigned MCID) {
+ getAsMapper(pImpl)->scheduleMapGlobalIndirectSymbol(GIS, Target, MCID);
}
void ValueMapper::scheduleRemapFunction(Function &F, unsigned MCID) {